Skip to content

Commit 199f128

Browse files
authored
24958: A general fix for SBFDS index issues (#490)
1 parent 6339ee8 commit 199f128

4 files changed

Lines changed: 101 additions & 113 deletions

File tree

src/Amalgam/SBFDSColumnData.cpp

Lines changed: 79 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -250,24 +250,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value
250250
auto old_value_entry = sortedNumberValueEntries.find(old_number_value);
251251

252252
if(old_value_entry == end(sortedNumberValueEntries))
253-
{
254-
//value must have changed sizes, look in each size
255-
//note that this is inefficient -- if this ends up being a bottleneck,
256-
//an additional data structure will need to be built to maintain the previous size
257-
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
258-
for(auto cur_id_entry = begin(sortedNumberValueEntries); cur_id_entry != end(sortedNumberValueEntries); ++cur_id_entry)
259-
{
260-
if(cur_id_entry->second.indicesWithValue.contains(index))
261-
{
262-
old_value_entry = cur_id_entry;
263-
break;
264-
}
265-
}
266-
267-
//if not found anywhere, then there's index corruption
268-
if(old_value_entry == end(sortedNumberValueEntries))
269-
assert(false);
270-
}
253+
assert(false);
271254

272255
//if there are multiple entries for this number, just remove the id from the old value
273256
if(old_value_entry->second.indicesWithValue.size() > 1)
@@ -307,24 +290,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value
307290
size_t new_value_index = 0;
308291
auto old_id_entry = stringIdValueEntries.find(old_sid_value);
309292
if(old_id_entry == end(stringIdValueEntries))
310-
{
311-
//value must have changed sizes, look in each size
312-
//note that this is inefficient -- if this ends up being a bottleneck,
313-
//an additional data structure will need to be built to maintain the previous size
314-
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
315-
for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry)
316-
{
317-
if(cur_id_entry->second != nullptr && cur_id_entry->second->indicesWithValue.contains(index))
318-
{
319-
old_id_entry = cur_id_entry;
320-
break;
321-
}
322-
}
323-
324-
//if not found anywhere, then there's index corruption
325-
if(old_id_entry == end(stringIdValueEntries))
326-
assert(false);
327-
}
293+
assert(false);
328294

329295
//if there are multiple entries for this string, just move the id
330296
if(old_id_entry->second->indicesWithValue.size() > 1)
@@ -399,24 +365,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value
399365
//need to emplace above before searching to ensure new_size_entry does not become invalidated
400366
auto old_size_entry = valueCodeSizeToIndices.find(old_code_size);
401367
if(old_size_entry == end(valueCodeSizeToIndices))
402-
{
403-
//value must have changed sizes, look in each size
404-
//note that this is inefficient -- if this ends up being a bottleneck,
405-
//an additional data structure will need to be built to maintain the previous size
406-
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
407-
for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry)
408-
{
409-
if(cur_id_entry->second != nullptr && cur_id_entry->second->contains(index))
410-
{
411-
old_size_entry = cur_id_entry;
412-
break;
413-
}
414-
}
415-
416-
//if not found anywhere, then there's index corruption
417-
if(old_size_entry == end(valueCodeSizeToIndices))
418-
assert(false);
419-
}
368+
assert(false);
420369

421370
//if there are multiple entries for this string, just move the id
422371
if(old_size_entry->second->size() > 1)
@@ -493,24 +442,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ
493442
//look up value
494443
auto value_entry = sortedNumberValueEntries.find(resolved_value.number);
495444
if(value_entry == end(sortedNumberValueEntries))
496-
{
497-
//value must have changed sizes, look in each size
498-
//note that this is inefficient -- if this ends up being a bottleneck,
499-
//an additional data structure will need to be built to maintain the previous size
500-
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
501-
for(auto cur_value_entry = begin(sortedNumberValueEntries); cur_value_entry != end(sortedNumberValueEntries); ++cur_value_entry)
502-
{
503-
if(cur_value_entry->second.indicesWithValue.contains(index))
504-
{
505-
value_entry = cur_value_entry;
506-
break;
507-
}
508-
}
509-
510-
//if not found anywhere, then there's index corruption
511-
if(value_entry == end(sortedNumberValueEntries))
512-
assert(false);
513-
}
445+
assert(false);
514446

515447
//if the bucket has only one entry, we must delete the entire bucket
516448
if(value_entry->second.indicesWithValue.size() == 1)
@@ -535,24 +467,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ
535467

536468
auto id_entry = stringIdValueEntries.find(resolved_value.stringID);
537469
if(id_entry == end(stringIdValueEntries))
538-
{
539-
//value must have changed sizes, look in each size
540-
//note that this is inefficient -- if this ends up being a bottleneck,
541-
//an additional data structure will need to be built to maintain the previous size
542-
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
543-
for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry)
544-
{
545-
if(cur_id_entry->second->indicesWithValue.contains(index))
546-
{
547-
id_entry = cur_id_entry;
548-
break;
549-
}
550-
}
551-
552-
//if not found anywhere, then there's index corruption
553-
if(id_entry == end(stringIdValueEntries))
554-
assert(false);
555-
}
470+
assert(false);
556471

557472
auto &entities = id_entry->second->indicesWithValue;
558473
entities.erase(index);
@@ -578,24 +493,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ
578493
size_t num_indices = EvaluableNode::GetDeepSize(value.code);
579494
auto id_entry = valueCodeSizeToIndices.find(num_indices);
580495
if(id_entry == end(valueCodeSizeToIndices))
581-
{
582-
//value must have changed sizes, look in each size
583-
//note that this is inefficient -- if this ends up being a bottleneck,
584-
//an additional data structure will need to be built to maintain the previous size
585-
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
586-
for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry)
587-
{
588-
if(cur_id_entry->second->contains(index))
589-
{
590-
id_entry = cur_id_entry;
591-
break;
592-
}
593-
}
594-
595-
//if not found anywhere, then there's index corruption
596-
if(id_entry == end(valueCodeSizeToIndices))
597-
assert(false);
598-
}
496+
assert(false);
599497

600498
//remove the entity
601499
auto &entities = *(id_entry->second);
@@ -620,6 +518,79 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ
620518
valueEntries[index] = std::numeric_limits<double>::quiet_NaN();
621519
}
622520

521+
void SBFDSColumnData::RemoveIndexFromCaches(size_t index)
522+
{
523+
if(invalidIndices.EraseAndRetrieve(index))
524+
return;
525+
if(nullIndices.EraseAndRetrieve(index))
526+
return;
527+
if(falseBoolIndices.EraseAndRetrieve(index))
528+
return;
529+
if(trueBoolIndices.EraseAndRetrieve(index))
530+
return;
531+
532+
for(auto cur_value_entry = begin(sortedNumberValueEntries); cur_value_entry != end(sortedNumberValueEntries); ++cur_value_entry)
533+
{
534+
if(cur_value_entry->second.indicesWithValue.contains(index))
535+
{
536+
if(cur_value_entry->second.indicesWithValue.size() == 1)
537+
{
538+
internedNumberValues.DeleteInternIndex(cur_value_entry->second.valueInternIndex);
539+
sortedNumberValueEntries.erase(cur_value_entry);
540+
}
541+
else //else we can just remove the id from the bucket
542+
{
543+
cur_value_entry->second.indicesWithValue.erase(index);
544+
}
545+
546+
numberIndices.erase(index);
547+
return;
548+
}
549+
}
550+
551+
for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry)
552+
{
553+
if(cur_id_entry->second->indicesWithValue.contains(index))
554+
{
555+
auto &entities = cur_id_entry->second->indicesWithValue;
556+
entities.erase(index);
557+
558+
//if no more entries have the value, remove it
559+
if(entities.size() == 0)
560+
{
561+
internedStringIdValues.DeleteInternIndex(cur_id_entry->second->valueInternIndex);
562+
stringIdValueEntries.erase(cur_id_entry);
563+
}
564+
565+
//see if need to compute new longest string
566+
if(index == indexWithLongestString)
567+
RecomputeLongestString();
568+
569+
stringIdIndices.erase(index);
570+
return;
571+
}
572+
}
573+
574+
for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry)
575+
{
576+
if(cur_id_entry->second->contains(index))
577+
{
578+
auto &entities = *(cur_id_entry->second);
579+
entities.erase(index);
580+
581+
if(entities.size() == 0)
582+
valueCodeSizeToIndices.erase(cur_id_entry);
583+
584+
//see if need to update largest code
585+
if(index == indexWithLargestCode)
586+
RecomputeLargestCode();
587+
588+
codeIndices.erase(index);
589+
return;
590+
}
591+
}
592+
}
593+
623594
void SBFDSColumnData::Optimize()
624595
{
625596
#ifdef SBFDS_VERIFICATION

src/Amalgam/SBFDSColumnData.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ class SBFDSColumnData
167167
void DeleteIndexValue(EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue value,
168168
size_t index, bool remove_last_entity);
169169

170+
//removes all of an index's data from the caches regardless of type
171+
// it should be followed up with an appropriate insert operation with the new value
172+
// to maintain cache consistency
173+
//TODO 24298: attempt to remove this and pass in the previous value for every label change
174+
void RemoveIndexFromCaches(size_t index);
175+
170176
//changes column to/from interning as would yield best performance
171177
void Optimize();
172178

@@ -443,6 +449,12 @@ class SBFDSColumnData
443449
//used for debugging to make sure all entities are valid
444450
inline void VerifyAllEntities(size_t max_num_entities = std::numeric_limits<size_t>::max())
445451
{
452+
size_t num_entities = invalidIndices.size() + nullIndices.size() + falseBoolIndices.size() + trueBoolIndices.size()
453+
+ numberIndices.size() + stringIdIndices.size() + codeIndices.size();
454+
455+
if(max_num_entities < std::numeric_limits<size_t>::max())
456+
assert(num_entities == max_num_entities);
457+
446458
for(auto &value_entry : sortedNumberValueEntries)
447459
{
448460
//ensure all interned values are valid

src/Amalgam/SeparableBoxFilterDataStore.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,10 @@ void SeparableBoxFilterDataStore::UpdateAllEntityLabels(Entity *entity, size_t e
221221

222222
for(auto &column_data : columnData)
223223
{
224+
//TODO 24298: switch this to use ChangeIndexValue if possible
225+
column_data->RemoveIndexFromCaches(entity_index);
224226
auto [value, found] = entity->GetValueAtLabelAsImmediateValue(column_data->stringId);
225-
column_data->ChangeIndexValue(value.nodeType, value.nodeValue, entity_index);
227+
column_data->InsertIndexValue(value.nodeType, value.nodeValue, entity_index);
226228
}
227229

228230
//clean up any labels that aren't relevant
@@ -251,17 +253,20 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit
251253
VerifyAllEntitiesForColumn(column_index);
252254
#endif
253255

254-
//get the new value
256+
//TODO 24298: switch this to use ChangeIndexValue if possible
257+
column_data->RemoveIndexFromCaches(entity_index);
255258
auto [value, found] = entity->GetValueAtLabelAsImmediateValue(column_data->stringId);
256-
257-
column_data->ChangeIndexValue(value.nodeType, value.nodeValue, entity_index);
259+
column_data->InsertIndexValue(value.nodeType, value.nodeValue, entity_index);
258260

259261
//remove the label if no longer relevant
260262
if(IsColumnIndexRemovable(column_index))
261263
RemoveColumnIndex(column_index);
262264
else
263265
OptimizeColumn(column_index);
264266

267+
#ifdef SBFDS_VERIFICATION
268+
VerifyAllEntitiesForColumn(column_index);
269+
#endif
265270
}
266271

267272
//populates distances_out with all entities and their distances that have a distance to target less than max_dist

src/Amalgam/SeparableBoxFilterDataStore.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ class SeparableBoxFilterDataStore
672672
inline void VerifyAllEntitiesForAllColumns()
673673
{
674674
for(auto &column_data : columnData)
675-
column_data->VerifyAllEntities();
675+
column_data->VerifyAllEntities(numEntities);
676676
}
677677

678678
//deletes the index and associated data

0 commit comments

Comments
 (0)