diff --git a/src/Amalgam/SBFDSColumnData.cpp b/src/Amalgam/SBFDSColumnData.cpp index 449c1524..ab1b3e7c 100644 --- a/src/Amalgam/SBFDSColumnData.cpp +++ b/src/Amalgam/SBFDSColumnData.cpp @@ -250,24 +250,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value auto old_value_entry = sortedNumberValueEntries.find(old_number_value); if(old_value_entry == end(sortedNumberValueEntries)) - { - //value must have changed sizes, look in each size - //note that this is inefficient -- if this ends up being a bottleneck, - //an additional data structure will need to be built to maintain the previous size - //TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found - for(auto cur_id_entry = begin(sortedNumberValueEntries); cur_id_entry != end(sortedNumberValueEntries); ++cur_id_entry) - { - if(cur_id_entry->second.indicesWithValue.contains(index)) - { - old_value_entry = cur_id_entry; - break; - } - } - - //if not found anywhere, then there's index corruption - if(old_value_entry == end(sortedNumberValueEntries)) - assert(false); - } + assert(false); //if there are multiple entries for this number, just remove the id from the old value if(old_value_entry->second.indicesWithValue.size() > 1) @@ -307,24 +290,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value size_t new_value_index = 0; auto old_id_entry = stringIdValueEntries.find(old_sid_value); if(old_id_entry == end(stringIdValueEntries)) - { - //value must have changed sizes, look in each size - //note that this is inefficient -- if this ends up being a bottleneck, - //an additional data structure will need to be built to maintain the previous size - //TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found - for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry) - { - if(cur_id_entry->second != nullptr && cur_id_entry->second->indicesWithValue.contains(index)) - { - old_id_entry = cur_id_entry; - break; - } - } - - //if not found anywhere, then there's index corruption - if(old_id_entry == end(stringIdValueEntries)) - assert(false); - } + assert(false); //if there are multiple entries for this string, just move the id if(old_id_entry->second->indicesWithValue.size() > 1) @@ -399,24 +365,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value //need to emplace above before searching to ensure new_size_entry does not become invalidated auto old_size_entry = valueCodeSizeToIndices.find(old_code_size); if(old_size_entry == end(valueCodeSizeToIndices)) - { - //value must have changed sizes, look in each size - //note that this is inefficient -- if this ends up being a bottleneck, - //an additional data structure will need to be built to maintain the previous size - //TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found - for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry) - { - if(cur_id_entry->second != nullptr && cur_id_entry->second->contains(index)) - { - old_size_entry = cur_id_entry; - break; - } - } - - //if not found anywhere, then there's index corruption - if(old_size_entry == end(valueCodeSizeToIndices)) - assert(false); - } + assert(false); //if there are multiple entries for this string, just move the id if(old_size_entry->second->size() > 1) @@ -493,24 +442,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ //look up value auto value_entry = sortedNumberValueEntries.find(resolved_value.number); if(value_entry == end(sortedNumberValueEntries)) - { - //value must have changed sizes, look in each size - //note that this is inefficient -- if this ends up being a bottleneck, - //an additional data structure will need to be built to maintain the previous size - //TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found - for(auto cur_value_entry = begin(sortedNumberValueEntries); cur_value_entry != end(sortedNumberValueEntries); ++cur_value_entry) - { - if(cur_value_entry->second.indicesWithValue.contains(index)) - { - value_entry = cur_value_entry; - break; - } - } - - //if not found anywhere, then there's index corruption - if(value_entry == end(sortedNumberValueEntries)) - assert(false); - } + assert(false); //if the bucket has only one entry, we must delete the entire bucket if(value_entry->second.indicesWithValue.size() == 1) @@ -535,24 +467,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ auto id_entry = stringIdValueEntries.find(resolved_value.stringID); if(id_entry == end(stringIdValueEntries)) - { - //value must have changed sizes, look in each size - //note that this is inefficient -- if this ends up being a bottleneck, - //an additional data structure will need to be built to maintain the previous size - //TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found - for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry) - { - if(cur_id_entry->second->indicesWithValue.contains(index)) - { - id_entry = cur_id_entry; - break; - } - } - - //if not found anywhere, then there's index corruption - if(id_entry == end(stringIdValueEntries)) - assert(false); - } + assert(false); auto &entities = id_entry->second->indicesWithValue; entities.erase(index); @@ -578,24 +493,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ size_t num_indices = EvaluableNode::GetDeepSize(value.code); auto id_entry = valueCodeSizeToIndices.find(num_indices); if(id_entry == end(valueCodeSizeToIndices)) - { - //value must have changed sizes, look in each size - //note that this is inefficient -- if this ends up being a bottleneck, - //an additional data structure will need to be built to maintain the previous size - //TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found - for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry) - { - if(cur_id_entry->second->contains(index)) - { - id_entry = cur_id_entry; - break; - } - } - - //if not found anywhere, then there's index corruption - if(id_entry == end(valueCodeSizeToIndices)) - assert(false); - } + assert(false); //remove the entity auto &entities = *(id_entry->second); @@ -620,6 +518,79 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ valueEntries[index] = std::numeric_limits::quiet_NaN(); } +void SBFDSColumnData::RemoveIndexFromCaches(size_t index) +{ + if(invalidIndices.EraseAndRetrieve(index)) + return; + if(nullIndices.EraseAndRetrieve(index)) + return; + if(falseBoolIndices.EraseAndRetrieve(index)) + return; + if(trueBoolIndices.EraseAndRetrieve(index)) + return; + + for(auto cur_value_entry = begin(sortedNumberValueEntries); cur_value_entry != end(sortedNumberValueEntries); ++cur_value_entry) + { + if(cur_value_entry->second.indicesWithValue.contains(index)) + { + if(cur_value_entry->second.indicesWithValue.size() == 1) + { + internedNumberValues.DeleteInternIndex(cur_value_entry->second.valueInternIndex); + sortedNumberValueEntries.erase(cur_value_entry); + } + else //else we can just remove the id from the bucket + { + cur_value_entry->second.indicesWithValue.erase(index); + } + + numberIndices.erase(index); + return; + } + } + + for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry) + { + if(cur_id_entry->second->indicesWithValue.contains(index)) + { + auto &entities = cur_id_entry->second->indicesWithValue; + entities.erase(index); + + //if no more entries have the value, remove it + if(entities.size() == 0) + { + internedStringIdValues.DeleteInternIndex(cur_id_entry->second->valueInternIndex); + stringIdValueEntries.erase(cur_id_entry); + } + + //see if need to compute new longest string + if(index == indexWithLongestString) + RecomputeLongestString(); + + stringIdIndices.erase(index); + return; + } + } + + for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry) + { + if(cur_id_entry->second->contains(index)) + { + auto &entities = *(cur_id_entry->second); + entities.erase(index); + + if(entities.size() == 0) + valueCodeSizeToIndices.erase(cur_id_entry); + + //see if need to update largest code + if(index == indexWithLargestCode) + RecomputeLargestCode(); + + codeIndices.erase(index); + return; + } + } +} + void SBFDSColumnData::Optimize() { #ifdef SBFDS_VERIFICATION diff --git a/src/Amalgam/SBFDSColumnData.h b/src/Amalgam/SBFDSColumnData.h index d00055e2..cc1798d7 100644 --- a/src/Amalgam/SBFDSColumnData.h +++ b/src/Amalgam/SBFDSColumnData.h @@ -167,6 +167,12 @@ class SBFDSColumnData void DeleteIndexValue(EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue value, size_t index, bool remove_last_entity); + //removes all of an index's data from the caches regardless of type + // it should be followed up with an appropriate insert operation with the new value + // to maintain cache consistency + //TODO 24298: attempt to remove this and pass in the previous value for every label change + void RemoveIndexFromCaches(size_t index); + //changes column to/from interning as would yield best performance void Optimize(); @@ -443,6 +449,12 @@ class SBFDSColumnData //used for debugging to make sure all entities are valid inline void VerifyAllEntities(size_t max_num_entities = std::numeric_limits::max()) { + size_t num_entities = invalidIndices.size() + nullIndices.size() + falseBoolIndices.size() + trueBoolIndices.size() + + numberIndices.size() + stringIdIndices.size() + codeIndices.size(); + + if(max_num_entities < std::numeric_limits::max()) + assert(num_entities == max_num_entities); + for(auto &value_entry : sortedNumberValueEntries) { //ensure all interned values are valid diff --git a/src/Amalgam/SeparableBoxFilterDataStore.cpp b/src/Amalgam/SeparableBoxFilterDataStore.cpp index c264d4bf..124fad1f 100644 --- a/src/Amalgam/SeparableBoxFilterDataStore.cpp +++ b/src/Amalgam/SeparableBoxFilterDataStore.cpp @@ -221,8 +221,10 @@ void SeparableBoxFilterDataStore::UpdateAllEntityLabels(Entity *entity, size_t e for(auto &column_data : columnData) { + //TODO 24298: switch this to use ChangeIndexValue if possible + column_data->RemoveIndexFromCaches(entity_index); auto [value, found] = entity->GetValueAtLabelAsImmediateValue(column_data->stringId); - column_data->ChangeIndexValue(value.nodeType, value.nodeValue, entity_index); + column_data->InsertIndexValue(value.nodeType, value.nodeValue, entity_index); } //clean up any labels that aren't relevant @@ -251,10 +253,10 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit VerifyAllEntitiesForColumn(column_index); #endif - //get the new value + //TODO 24298: switch this to use ChangeIndexValue if possible + column_data->RemoveIndexFromCaches(entity_index); auto [value, found] = entity->GetValueAtLabelAsImmediateValue(column_data->stringId); - - column_data->ChangeIndexValue(value.nodeType, value.nodeValue, entity_index); + column_data->InsertIndexValue(value.nodeType, value.nodeValue, entity_index); //remove the label if no longer relevant if(IsColumnIndexRemovable(column_index)) @@ -262,6 +264,9 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit else OptimizeColumn(column_index); +#ifdef SBFDS_VERIFICATION + VerifyAllEntitiesForColumn(column_index); +#endif } //populates distances_out with all entities and their distances that have a distance to target less than max_dist diff --git a/src/Amalgam/SeparableBoxFilterDataStore.h b/src/Amalgam/SeparableBoxFilterDataStore.h index 7916f173..7ba572b7 100644 --- a/src/Amalgam/SeparableBoxFilterDataStore.h +++ b/src/Amalgam/SeparableBoxFilterDataStore.h @@ -672,7 +672,7 @@ class SeparableBoxFilterDataStore inline void VerifyAllEntitiesForAllColumns() { for(auto &column_data : columnData) - column_data->VerifyAllEntities(); + column_data->VerifyAllEntities(numEntities); } //deletes the index and associated data