Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 79 additions & 108 deletions src/Amalgam/SBFDSColumnData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,24 +250,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value
auto old_value_entry = sortedNumberValueEntries.find(old_number_value);

if(old_value_entry == end(sortedNumberValueEntries))
{
//value must have changed sizes, look in each size
//note that this is inefficient -- if this ends up being a bottleneck,
//an additional data structure will need to be built to maintain the previous size
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
for(auto cur_id_entry = begin(sortedNumberValueEntries); cur_id_entry != end(sortedNumberValueEntries); ++cur_id_entry)
{
if(cur_id_entry->second.indicesWithValue.contains(index))
{
old_value_entry = cur_id_entry;
break;
}
}

//if not found anywhere, then there's index corruption
if(old_value_entry == end(sortedNumberValueEntries))
assert(false);
}
assert(false);

//if there are multiple entries for this number, just remove the id from the old value
if(old_value_entry->second.indicesWithValue.size() > 1)
Expand Down Expand Up @@ -307,24 +290,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value
size_t new_value_index = 0;
auto old_id_entry = stringIdValueEntries.find(old_sid_value);
if(old_id_entry == end(stringIdValueEntries))
{
//value must have changed sizes, look in each size
//note that this is inefficient -- if this ends up being a bottleneck,
//an additional data structure will need to be built to maintain the previous size
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry)
{
if(cur_id_entry->second != nullptr && cur_id_entry->second->indicesWithValue.contains(index))
{
old_id_entry = cur_id_entry;
break;
}
}

//if not found anywhere, then there's index corruption
if(old_id_entry == end(stringIdValueEntries))
assert(false);
}
assert(false);

//if there are multiple entries for this string, just move the id
if(old_id_entry->second->indicesWithValue.size() > 1)
Expand Down Expand Up @@ -399,24 +365,7 @@ void SBFDSColumnData::ChangeIndexValue(EvaluableNodeImmediateValueType new_value
//need to emplace above before searching to ensure new_size_entry does not become invalidated
auto old_size_entry = valueCodeSizeToIndices.find(old_code_size);
if(old_size_entry == end(valueCodeSizeToIndices))
{
//value must have changed sizes, look in each size
//note that this is inefficient -- if this ends up being a bottleneck,
//an additional data structure will need to be built to maintain the previous size
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry)
{
if(cur_id_entry->second != nullptr && cur_id_entry->second->contains(index))
{
old_size_entry = cur_id_entry;
break;
}
}

//if not found anywhere, then there's index corruption
if(old_size_entry == end(valueCodeSizeToIndices))
assert(false);
}
assert(false);

//if there are multiple entries for this string, just move the id
if(old_size_entry->second->size() > 1)
Expand Down Expand Up @@ -493,24 +442,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ
//look up value
auto value_entry = sortedNumberValueEntries.find(resolved_value.number);
if(value_entry == end(sortedNumberValueEntries))
{
//value must have changed sizes, look in each size
//note that this is inefficient -- if this ends up being a bottleneck,
//an additional data structure will need to be built to maintain the previous size
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
for(auto cur_value_entry = begin(sortedNumberValueEntries); cur_value_entry != end(sortedNumberValueEntries); ++cur_value_entry)
{
if(cur_value_entry->second.indicesWithValue.contains(index))
{
value_entry = cur_value_entry;
break;
}
}

//if not found anywhere, then there's index corruption
if(value_entry == end(sortedNumberValueEntries))
assert(false);
}
assert(false);

//if the bucket has only one entry, we must delete the entire bucket
if(value_entry->second.indicesWithValue.size() == 1)
Expand All @@ -535,24 +467,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ

auto id_entry = stringIdValueEntries.find(resolved_value.stringID);
if(id_entry == end(stringIdValueEntries))
{
//value must have changed sizes, look in each size
//note that this is inefficient -- if this ends up being a bottleneck,
//an additional data structure will need to be built to maintain the previous size
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry)
{
if(cur_id_entry->second->indicesWithValue.contains(index))
{
id_entry = cur_id_entry;
break;
}
}

//if not found anywhere, then there's index corruption
if(id_entry == end(stringIdValueEntries))
assert(false);
}
assert(false);

auto &entities = id_entry->second->indicesWithValue;
entities.erase(index);
Expand All @@ -578,24 +493,7 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ
size_t num_indices = EvaluableNode::GetDeepSize(value.code);
auto id_entry = valueCodeSizeToIndices.find(num_indices);
if(id_entry == end(valueCodeSizeToIndices))
{
//value must have changed sizes, look in each size
//note that this is inefficient -- if this ends up being a bottleneck,
//an additional data structure will need to be built to maintain the previous size
//TODO 24298: ensure index size is always correct and updated so entities won't be missed, remove this code and assert false if not found
for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry)
{
if(cur_id_entry->second->contains(index))
{
id_entry = cur_id_entry;
break;
}
}

//if not found anywhere, then there's index corruption
if(id_entry == end(valueCodeSizeToIndices))
assert(false);
}
assert(false);

//remove the entity
auto &entities = *(id_entry->second);
Expand All @@ -620,6 +518,79 @@ void SBFDSColumnData::DeleteIndexValue(EvaluableNodeImmediateValueType value_typ
valueEntries[index] = std::numeric_limits<double>::quiet_NaN();
}

void SBFDSColumnData::RemoveIndexFromCaches(size_t index)
{
if(invalidIndices.EraseAndRetrieve(index))
return;
if(nullIndices.EraseAndRetrieve(index))
return;
if(falseBoolIndices.EraseAndRetrieve(index))
return;
if(trueBoolIndices.EraseAndRetrieve(index))
return;

for(auto cur_value_entry = begin(sortedNumberValueEntries); cur_value_entry != end(sortedNumberValueEntries); ++cur_value_entry)
{
if(cur_value_entry->second.indicesWithValue.contains(index))
{
if(cur_value_entry->second.indicesWithValue.size() == 1)
{
internedNumberValues.DeleteInternIndex(cur_value_entry->second.valueInternIndex);
sortedNumberValueEntries.erase(cur_value_entry);
}
else //else we can just remove the id from the bucket
{
cur_value_entry->second.indicesWithValue.erase(index);
}

numberIndices.erase(index);
return;
}
}

for(auto cur_id_entry = begin(stringIdValueEntries); cur_id_entry != end(stringIdValueEntries); ++cur_id_entry)
{
if(cur_id_entry->second->indicesWithValue.contains(index))
{
auto &entities = cur_id_entry->second->indicesWithValue;
entities.erase(index);

//if no more entries have the value, remove it
if(entities.size() == 0)
{
internedStringIdValues.DeleteInternIndex(cur_id_entry->second->valueInternIndex);
stringIdValueEntries.erase(cur_id_entry);
}

//see if need to compute new longest string
if(index == indexWithLongestString)
RecomputeLongestString();

stringIdIndices.erase(index);
return;
}
}

for(auto cur_id_entry = begin(valueCodeSizeToIndices); cur_id_entry != end(valueCodeSizeToIndices); ++cur_id_entry)
{
if(cur_id_entry->second->contains(index))
{
auto &entities = *(cur_id_entry->second);
entities.erase(index);

if(entities.size() == 0)
valueCodeSizeToIndices.erase(cur_id_entry);

//see if need to update largest code
if(index == indexWithLargestCode)
RecomputeLargestCode();

codeIndices.erase(index);
return;
}
}
}

void SBFDSColumnData::Optimize()
{
#ifdef SBFDS_VERIFICATION
Expand Down
12 changes: 12 additions & 0 deletions src/Amalgam/SBFDSColumnData.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ class SBFDSColumnData
void DeleteIndexValue(EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue value,
size_t index, bool remove_last_entity);

//removes all of an index's data from the caches regardless of type
// it should be followed up with an appropriate insert operation with the new value
// to maintain cache consistency
//TODO 24298: attempt to remove this and pass in the previous value for every label change
void RemoveIndexFromCaches(size_t index);

//changes column to/from interning as would yield best performance
void Optimize();

Expand Down Expand Up @@ -443,6 +449,12 @@ class SBFDSColumnData
//used for debugging to make sure all entities are valid
inline void VerifyAllEntities(size_t max_num_entities = std::numeric_limits<size_t>::max())
{
size_t num_entities = invalidIndices.size() + nullIndices.size() + falseBoolIndices.size() + trueBoolIndices.size()
+ numberIndices.size() + stringIdIndices.size() + codeIndices.size();

if(max_num_entities < std::numeric_limits<size_t>::max())
assert(num_entities == max_num_entities);

for(auto &value_entry : sortedNumberValueEntries)
{
//ensure all interned values are valid
Expand Down
13 changes: 9 additions & 4 deletions src/Amalgam/SeparableBoxFilterDataStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,10 @@ void SeparableBoxFilterDataStore::UpdateAllEntityLabels(Entity *entity, size_t e

for(auto &column_data : columnData)
{
//TODO 24298: switch this to use ChangeIndexValue if possible
column_data->RemoveIndexFromCaches(entity_index);
auto [value, found] = entity->GetValueAtLabelAsImmediateValue(column_data->stringId);
column_data->ChangeIndexValue(value.nodeType, value.nodeValue, entity_index);
column_data->InsertIndexValue(value.nodeType, value.nodeValue, entity_index);
}

//clean up any labels that aren't relevant
Expand Down Expand Up @@ -251,17 +253,20 @@ void SeparableBoxFilterDataStore::UpdateEntityLabel(Entity *entity, size_t entit
VerifyAllEntitiesForColumn(column_index);
#endif

//get the new value
//TODO 24298: switch this to use ChangeIndexValue if possible
column_data->RemoveIndexFromCaches(entity_index);
auto [value, found] = entity->GetValueAtLabelAsImmediateValue(column_data->stringId);

column_data->ChangeIndexValue(value.nodeType, value.nodeValue, entity_index);
column_data->InsertIndexValue(value.nodeType, value.nodeValue, entity_index);

//remove the label if no longer relevant
if(IsColumnIndexRemovable(column_index))
RemoveColumnIndex(column_index);
else
OptimizeColumn(column_index);

#ifdef SBFDS_VERIFICATION
VerifyAllEntitiesForColumn(column_index);
#endif
}

//populates distances_out with all entities and their distances that have a distance to target less than max_dist
Expand Down
2 changes: 1 addition & 1 deletion src/Amalgam/SeparableBoxFilterDataStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ class SeparableBoxFilterDataStore
inline void VerifyAllEntitiesForAllColumns()
{
for(auto &column_data : columnData)
column_data->VerifyAllEntities();
column_data->VerifyAllEntities(numEntities);
}

//deletes the index and associated data
Expand Down