Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ CMAKE_BUILD_PARALLEL_LEVEL ?= 16
.PHONY: ci
ci: format all-tests

Comment thread
Taepper marked this conversation as resolved.
.PHONY: conanprofile
conanprofile:
buildScripts/create-conanprofile

Expand Down
2 changes: 2 additions & 0 deletions documentation/input_format.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ defaultAminoAcidSequence: E
- `type`: One of `string`, `int`, `float`, `date`, `boolean`
- `generateIndex`: Set to `true` to create a bitmap index for fast equality lookups. This is only valid for `string` columns
- `generateLineageIndex`: Path to lineage definition file for hierarchical queries. This is only possible if `generateIndex` is also set
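- `treatUnknownLineagesAsNull`: Set to `true` to treat values that are missing from the lineage definition file as null in the lineage index (they are left out of the index) instead of failing the insert with an error. This only has an effect if `generateLineageIndex` is also set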
- `isPhyloTreeField`: Mark this column as a phyloTreeField, which enables the phylogenetic queries. See [phylogenetic_queries.md](phylogenetic_queries.md)

`defaultNucleotideSequence` and `defaultAminoAcidSequence` are optional and set the default sequence to be searched for, so that the sequence name can be omitted in queries.

Expand Down
2 changes: 1 addition & 1 deletion src/silo/common/serialization_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1774967790
1776329324
8 changes: 8 additions & 0 deletions src/silo/config/database_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ bool YAML::convert<silo::config::DatabaseMetadata>::decode(
} else {
metadata.phylo_tree_node_identifier = false;
}
if (node["treatUnknownLineagesAsNull"].IsDefined()) {
metadata.treat_unknown_lineages_as_null = node["treatUnknownLineagesAsNull"].as<bool>();
} else {
metadata.treat_unknown_lineages_as_null = false;
}
Comment thread
Taepper marked this conversation as resolved.
return true;
}
YAML::Node YAML::convert<silo::config::DatabaseMetadata>::encode(
Expand All @@ -164,6 +169,9 @@ YAML::Node YAML::convert<silo::config::DatabaseMetadata>::encode(
if (metadata.phylo_tree_node_identifier) {
node["isPhyloTreeField"] = true;
}
if (metadata.treat_unknown_lineages_as_null) {
node["treatUnknownLineagesAsNull"] = true;
}
return node;
}

Expand Down
1 change: 1 addition & 0 deletions src/silo/config/database_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class DatabaseMetadata {
bool generate_index;
std::optional<std::string> generate_lineage_index;
bool phylo_tree_node_identifier;
bool treat_unknown_lineages_as_null;

[[nodiscard]] schema::ColumnType getColumnType() const;
};
Expand Down
2 changes: 1 addition & 1 deletion src/silo/initialize/initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void ColumnMetadataInitializer::operator()<storage::column::IndexedStringColumn>
);
}
metadata = std::make_shared<storage::column::IndexedStringColumn::Metadata>(
config_metadata.name, lineage_tree.value()
config_metadata.name, lineage_tree.value(), config_metadata.treat_unknown_lineages_as_null
);
} else {
metadata =
Expand Down
17 changes: 11 additions & 6 deletions src/silo/storage/column/indexed_string_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,24 @@ namespace silo::storage::column {

IndexedStringColumnMetadata::IndexedStringColumnMetadata(
std::string column_name,
common::LineageTreeAndIdMap lineage_tree_and_id_map
common::LineageTreeAndIdMap lineage_tree_and_id_map,
bool treat_unknown_lineages_as_null
)
: ColumnMetadata(std::move(column_name)),
dictionary(lineage_tree_and_id_map.lineage_id_lookup_map.copy()),
lineage_tree(std::move(lineage_tree_and_id_map)) {}
lineage_tree(std::move(lineage_tree_and_id_map)),
treat_unknown_lineages_as_null(treat_unknown_lineages_as_null) {}

IndexedStringColumnMetadata::IndexedStringColumnMetadata(
std::string column_name,
common::BidirectionalStringMap dictionary,
common::LineageTreeAndIdMap lineage_tree_and_id_map
common::LineageTreeAndIdMap lineage_tree_and_id_map,
bool treat_unknown_lineages_as_null
)
: ColumnMetadata(std::move(column_name)),
dictionary(std::move(dictionary)),
lineage_tree(std::move(lineage_tree_and_id_map)) {}
lineage_tree(std::move(lineage_tree_and_id_map)),
treat_unknown_lineages_as_null(treat_unknown_lineages_as_null) {}

IndexedStringColumn::IndexedStringColumn(IndexedStringColumnMetadata* metadata)
: metadata(metadata) {
Expand Down Expand Up @@ -57,15 +61,16 @@ std::expected<void, std::string> IndexedStringColumn::insert(std::string_view va

if (lineage_index.has_value()) {
const auto value_id = metadata->dictionary.getId(value);
if (!value_id.has_value()) {
if (value_id.has_value()) {
lineage_index.value().insert(row_id, value_id.value());
} else if (!metadata->treat_unknown_lineages_as_null) {
return std::unexpected(fmt::format(
"The value '{}' is not a valid lineage value for column '{}'. "
"Is your lineage definition file outdated?",
value,
metadata->column_name
));
}
lineage_index->insert(row_id, value_id.value());
}

const Idx value_id = metadata->dictionary.getOrCreateId(value);
Expand Down
15 changes: 12 additions & 3 deletions src/silo/storage/column/indexed_string_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class IndexedStringColumnMetadata : public ColumnMetadata {
public:
common::BidirectionalStringMap dictionary;
std::optional<common::LineageTreeAndIdMap> lineage_tree;
bool treat_unknown_lineages_as_null = false;

explicit IndexedStringColumnMetadata(std::string column_name)
: ColumnMetadata(std::move(column_name)) {}
Expand All @@ -40,13 +41,15 @@ class IndexedStringColumnMetadata : public ColumnMetadata {

IndexedStringColumnMetadata(
std::string column_name,
common::LineageTreeAndIdMap lineage_tree_and_id_map
common::LineageTreeAndIdMap lineage_tree_and_id_map,
bool treat_unknown_lineages_as_null
);

IndexedStringColumnMetadata(
std::string column_name,
silo::common::BidirectionalStringMap dictionary,
common::LineageTreeAndIdMap lineage_tree_and_id_map
common::LineageTreeAndIdMap lineage_tree_and_id_map,
bool treat_unknown_lineages_as_null
);

IndexedStringColumnMetadata() = delete;
Expand Down Expand Up @@ -132,6 +135,7 @@ template <class Archive>
archive & object.column_name;
archive & object.dictionary;
archive & object.lineage_tree;
archive & object.treat_unknown_lineages_as_null;
}
} // namespace boost::serialization

Expand All @@ -146,12 +150,17 @@ template <class Archive>
std::string column_name;
silo::common::BidirectionalStringMap dictionary;
std::optional<silo::common::LineageTreeAndIdMap> lineage_tree;
bool treat_unknown_lineages_as_null;
archive & column_name;
archive & dictionary;
archive & lineage_tree;
archive & treat_unknown_lineages_as_null;
if (lineage_tree.has_value()) {
object = std::make_shared<silo::storage::column::IndexedStringColumnMetadata>(
std::move(column_name), std::move(dictionary), std::move(lineage_tree.value())
std::move(column_name),
std::move(dictionary),
std::move(lineage_tree.value()),
treat_unknown_lineages_as_null
);
} else {
object = std::make_shared<silo::storage::column::IndexedStringColumnMetadata>(
Expand Down
29 changes: 25 additions & 4 deletions src/silo/storage/column/indexed_string_column.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ TEST(IndexedStringColumn, addingLineageAndThenSublineageFiltersCorrectly) {
auto lineage_definition = LineageTreeAndIdMap::fromLineageDefinitionFilePath(
"testBaseData/exampleDataset/lineage_definition.yaml"
);
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition);
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition, false);
IndexedStringColumn under_test{&column_metadata};

ASSERT_TRUE(under_test.insert({"BA.1.1"}));
Expand Down Expand Up @@ -90,7 +90,7 @@ TEST(IndexedStringColumn, addingSublineageAndThenLineageFiltersCorrectly) {
auto lineage_definition = LineageTreeAndIdMap::fromLineageDefinitionFilePath(
"testBaseData/exampleDataset/lineage_definition.yaml"
);
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition);
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition, false);
IndexedStringColumn under_test{&column_metadata};

ASSERT_TRUE(under_test.insert({"BA.1.1.1"}));
Expand Down Expand Up @@ -138,7 +138,7 @@ TEST(IndexedStringColumn, queryParentLineageThatWasNeverInserted) {
auto lineage_definition = LineageTreeAndIdMap::fromLineageDefinitionFilePath(
"testBaseData/exampleDataset/lineage_definition.yaml"
);
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition);
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition, false);
IndexedStringColumn under_test{&column_metadata};

ASSERT_TRUE(under_test.insert({"BA.1.1.1"}));
Expand Down Expand Up @@ -168,7 +168,7 @@ A: {}
A.1:
parents: ["A"]
)"));
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition);
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition, false);
IndexedStringColumn under_test{&column_metadata};
ASSERT_TRUE(under_test.insert({"A"}));
auto success = under_test.insert({"A.2"});
Expand All @@ -180,4 +180,25 @@ A.1:
);
}

// Verifies the opt-in lenient mode: when the metadata is constructed with
// treat_unknown_lineages_as_null = true (third constructor argument), inserting a value
// that is absent from the lineage definition succeeds instead of returning an error,
// and that row is not reported when filtering the lineage index for a known lineage.
TEST(IndexedStringColumn, ignoringErrorWhenInsertingIncorrectLineagesIfSpecified) {
// Minimal lineage definition: root lineage "A" with a single child "A.1".
auto lineage_definition =
LineageTreeAndIdMap::fromLineageDefinitionFile(LineageDefinitionFile::fromYAMLString(R"(
A: {}
A.1:
parents: ["A"]
)"));
IndexedStringColumnMetadata column_metadata("some_column", lineage_definition, true);
IndexedStringColumn under_test{&column_metadata};
// Row 0: a value that exists in the lineage definition.
ASSERT_TRUE(under_test.insert({"A"}));
// Row 1: a value unknown to the lineage definition; must be accepted in lenient mode.
ASSERT_TRUE(under_test.insert({"not in the lineage hierarchy"}));
Comment thread
Taepper marked this conversation as resolved.
// Filtering for "A" including sublineages must yield only row 0; the row holding the
// unknown value (row 1) must not appear in the result.
EXPECT_EQ(
*under_test.getLineageIndex()
->filterIncludingSublineages(
under_test.getValueId("A").value(), RecombinantEdgeFollowingMode::DO_NOT_FOLLOW
)
.value(),
roaring::Roaring({0})
);
}

// NOLINTEND(bugprone-unchecked-optional-access)
2 changes: 0 additions & 2 deletions testBaseData/siloSerializedState/1774967818/data_version.silo

This file was deleted.

2 changes: 2 additions & 0 deletions testBaseData/siloSerializedState/1776329347/data_version.silo
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
timestamp: 1776329347
serializationVersion: 1776329324
Binary file not shown.
Binary file not shown.