Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions documentation/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ Returns metadata about the loaded database.
"version": "0.1.0",
"sequenceCount": 100,
"horizontalBitmapsSize": 5594,
"verticalBitmapsSize": 28102,
"numberOfPartitions": 1
"verticalBitmapsSize": 28102
}
```

Expand All @@ -68,7 +67,6 @@ Returns metadata about the loaded database.
| `sequenceCount` | Total number of sequences in the database |
| `horizontalBitmapsSize` | Size of horizontal bitmap indexes (bytes) |
| `verticalBitmapsSize` | Size of vertical bitmap indexes (bytes) |
| `numberOfPartitions` | Number of table partitions |

---

Expand Down
6 changes: 2 additions & 4 deletions documentation/data_directories.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@ output/
1700000000/
data_version.silo
database_schema.silo
default/
... (partition data)
default.silo
1700100000/
data_version.silo
database_schema.silo
default/
...
default.silo
```

### data_version.silo format
Expand Down
1 change: 0 additions & 1 deletion endToEndTests/test/info.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ describe('The /info endpoint', () => {
sequenceCount: 100,
horizontalBitmapsSize: 5595,
verticalBitmapsSize: 28102,
numberOfPartitions: 1,
});
});
});
Expand Down
3 changes: 1 addition & 2 deletions src/silo/api/lineage_definition_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ void LineageDefinitionHandler::get(
throw BadRequest("The column {} is not of type indexed-string.", column_name);
}
auto* metadata =
table->second->schema
->getColumnMetadata<storage::column::IndexedStringColumnPartition>(column_name)
table->second->schema->getColumnMetadata<storage::column::IndexedStringColumn>(column_name)
.value();
if (!metadata->lineage_tree.has_value()) {
throw BadRequest("The column {} does not have a lineageIndex defined.", column_name);
Expand Down
71 changes: 20 additions & 51 deletions src/silo/append/database_inserter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ std::expected<simdjson::ondemand::value, std::string> findFieldManual(

std::expected<simdjson::ondemand::value, std::string> findFieldWithFallbacks(
simdjson::ondemand::object& object,
const TablePartitionInserter::SniffedField& sniffed_field
const TableInserter::SniffedField& sniffed_field
) {
simdjson::ondemand::value column_value;
auto error = object.find_field(sniffed_field.escaped_key).get(column_value);
Expand Down Expand Up @@ -108,10 +108,11 @@ std::expected<simdjson::ondemand::object, std::string> iterateToObject(

} // namespace

std::expected<std::vector<TablePartitionInserter::SniffedField>, std::string>
TablePartitionInserter::sniffFieldOrder(simdjson::ondemand::document_reference ndjson_line) const {
std::expected<std::vector<TableInserter::SniffedField>, std::string> TableInserter::sniffFieldOrder(
simdjson::ondemand::document_reference ndjson_line
) const {
std::vector<SniffedField> order_in_json_line;
auto columns_in_table = table_partition->columns.metadata;
auto columns_in_table = table->columns.metadata;
ASSIGN_OR_RAISE(auto object, iterateToObject(ndjson_line));
for (auto maybe_field : object) {
ASSIGN_OR_RAISE_SIMDJSON(
Expand Down Expand Up @@ -161,64 +162,46 @@ TablePartitionInserter::sniffFieldOrder(simdjson::ondemand::document_reference n
return order_in_json_line;
}

std::expected<void, std::string> TablePartitionInserter::insert(
std::expected<void, std::string> TableInserter::insert(
simdjson::ondemand::document_reference ndjson_line,
const std::vector<TablePartitionInserter::SniffedField>& field_order_hint
const std::vector<TableInserter::SniffedField>& field_order_hint
) const {
EVOBENCH_SCOPE_EVERY(20, "TablePartitionInserter", "insert");
EVOBENCH_SCOPE_EVERY(20, "TableInserter", "insert");
ASSIGN_OR_RAISE(auto object, iterateToObject(ndjson_line));
for (const auto& sniffed_field : field_order_hint) {
ASSIGN_OR_RAISE(auto column_value, findFieldWithFallbacks(object, sniffed_field));
auto success_or_error = table_partition->columns.addJsonValueToColumn(
sniffed_field.column_identifier, column_value
);
auto success_or_error =
table->columns.addJsonValueToColumn(sniffed_field.column_identifier, column_value);
if (!success_or_error.has_value()) {
return success_or_error;
}
}
table_partition->sequence_count++;
table->sequence_count++;
return {};
}
Comment thread
Taepper marked this conversation as resolved.

// Finishes inserting into the wrapped partition: calls finalize() and then
// validate() on it, and hands back a default-constructed Commit.
// No exception handling happens here; anything thrown by finalize() or
// validate() propagates to the caller unchanged.
TablePartitionInserter::Commit TablePartitionInserter::commit() const {
table_partition->finalize();
table_partition->validate();
return Commit{};
}

// Returns an inserter bound to a partition of the wrapped table.
// If the table has no partitions, one is created via addPartition();
// otherwise the existing partition at index 0 is reused.
// NOTE(review): despite the name, a partition is only created when none exists yet.
TablePartitionInserter TableInserter::openNewPartition() const {
if (table->getNumberOfPartitions() == 0) {
return TablePartitionInserter{table->addPartition()};
}
return TablePartitionInserter{table->getPartition(0)};
}

// Returns an inserter bound to the table's highest-indexed partition,
// i.e. index getNumberOfPartitions() - 1.
// With no partitions present it defers to openNewPartition(), so the index
// computation below is never evaluated for a partition count of zero.
TablePartitionInserter TableInserter::openLastPartition() const {
if (table->getNumberOfPartitions() == 0) {
return openNewPartition();
}
return TablePartitionInserter{table->getPartition(table->getNumberOfPartitions() - 1)};
}

// Completes an append on the wrapped table by calling finalize() and then
// validate(), returning a default-constructed Commit on success.
// A DuplicatePrimaryKeyException thrown by either call is rethrown as an
// AppendException carrying the same what() message; any other exception
// propagates unchanged.
TableInserter::Commit TableInserter::commit() const {
try {
table->finalize();
table->validate();
return Commit{};
} catch (const silo::schema::DuplicatePrimaryKeyException& exception) {
throw silo::append::AppendException(exception.what());
}
}

TablePartitionInserter::Commit appendDataToTablePartition(
const TablePartitionInserter& partition_inserter,
TableInserter::Commit appendDataToTable(
std::shared_ptr<silo::storage::Table> table,
NdjsonLineReader& input_data
) {
EVOBENCH_SCOPE("TablePartitionInserter", "appendDataToTablePartition");
EVOBENCH_SCOPE("TableInserter", "appendDataToTable");
const TableInserter table_inserter(std::move(table));

size_t line_count = 0;

bool first_line = true;

std::vector<TablePartitionInserter::SniffedField> sniffed_field_order;
std::vector<TableInserter::SniffedField> sniffed_field_order;
for (auto [json_obj_or_error, raw_line] : input_data) {
simdjson::ondemand::document_reference ndjson_line;
auto error = json_obj_or_error.get(ndjson_line);
Expand All @@ -231,7 +214,7 @@ TablePartitionInserter::Commit appendDataToTablePartition(
}

if (first_line) {
auto sniffed_field_order_or_error = partition_inserter.sniffFieldOrder(ndjson_line);
auto sniffed_field_order_or_error = table_inserter.sniffFieldOrder(ndjson_line);
if (!sniffed_field_order_or_error.has_value()) {
throw AppendException{
"{} - current line: {}", sniffed_field_order_or_error.error(), raw_line
Expand All @@ -241,7 +224,7 @@ TablePartitionInserter::Commit appendDataToTablePartition(
first_line = false;
}

auto maybe_error = partition_inserter.insert(ndjson_line, sniffed_field_order);
auto maybe_error = table_inserter.insert(ndjson_line, sniffed_field_order);
if (!maybe_error.has_value()) {
throw AppendException{"{} - current line: {}", maybe_error.error(), raw_line};
}
Expand All @@ -252,20 +235,6 @@ TablePartitionInserter::Commit appendDataToTablePartition(
}
}

return partition_inserter.commit();
}

TableInserter::Commit appendDataToTable(
std::shared_ptr<silo::storage::Table> table,
NdjsonLineReader& input_data
) {
const TableInserter table_inserter(std::move(table));

// TODO(#738) make partition configurable
auto table_partition = table_inserter.openLastPartition();

appendDataToTablePartition(table_partition, input_data);

return table_inserter.commit();
}

Expand Down
38 changes: 6 additions & 32 deletions src/silo/append/database_inserter.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,21 @@

#include "silo/append/ndjson_line_reader.h"
#include "silo/storage/table.h"
#include "silo/storage/table_partition.h"

namespace silo::append {

class TablePartitionInserter {
std::shared_ptr<storage::TablePartition> table_partition;
class TableInserter {
std::shared_ptr<storage::Table> table;

public:
class Commit {
friend class TablePartitionInserter;
friend class TableInserter;

Commit() = default;
};

explicit TablePartitionInserter(std::shared_ptr<storage::TablePartition> table_partition)
: table_partition(std::move(table_partition)) {}
explicit TableInserter(std::shared_ptr<storage::Table> table)
: table(std::move(table)) {}

struct SniffedField {
silo::schema::ColumnIdentifier column_identifier;
Expand All @@ -47,34 +46,9 @@ class TablePartitionInserter {
[[nodiscard]] Commit commit() const;
};

// Wraps a shared storage::Table and exposes the operations used while appending:
// obtaining a partition inserter and committing the table afterwards.
class TableInserter {
// The table being appended to; shared ownership with the caller.
std::shared_ptr<storage::Table> table;

public:
// Value returned by commit(). Its constructor is private and TableInserter is
// the only friend, so only TableInserter can create one.
class Commit {
friend class TableInserter;

Commit() = default;
};

// Takes (a share of) ownership of `table`; the pointer is moved into the member.
explicit TableInserter(std::shared_ptr<storage::Table> table)
: table(std::move(table)) {}

// Returns an inserter for a partition of the table; the result must not be discarded.
[[nodiscard]] TablePartitionInserter openNewPartition() const;

// Returns an inserter for the table's last partition; the result must not be discarded.
[[nodiscard]] TablePartitionInserter openLastPartition() const;

// Finishes the append on the table and returns a Commit; the result must not be discarded.
[[nodiscard]] Commit commit() const;
};

// Feeds the NDJSON lines produced by `input_data` into `partition_inserter`
// and returns the Commit obtained from the inserter.
// NOTE(review): the definition is only partly visible here; from the visible part,
// failures while sniffing the field order or inserting a line are reported by
// throwing AppendException with the offending line appended — confirm against
// the full definition.
TablePartitionInserter::Commit appendDataToTablePartition(
const TablePartitionInserter& partition_inserter,
NdjsonLineReader& input_data
);

// Appends the NDJSON lines produced by `input_data` to `table` and returns the
// Commit produced by TableInserter::commit() for that table.
// `table` is taken by value (shared ownership); `input_data` is consumed by
// iterating over it.
TableInserter::Commit appendDataToTable(
std::shared_ptr<silo::storage::Table> table,
NdjsonLineReader& input_data
);

} // namespace silo::append
} // namespace silo::append
4 changes: 2 additions & 2 deletions src/silo/common/aa_symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace silo {

namespace storage::column {
template <typename SymbolType>
class SequenceColumnPartition;
class SequenceColumn;
}

class AminoAcid {
Expand Down Expand Up @@ -52,7 +52,7 @@ class AminoAcid {
};

static constexpr schema::ColumnType COLUMN_TYPE = schema::ColumnType::AMINO_ACID_SEQUENCE;
using Column = storage::column::SequenceColumnPartition<AminoAcid>;
using Column = storage::column::SequenceColumn<AminoAcid>;

static constexpr uint32_t COUNT = 28;
static_assert(COUNT == static_cast<uint32_t>(Symbol::X) + 1);
Expand Down
4 changes: 2 additions & 2 deletions src/silo/common/nucleotide_symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace silo {

namespace storage::column {
template <typename SymbolType>
class SequenceColumnPartition;
class SequenceColumn;
}

class Nucleotide {
Expand All @@ -40,7 +40,7 @@ class Nucleotide {
};

static constexpr schema::ColumnType COLUMN_TYPE = schema::ColumnType::NUCLEOTIDE_SEQUENCE;
using Column = storage::column::SequenceColumnPartition<Nucleotide>;
using Column = storage::column::SequenceColumn<Nucleotide>;

static constexpr uint32_t COUNT = 16;

Expand Down
2 changes: 1 addition & 1 deletion src/silo/common/serialization_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1774509839
1774967790
Loading