Skip to content

Commit 3a46b3a

Browse files
authored
Merge branch 'main' into retain-float-format-docs
2 parents 4909490 + 82bdcce commit 3a46b3a

36 files changed

+1375
-75
lines changed

components/core/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,8 @@ set(SOURCE_FILES_clp_s_unitTest
357357
src/clp_s/FileReader.hpp
358358
src/clp_s/FileWriter.cpp
359359
src/clp_s/FileWriter.hpp
360+
src/clp_s/FloatFormatEncoding.cpp
361+
src/clp_s/FloatFormatEncoding.hpp
360362
src/clp_s/InputConfig.cpp
361363
src/clp_s/InputConfig.hpp
362364
src/clp_s/JsonConstructor.cpp
@@ -700,6 +702,7 @@ set(SOURCE_FILES_unitTest
700702
tests/test-ffi_KeyValuePairLogEvent.cpp
701703
tests/test-ffi_SchemaTree.cpp
702704
tests/test-FileDescriptorReader.cpp
705+
tests/test-FloatFormatEncoding.cpp
703706
tests/test-GlobalMetadataDBConfig.cpp
704707
tests/test-GrepCore.cpp
705708
tests/test-hash_utils.cpp

components/core/cmake/Options/options.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ function(set_clp_s_archivereader_dependencies)
213213
CLP_NEED_MSGPACKCXX
214214
CLP_NEED_NLOHMANN_JSON
215215
CLP_NEED_SPDLOG
216+
CLP_NEED_YSTDLIB
216217
)
217218
endfunction()
218219

@@ -234,6 +235,7 @@ function(set_clp_s_archivewriter_dependencies)
234235
CLP_NEED_NLOHMANN_JSON
235236
CLP_NEED_SIMDJSON
236237
CLP_NEED_SPDLOG
238+
CLP_NEED_YSTDLIB
237239
)
238240
endfunction()
239241

components/core/src/clp_s/ArchiveReader.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ BaseColumnReader* ArchiveReader::append_reader_column(SchemaReader& reader, int3
194194
case NodeType::Float:
195195
column_reader = new FloatColumnReader(column_id);
196196
break;
197+
case NodeType::FormattedFloat:
198+
column_reader = new FormattedFloatColumnReader(column_id);
199+
break;
200+
case NodeType::DictionaryFloat:
201+
column_reader = new DictionaryFloatColumnReader(column_id, m_var_dict);
202+
break;
197203
case NodeType::ClpString:
198204
column_reader = new ClpStringColumnReader(column_id, m_var_dict, m_log_dict);
199205
break;
@@ -247,6 +253,12 @@ void ArchiveReader::append_unordered_reader_columns(
247253
case NodeType::Float:
248254
column_reader = new FloatColumnReader(column_id);
249255
break;
256+
case NodeType::FormattedFloat:
257+
column_reader = new FormattedFloatColumnReader(column_id);
258+
break;
259+
case NodeType::DictionaryFloat:
260+
column_reader = new DictionaryFloatColumnReader(column_id, m_var_dict);
261+
break;
250262
case NodeType::ClpString:
251263
column_reader = new ClpStringColumnReader(column_id, m_var_dict, m_log_dict);
252264
break;

components/core/src/clp_s/ArchiveWriter.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,12 @@ void ArchiveWriter::initialize_schema_writer(SchemaWriter* writer, Schema const&
316316
case NodeType::Float:
317317
writer->append_column(new FloatColumnWriter(id));
318318
break;
319+
case NodeType::FormattedFloat:
320+
writer->append_column(new FormattedFloatColumnWriter(id));
321+
break;
322+
case NodeType::DictionaryFloat:
323+
writer->append_column(new DictionaryFloatColumnWriter(id, m_var_dict));
324+
break;
319325
case NodeType::ClpString:
320326
writer->append_column(new ClpStringColumnWriter(id, m_var_dict, m_log_dict));
321327
break;

components/core/src/clp_s/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ set(
232232
DictionaryWriter.cpp
233233
DictionaryWriter.hpp
234234
ErrorCode.hpp
235+
FloatFormatEncoding.cpp
236+
FloatFormatEncoding.hpp
235237
JsonFileIterator.cpp
236238
JsonFileIterator.hpp
237239
JsonParser.cpp
@@ -274,6 +276,7 @@ if(CLP_BUILD_CLP_S_ARCHIVEWRITER)
274276
msgpack-cxx
275277
nlohmann_json::nlohmann_json
276278
simdjson::simdjson
279+
ystdlib::error_handling
277280
PRIVATE
278281
Boost::url
279282
clp_s::clp_dependencies
@@ -299,6 +302,8 @@ set(
299302
DictionaryEntry.hpp
300303
DictionaryReader.hpp
301304
ErrorCode.hpp
305+
FloatFormatEncoding.cpp
306+
FloatFormatEncoding.hpp
302307
JsonSerializer.hpp
303308
PackedStreamReader.cpp
304309
PackedStreamReader.hpp
@@ -335,6 +340,7 @@ if(CLP_BUILD_CLP_S_ARCHIVEREADER)
335340
clp_s::io
336341
msgpack-cxx
337342
nlohmann_json::nlohmann_json
343+
ystdlib::error_handling
338344
PRIVATE
339345
Boost::url
340346
clp_s::clp_dependencies

components/core/src/clp_s/ColumnReader.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
#include "ColumnReader.hpp"
22

3+
#include <cstddef>
4+
#include <cstdint>
5+
#include <string>
6+
#include <variant>
7+
38
#include "../clp/EncodedVariableInterpreter.hpp"
49
#include "BufferViewReader.hpp"
510
#include "ColumnWriter.hpp"
11+
#include "FloatFormatEncoding.hpp"
612
#include "Utils.hpp"
713

814
namespace clp_s {
@@ -50,6 +56,11 @@ void FloatColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
5056
m_values = reader.read_unaligned_span<double>(num_messages);
5157
}
5258

59+
// Loads `num_messages` doubles followed by `num_messages` format descriptors from `reader`.
// The read order mirrors FormattedFloatColumnWriter::store, which writes all values before all
// formats, so the two reads must stay in this order.
void FormattedFloatColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
    // Numeric payload.
    m_values = reader.read_unaligned_span<double>(num_messages);
    // Per-value format information, parallel to `m_values`.
    m_formats = reader.read_unaligned_span<float_format_t>(num_messages);
}
63+
5364
void
5465
Int64ColumnReader::extract_string_value_into_buffer(uint64_t cur_message, std::string& buffer) {
5566
buffer.append(std::to_string(m_values[cur_message]));
@@ -68,6 +79,12 @@ std::variant<int64_t, double, std::string, uint8_t> FloatColumnReader::extract_v
6879
return m_values[cur_message];
6980
}
7081

82+
std::variant<int64_t, double, std::string, uint8_t> FormattedFloatColumnReader::extract_value(
83+
uint64_t cur_message
84+
) {
85+
return m_values[cur_message];
86+
}
87+
7188
// Loads `num_messages` entries for this column from `reader`, one `uint8_t` per entry.
void BooleanColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
    m_values = reader.read_unaligned_span<uint8_t>(num_messages);
}
@@ -77,12 +94,36 @@ FloatColumnReader::extract_string_value_into_buffer(uint64_t cur_message, std::s
7794
buffer.append(std::to_string(m_values[cur_message]));
7895
}
7996

97+
void FormattedFloatColumnReader::extract_string_value_into_buffer(
98+
uint64_t cur_message,
99+
std::string& buffer
100+
) {
101+
buffer.append(restore_encoded_float(m_values[cur_message], m_formats[cur_message]).value());
102+
}
103+
80104
std::variant<int64_t, double, std::string, uint8_t> BooleanColumnReader::extract_value(
81105
uint64_t cur_message
82106
) {
83107
return m_values[cur_message];
84108
}
85109

110+
// Loads `num_messages` variable-dictionary ids for this column from `reader`. The ids are
// resolved through `m_var_dict` when a value is extracted.
void DictionaryFloatColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
    m_var_dict_ids = reader.read_unaligned_span<variable_dictionary_id_t>(num_messages);
}
113+
114+
std::variant<int64_t, double, std::string, uint8_t> DictionaryFloatColumnReader::extract_value(
115+
uint64_t cur_message
116+
) {
117+
return std::stod(m_var_dict->get_value(m_var_dict_ids[cur_message]));
118+
}
119+
120+
void DictionaryFloatColumnReader::extract_string_value_into_buffer(
121+
uint64_t cur_message,
122+
std::string& buffer
123+
) {
124+
buffer.append(m_var_dict->get_value(m_var_dict_ids[cur_message]));
125+
}
126+
86127
void ClpStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
87128
m_logtypes = reader.read_unaligned_span<uint64_t>(num_messages);
88129
size_t encoded_vars_length = reader.read_value<size_t>();

components/core/src/clp_s/ColumnReader.hpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
#ifndef CLP_S_COLUMNREADER_HPP
22
#define CLP_S_COLUMNREADER_HPP
33

4+
#include <cstddef>
5+
#include <cstdint>
46
#include <string>
57
#include <variant>
68

79
#include "BufferViewReader.hpp"
810
#include "DictionaryReader.hpp"
11+
#include "FloatFormatEncoding.hpp"
912
#include "SchemaTree.hpp"
1013
#include "TimestampDictionaryReader.hpp"
1114
#include "Utils.hpp"
@@ -147,6 +150,66 @@ class FloatColumnReader : public BaseColumnReader {
147150
UnalignedMemSpan<double> m_values;
148151
};
149152

153+
class FormattedFloatColumnReader : public BaseColumnReader {
154+
public:
155+
// Constructor
156+
explicit FormattedFloatColumnReader(int32_t id) : BaseColumnReader(id) {}
157+
158+
// Destructor
159+
~FormattedFloatColumnReader() override = default;
160+
161+
// Methods inherited from BaseColumnReader
162+
void load(BufferViewReader& reader, uint64_t num_messages) override;
163+
164+
NodeType get_type() override { return NodeType::FormattedFloat; }
165+
166+
std::variant<int64_t, double, std::string, uint8_t> extract_value(
167+
uint64_t cur_message
168+
) override;
169+
170+
/**
171+
* Appends the floating point value to the buffer in its original format by decoding the stored
172+
* format information.
173+
*
174+
* @param cur_message
175+
* @param buffer
176+
*/
177+
void extract_string_value_into_buffer(uint64_t cur_message, std::string& buffer) override;
178+
179+
private:
180+
UnalignedMemSpan<double> m_values;
181+
UnalignedMemSpan<float_format_t> m_formats;
182+
};
183+
184+
class DictionaryFloatColumnReader : public BaseColumnReader {
185+
public:
186+
// Constructor
187+
explicit DictionaryFloatColumnReader(
188+
int32_t id,
189+
std::shared_ptr<VariableDictionaryReader> var_dict
190+
)
191+
: BaseColumnReader(id),
192+
m_var_dict{std::move(var_dict)} {}
193+
194+
// Destructor
195+
~DictionaryFloatColumnReader() override = default;
196+
197+
// Methods inherited from BaseColumnReader
198+
void load(BufferViewReader& reader, uint64_t num_messages) override;
199+
200+
NodeType get_type() override { return NodeType::DictionaryFloat; }
201+
202+
std::variant<int64_t, double, std::string, uint8_t> extract_value(
203+
uint64_t cur_message
204+
) override;
205+
206+
void extract_string_value_into_buffer(uint64_t cur_message, std::string& buffer) override;
207+
208+
private:
209+
std::shared_ptr<VariableDictionaryReader> m_var_dict;
210+
UnalignedMemSpan<variable_dictionary_id_t> m_var_dict_ids;
211+
};
212+
150213
class BooleanColumnReader : public BaseColumnReader {
151214
public:
152215
// Constructor

components/core/src/clp_s/ColumnWriter.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "ColumnWriter.hpp"
22

3+
#include <algorithm>
4+
#include <cassert>
5+
#include <cctype>
36
#include <cstdint>
47
#include <variant>
58

@@ -46,6 +49,33 @@ void FloatColumnWriter::store(ZstdCompressor& compressor) {
4649
compressor.write(reinterpret_cast<char const*>(m_values.data()), size);
4750
}
4851

52+
// Appends one (value, format) pair to the column.
// `value` must hold a `std::pair<double, float_format_t>`; otherwise `std::get` throws
// `std::bad_variant_access`.
// Returns the number of bytes this entry adds to the column.
size_t FormattedFloatColumnWriter::add_value(ParsedMessage::variable_t& value) {
    auto const& value_and_format = std::get<std::pair<double, float_format_t>>(value);
    m_values.push_back(value_and_format.first);
    m_formats.push_back(value_and_format.second);
    return sizeof(double) + sizeof(float_format_t);
}
58+
59+
// Writes the column to `compressor`: the raw bytes of all values first, then the raw bytes of
// all formats. FormattedFloatColumnReader::load reads them back in the same order.
void FormattedFloatColumnWriter::store(ZstdCompressor& compressor) {
    // Every value must have exactly one format entry.
    assert(m_formats.size() == m_values.size());

    auto const* values_bytes = reinterpret_cast<char const*>(m_values.data());
    auto const* formats_bytes = reinterpret_cast<char const*>(m_formats.data());

    compressor.write(values_bytes, m_values.size() * sizeof(double));
    compressor.write(formats_bytes, m_formats.size() * sizeof(float_format_t));
}
66+
67+
// Adds the string held in `value` to the variable dictionary and records the resulting id for
// this message.
// `value` must hold a `std::string`; otherwise `std::get` throws `std::bad_variant_access`.
// Returns the number of bytes this entry adds to the column.
size_t DictionaryFloatColumnWriter::add_value(ParsedMessage::variable_t& value) {
    auto& float_str = std::get<std::string>(value);
    clp::variable_dictionary_id_t dict_id{};
    m_var_dict->add_entry(float_str, dict_id);
    m_var_dict_ids.push_back(dict_id);
    return sizeof(dict_id);
}
73+
74+
// Writes the raw bytes of all recorded variable-dictionary ids to `compressor`.
void DictionaryFloatColumnWriter::store(ZstdCompressor& compressor) {
    auto const num_bytes = m_var_dict_ids.size() * sizeof(clp::variable_dictionary_id_t);
    compressor.write(reinterpret_cast<char const*>(m_var_dict_ids.data()), num_bytes);
}
78+
4979
size_t BooleanColumnWriter::add_value(ParsedMessage::variable_t& value) {
5080
m_values.push_back(std::get<bool>(value) ? 1 : 0);
5181
return sizeof(uint8_t);

components/core/src/clp_s/ColumnWriter.hpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "../clp/Defs.h"
88
#include "DictionaryWriter.hpp"
99
#include "FileWriter.hpp"
10+
#include "FloatFormatEncoding.hpp"
1011
#include "ParsedMessage.hpp"
1112
#include "TimestampDictionaryWriter.hpp"
1213
#include "ZstdCompressor.hpp"
@@ -98,6 +99,44 @@ class FloatColumnWriter : public BaseColumnWriter {
9899
std::vector<double> m_values;
99100
};
100101

102+
class FormattedFloatColumnWriter : public BaseColumnWriter {
103+
public:
104+
// Constructor
105+
explicit FormattedFloatColumnWriter(int32_t id) : BaseColumnWriter(id) {}
106+
107+
// Destructor
108+
~FormattedFloatColumnWriter() override = default;
109+
110+
// Methods inherited from BaseColumnWriter
111+
size_t add_value(ParsedMessage::variable_t& value) override;
112+
113+
void store(ZstdCompressor& compressor) override;
114+
115+
private:
116+
std::vector<double> m_values;
117+
std::vector<float_format_t> m_formats;
118+
};
119+
120+
class DictionaryFloatColumnWriter : public BaseColumnWriter {
121+
public:
122+
// Constructor
123+
DictionaryFloatColumnWriter(int32_t id, std::shared_ptr<VariableDictionaryWriter> var_dict)
124+
: BaseColumnWriter(id),
125+
m_var_dict(std::move(var_dict)) {}
126+
127+
// Destructor
128+
~DictionaryFloatColumnWriter() override = default;
129+
130+
// Methods inherited from BaseColumnWriter
131+
size_t add_value(ParsedMessage::variable_t& value) override;
132+
133+
void store(ZstdCompressor& compressor) override;
134+
135+
private:
136+
std::shared_ptr<VariableDictionaryWriter> m_var_dict;
137+
std::vector<clp::variable_dictionary_id_t> m_var_dict_ids;
138+
};
139+
101140
class BooleanColumnWriter : public BaseColumnWriter {
102141
public:
103142
// Constructor

components/core/src/clp_s/CommandLineArguments.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,10 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
244244
"print-archive-stats",
245245
po::bool_switch(&m_print_archive_stats),
246246
"Print statistics (json) about the archive after it's compressed."
247+
)(
248+
"retain-float-format",
249+
po::bool_switch(&m_retain_float_format),
250+
"Store extra information to losslessly decompress floats."
247251
)(
248252
"single-file-archive",
249253
po::bool_switch(&m_single_file_archive),

0 commit comments

Comments
 (0)