Skip to content

Commit

Permalink
Reduce attribute accumulation memory consumption (#318)
Browse files Browse the repository at this point in the history
* Add a flag to use an H3 index for the feature index

* Give mvt_value and serial_val a double-with-count concept

* Switch mean over to internal accumulation state

* Get rid of the attribute accumulation map

* Change vectors of features to vectors of pointers to features

* Fix --coalesce

* Revert "Add a flag to use an H3 index for the feature index"

This reverts commit b9b48f4.

* Update version and changelog
  • Loading branch information
e-n-f authored Jan 31, 2025
1 parent 390c362 commit 583fc37
Show file tree
Hide file tree
Showing 11 changed files with 243 additions and 232 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 2.75.0

* Reduce memory consumption in attribute accumulation and feature sorting

# 2.74.0

* Add the option to join attributes from a sqlite database in tile-join
Expand Down
55 changes: 17 additions & 38 deletions attribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribut
}

template <class T>
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<T> &full_values, key_pool &key_pool) {
for (size_t i = 0; i < full_keys.size(); i++) {
if (key == *full_keys[i]) {
switch (op) {
Expand Down Expand Up @@ -120,19 +120,13 @@ static void preserve_attribute1(attribute_op const &op, std::string const &key,
}

case op_mean: {
auto state = attribute_accum_state.find(key);
if (state == attribute_accum_state.end()) {
accum_state s;
s.sum = full_values[i].to_double() + val.to_double();
s.count = 2;
attribute_accum_state.insert(std::pair<std::string, accum_state>(key, s));

full_values[i] = (s.sum / s.count);
size_t count = full_values[i].get_count();
if (count <= 1) {
full_values[i].set_double_count((full_values[i].to_double() + val.to_double()) / 2, 2);
} else {
state->second.sum += val.to_double();
state->second.count += 1;

full_values[i] = (state->second.sum / state->second.count);
double sum = full_values[i].to_double() * count + val.to_double();
count++;
full_values[i].set_double_count(sum / count, count);
}
return;
}
Expand All @@ -146,16 +140,11 @@ static void preserve_attribute1(attribute_op const &op, std::string const &key,
return;

case op_count: {
auto state = attribute_accum_state.find(key);
if (state == attribute_accum_state.end()) { // not already present
accum_state s;
s.count = 2;
attribute_accum_state.insert(std::pair<std::string, accum_state>(key, s));

full_values[i] = (s.count);
} else { // already present, incrementing
state->second.count += 1;
full_values[i] = (state->second.count);
size_t count = full_values[i].get_count();
if (count <= 1) {
full_values[i].set_double_count(2, 2);
} else {
full_values[i].set_double_count(count + 1, count + 1);
}
return;
}
Expand All @@ -174,17 +163,7 @@ static void preserve_attribute1(attribute_op const &op, std::string const &key,
break;

case op_count: {
auto state = attribute_accum_state.find(key);
if (state == attribute_accum_state.end()) { // not already present
accum_state s;
s.count = 1;
attribute_accum_state.insert(std::pair<std::string, accum_state>(key, s));

v = (s.count);
} else { // already present, incrementing
fprintf(stderr, "preserve_attribute: can't happen (count)\n");
exit(EXIT_IMPOSSIBLE);
}
v.set_double_count(1, 1);
break;
}

Expand All @@ -197,10 +176,10 @@ static void preserve_attribute1(attribute_op const &op, std::string const &key,
full_values.push_back(v);
}

void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, key_pool);
}

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, key_pool);
}
9 changes: 2 additions & 7 deletions attribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,14 @@ enum attribute_op {
op_count,
};

// Per-key accumulation state: a running total plus the number of values
// that have contributed to it.
struct accum_state {
double sum = 0;  // running total of the accumulated values; the mean is computed as sum / count
double count = 0;  // number of values accumulated so far (held as a double)
};

struct serial_val;
struct key_pool;

void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, std::string name, std::string type);
void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, const char *arg, char **argv);

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, key_pool &key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, key_pool &key_pool);

extern std::map<std::string, attribute_op> numeric_operations;

Expand Down
22 changes: 10 additions & 12 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1371,13 +1371,12 @@ static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &
};

// accumulate :sum:, :min:, :max:, and :count: versions of the specified attribute
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::shared_ptr<std::string>> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state, // accumulation state for preserve_attribute()
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::shared_ptr<std::string>> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
key_pool &key_pool,
std::set<std::string> const &keep, std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix) {
Expand Down Expand Up @@ -1457,7 +1456,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
full_values.push_back(v);
} else {
full_values.push_back(full_values[out_attr->second]);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, key_pool);
}
}
} else {
Expand All @@ -1470,7 +1469,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val,
full_values[prefixed_attr->second] = mvt_value(mvt_value_to_long_long(full_values[prefixed_attr->second]) + 1);
}
} else {
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, key_pool);
}
}
}
Expand Down Expand Up @@ -1597,7 +1596,6 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
// attributes from the other features of the
// multiplier cluster accumulated onto them

std::unordered_map<std::string, accum_state> attribute_accum_state;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<mvt_value> full_values;
std::map<std::string, size_t> numeric_out_field;
Expand Down Expand Up @@ -1642,13 +1640,13 @@ static bool feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
auto found = attribute_accum.find(key);
if (found != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(found->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
preserve_attribute(found->second, key, val, full_keys, full_values, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state, key_pool,
keys, numeric_out_field, key_pool,
keep, exclude, exclude_prefix);
}
}
Expand Down
15 changes: 14 additions & 1 deletion mvt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ struct mvt_value;
double mvt_value_to_double(mvt_value const &v);

struct mvt_value {
mvt_value_type type;
long /* mvt_value_type */ type : 5;
long count : 64 - 5;
std::shared_ptr<std::string> s;

union {
Expand Down Expand Up @@ -138,18 +139,30 @@ struct mvt_value {
return mvt_value_to_double(*this);
}

// Returns the accumulation count carried alongside the numeric value,
// i.e. whatever was last stored in the `count` bit-field.
size_t get_count() const {
	return static_cast<size_t>(count);
}

bool operator<(const mvt_value &o) const;
bool operator==(const mvt_value &o) const;
std::string toString() const;

mvt_value() {
this->type = mvt_double;
this->numeric_value.double_value = 0;
this->count = 0;
}

mvt_value(double v) {
this->type = mvt_double;
this->numeric_value.double_value = v;
this->count = 0;
}

void set_double_count(double v, size_t c) {
this->type = mvt_double;
this->numeric_value.double_value = v;
this->count = c;
}
};

Expand Down
15 changes: 14 additions & 1 deletion serial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,20 @@ struct serial_val {
type = mvt_string;
s = val;
}

// Returns the accumulation count embedded in `s`: the text following the
// first NUL byte, parsed as a decimal integer (the layout written by
// set_double_count()). Returns 0 when `s` contains no NUL byte.
size_t get_count() const {
	const size_t nul_pos = s.find('\0');
	if (nul_pos != std::string::npos) {
		// c_str() is NUL-terminated, so parsing from just past the
		// embedded NUL reads to the end of the string.
		return atoll(s.c_str() + nul_pos + 1);
	}
	return 0;
}

// Store the double `v` together with the accumulation count `c`.
// `s` becomes: <text of v> NUL <decimal text of c>, which is the layout
// get_count() parses. The type is forced to mvt_double.
void set_double_count(double v, size_t c) {
	type = mvt_double;
	s = milo::dtoa_milo(v);
	s += '\0';  // separator between the number text and the count
	s += std::to_string(c);
}
};

struct key_pool {
Expand Down Expand Up @@ -152,7 +166,6 @@ struct serial_feature {
const char *stringpool; // string pool for keys/values lookup
std::shared_ptr<std::string> tile_stringpool; // string pool for mvt_value construction
std::set<std::string> need_tilestats;
std::unordered_map<std::string, accum_state> attribute_accum_state;

int z; // tile being produced
int tx;
Expand Down
Loading

0 comments on commit 583fc37

Please sign in to comment.