Skip to content

Commit

Permalink
Reduce memory consumption during tiling and tile encoding (#319)
Browse files (browse the repository at this point in the history)
* Reduce the memory spike during tile construction

* Remove `need_tilestats`. Add a bunch of debug logging

* Remove debug logging

* Update version and changelog
  • Loading branch information
e-n-f authored Jan 31, 2025
1 parent 583fc37 commit 423fea1
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 41 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 2.75.1

* Further reduce memory consumption in attribute sorting and tilestats tracking

# 2.75.0

* Reduce memory consumption in attribute accumulation and feature sorting
Expand Down
26 changes: 17 additions & 9 deletions mvt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,10 @@ struct sorted_value {

return false;
}

// Comparator for entries held through std::shared_ptr: orders two pointers
// by the sorted_value objects they refer to (via sorted_value's operator<),
// rather than by the pointers themselves.
bool operator()(const std::shared_ptr<sorted_value> &a, const std::shared_ptr<sorted_value> &b) {
sorted_value &lhs = *a;
sorted_value &rhs = *b;
return lhs < rhs;
}
};

std::string mvt_tile::encode() {
Expand All @@ -334,7 +338,7 @@ std::string mvt_tile::encode() {
layer_writer.add_string(3, layers[i].keys[j]); /* key */
}

std::vector<sorted_value> sorted_values;
std::vector<std::shared_ptr<sorted_value>> sorted_values;

for (size_t v = 0; v < layers[i].values.size(); v++) {
std::string value_string;
Expand Down Expand Up @@ -371,30 +375,34 @@ std::string mvt_tile::encode() {
exit(EXIT_IMPOSSIBLE);
}

sorted_value sv;
sv.val = std::move(value_string);
sv.orig = v;
std::shared_ptr<sorted_value> sv = std::make_shared<sorted_value>();
sv->val = std::move(value_string);
sv->orig = v;
sorted_values.push_back(std::move(sv));
}

std::stable_sort(sorted_values.begin(), sorted_values.end());
std::stable_sort(sorted_values.begin(), sorted_values.end(), sorted_value());
std::vector<size_t> mapping;
mapping.resize(sorted_values.size());

size_t value_index = 0;
for (size_t v = 0; v < sorted_values.size(); v++) {
mapping[sorted_values[v].orig] = value_index;
layer_writer.add_message(4, sorted_values[v].val);
mapping[sorted_values[v]->orig] = value_index;
layer_writer.add_message(4, sorted_values[v]->val);

// crunch out duplicates that were missed by the hashing
while (v + 1 < sorted_values.size() && sorted_values[v].val == sorted_values[v + 1].val) {
mapping[sorted_values[v + 1].orig] = value_index;
while (v + 1 < sorted_values.size() && sorted_values[v]->val == sorted_values[v + 1]->val) {
sorted_values[v]->val.clear();
mapping[sorted_values[v + 1]->orig] = value_index;
v++;
}

sorted_values[v]->val.clear();
value_index++;
}

sorted_values.clear();

for (size_t f = 0; f < layers[i].features.size(); f++) {
std::string feature_string;
protozero::pbf_writer feature_writer(feature_string);
Expand Down
1 change: 0 additions & 1 deletion serial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ struct serial_feature {
long long clustered; // does this feature need the clustered/point_count attributes?
const char *stringpool; // string pool for keys/values lookup
std::shared_ptr<std::string> tile_stringpool; // string pool for mvt_value construction
std::set<std::string> need_tilestats;

int z; // tile being produced
int tx;
Expand Down
40 changes: 10 additions & 30 deletions tile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1405,10 +1405,6 @@ void add_tilestats(std::string const &layername, int z, std::vector<std::map<std
}

void promote_attribute(std::string const &key, serial_feature &p, key_pool &key_pool) {
if (p.need_tilestats.count(key) == 0) {
p.need_tilestats.insert(key);
}

// If the feature being merged into has this key as a metadata reference,
// promote it to a full_key so it can be modified

Expand All @@ -1430,10 +1426,6 @@ void promote_attribute(std::string const &key, serial_feature &p, key_pool &key_
}

void promote_attribute_prefix(std::string const &key, std::string const &prefixed_key, serial_feature &p, key_pool &key_pool) {
if (p.need_tilestats.count(prefixed_key) == 0) {
p.need_tilestats.insert(prefixed_key);
}

// does the prefixed attribute already exist as a full key?
ssize_t found_as = -1;
for (size_t i = 0; i < p.full_keys.size(); i++) {
Expand All @@ -1450,6 +1442,7 @@ void promote_attribute_prefix(std::string const &key, std::string const &prefixe
if (found_as >= 0) {
p.full_keys.push_back(key_pool.pool(prefixed_key));
p.full_values.push_back(p.full_values[found_as]);

return;
}

Expand Down Expand Up @@ -2300,8 +2293,6 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
std::vector<std::shared_ptr<serial_feature>> &features = kv.second.features;

if (retain_points_multiplier > 1) {
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "tippecanoe:retain_points_multiplier_first", serial_val(mvt_bool, "true"));

// mapping from input sequence to current sequence within this tile
std::vector<std::pair<size_t, size_t>> feature_sequences;

Expand All @@ -2322,8 +2313,6 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch

features[j]->full_keys.push_back(key_pool.pool("tippecanoe:retain_points_multiplier_sequence"));
features[j]->full_values.push_back(sv);

add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, *features[j]->full_keys.back(), sv);
}
}

Expand All @@ -2340,22 +2329,16 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
sv.s = "true";
p.full_values.push_back(sv);

add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "clustered", sv);

p.full_keys.push_back(key_pool.pool("point_count"));
sv2.type = mvt_double;
sv2.s = std::to_string(point_count);
p.full_values.push_back(sv2);

add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "point_count", sv2);

p.full_keys.push_back(key_pool.pool("sqrt_point_count"));
sv3.type = mvt_double;
sv3.s = std::to_string(round(100 * sqrt(point_count)) / 100.0);
p.full_values.push_back(sv3);

add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "sqrt_point_count", sv3);

p.full_keys.push_back(key_pool.pool("point_count_abbreviated"));
sv4.type = mvt_string;
if (point_count >= 10000) {
Expand All @@ -2367,21 +2350,15 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
}
sv4.s = abbrev;
p.full_values.push_back(sv4);

add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, "point_count_abbreviated", sv4);
}

if (p.need_tilestats.size() > 0) {
for (size_t j = 0; j < p.full_keys.size(); j++) {
if (p.need_tilestats.count(*p.full_keys[j]) > 0) {
// remove accumulation state
size_t found = p.full_values[j].s.find('\0');
if (found != std::string::npos) {
p.full_values[j].s = p.full_values[j].s.substr(0, found);
}
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, *p.full_keys[j], p.full_values[j]);
}
for (size_t j = 0; j < p.full_keys.size(); j++) {
// remove accumulation state
size_t found = p.full_values[j].s.find('\0');
if (found != std::string::npos) {
p.full_values[j].s = p.full_values[j].s.substr(0, found);
}
add_tilestats(layername, z, layermaps, tiling_seg, layer_unmaps, *p.full_keys[j], p.full_values[j]);
}
}

Expand Down Expand Up @@ -2574,6 +2551,9 @@ long long write_tile(decompressor *geoms, std::atomic<long long> *geompos_in, ch
layer.tag(feature, *layer_features[x]->full_keys[a], v);
}

layer_features[x]->full_keys.clear();
layer_features[x]->full_values.clear();

if (additional[A_CALCULATE_FEATURE_DENSITY]) {
int glow = 255;
if (layer_features[x]->spacing > 0) {
Expand Down
2 changes: 1 addition & 1 deletion version.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#ifndef VERSION_HPP
#define VERSION_HPP

#define VERSION "v2.75.0"
#define VERSION "v2.75.1"

#endif

0 comments on commit 423fea1

Please sign in to comment.