Skip to content

Commit

Permalink
feat(storage): replace magic number with config for gc (#20142)
Browse files Browse the repository at this point in the history
Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
  • Loading branch information
Li0k and dependabot[bot] authored Jan 14, 2025
1 parent 9337ddb commit 8c0cfb4
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 6 deletions.
14 changes: 14 additions & 0 deletions src/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,12 @@ pub struct MetaDeveloperConfig {
/// Max number of epoch-to-version inserted into meta store per INSERT, during time travel metadata writing.
#[serde(default = "default::developer::hummock_time_travel_epoch_version_insert_batch_size")]
pub hummock_time_travel_epoch_version_insert_batch_size: usize,

/// Max number of records collected into a single batch while writing hummock GC history.
// NOTE(review): the batching loop in `hummock/manager/gc.rs` only marks itself finished from
// inside its `while count > 0` loop, so a value of 0 looks like it would never terminate —
// confirm and consider rejecting 0 at config load time.
#[serde(default = "default::developer::hummock_gc_history_insert_batch_size")]
pub hummock_gc_history_insert_batch_size: usize,

/// Max number of object ids looked up per query (one `is_in` filter on `hummock_sstable_info`)
/// when filtering out objects by time travel.
// NOTE(review): `filter_out_objects_by_time_travel` drains `min(remain.len(), batch_size)`
// ids per iteration of `while !remain.is_empty()`, so a value of 0 looks like it would drain
// nothing and loop forever — confirm and consider rejecting 0 at config load time.
#[serde(default = "default::developer::hummock_time_travel_filter_out_objects_batch_size")]
pub hummock_time_travel_filter_out_objects_batch_size: usize,
}

/// The section `[server]` in `risingwave.toml`.
Expand Down Expand Up @@ -2075,6 +2081,14 @@ pub mod default {
1000
}

/// Serde default for `MetaDeveloperConfig::hummock_gc_history_insert_batch_size`.
///
/// Returns `1000`, the same value as the `BATCH_SIZE` constant that used to be hard-coded
/// in the GC history batching loop.
pub fn hummock_gc_history_insert_batch_size() -> usize {
    const DEFAULT_BATCH_SIZE: usize = 1_000;
    DEFAULT_BATCH_SIZE
}

/// Serde default for `MetaDeveloperConfig::hummock_time_travel_filter_out_objects_batch_size`.
///
/// Returns `1000`, the same value as the `FILTER_BATCH_SIZE` constant that used to be
/// hard-coded in `filter_out_objects_by_time_travel`.
pub fn hummock_time_travel_filter_out_objects_batch_size() -> usize {
    const DEFAULT_BATCH_SIZE: usize = 1_000;
    DEFAULT_BATCH_SIZE
}

/// Default value of the memory controller's "aggressive" threshold: `0.9`.
// NOTE(review): presumably a fraction of the memory budget — confirm at the use site.
pub fn memory_controller_threshold_aggressive() -> f64 {
    const AGGRESSIVE_THRESHOLD: f64 = 0.9;
    AGGRESSIVE_THRESHOLD
}
Expand Down
2 changes: 2 additions & 0 deletions src/config/example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ meta_hummock_time_travel_sst_info_insert_batch_size = 100
meta_hummock_delta_log_delete_batch_size = 512
meta_time_travel_vacuum_interval_sec = 30
meta_hummock_time_travel_epoch_version_insert_batch_size = 1000
meta_hummock_gc_history_insert_batch_size = 1000
meta_hummock_time_travel_filter_out_objects_batch_size = 1000

[meta.meta_store_config]
max_connections = 10
Expand Down
8 changes: 8 additions & 0 deletions src/meta/node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,14 @@ pub fn start(
.meta
.developer
.hummock_time_travel_epoch_version_insert_batch_size,
hummock_gc_history_insert_batch_size: config
.meta
.developer
.hummock_gc_history_insert_batch_size,
hummock_time_travel_filter_out_objects_batch_size: config
.meta
.developer
.hummock_time_travel_filter_out_objects_batch_size,
min_delta_log_num_for_hummock_version_checkpoint: config
.meta
.min_delta_log_num_for_hummock_version_checkpoint,
Expand Down
19 changes: 15 additions & 4 deletions src/meta/src/hummock/manager/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,12 @@ impl HummockManager {
let after_metadata_backup = object_ids.len();
// filter by time travel archive
let object_ids = self
.filter_out_objects_by_time_travel(object_ids.into_iter())
.filter_out_objects_by_time_travel(
object_ids.into_iter(),
self.env
.opts
.hummock_time_travel_filter_out_objects_batch_size,
)
.await?;
let after_time_travel = object_ids.len();
// filter by SST id watermark, i.e. minimum id of uncommitted SSTs reported by compute nodes.
Expand Down Expand Up @@ -436,11 +441,10 @@ impl HummockManager {
.filter(hummock_gc_history::Column::MarkDeleteAt.lt(gc_history_low_watermark))
.exec(db)
.await?;
const BATCH_SIZE: usize = 1000;
let mut is_finished = false;
while !is_finished {
let mut batch = vec![];
let mut count: usize = BATCH_SIZE;
let mut count: usize = self.env.opts.hummock_gc_history_insert_batch_size;
while count > 0 {
let Some(m) = models.next() else {
is_finished = true;
Expand Down Expand Up @@ -544,7 +548,14 @@ impl HummockManager {
let object_ids = object_ids
.into_iter()
.filter(|s| !version_pinned.contains(s) && !backup_pinned.contains(s));
let object_ids = self.filter_out_objects_by_time_travel(object_ids).await?;
let object_ids = self
.filter_out_objects_by_time_travel(
object_ids,
self.env
.opts
.hummock_time_travel_filter_out_objects_batch_size,
)
.await?;
// Retry is not necessary. Full GC will handle these objects eventually.
self.delete_objects(object_ids.into_iter().collect())
.await?;
Expand Down
4 changes: 2 additions & 2 deletions src/meta/src/hummock/manager/time_travel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,14 @@ impl HummockManager {
pub(crate) async fn filter_out_objects_by_time_travel(
&self,
objects: impl Iterator<Item = HummockSstableObjectId>,
batch_size: usize,
) -> Result<HashSet<HummockSstableObjectId>> {
// The input object count is much smaller than time travel pinned object count in meta store.
// So search input object in meta store.
let mut result: HashSet<_> = objects.collect();
let mut remain: VecDeque<_> = result.iter().copied().collect();
const FILTER_BATCH_SIZE: usize = 1000;
while !remain.is_empty() {
let batch = remain.drain(..std::cmp::min(remain.len(), FILTER_BATCH_SIZE));
let batch = remain.drain(..std::cmp::min(remain.len(), batch_size));
let reject_object_ids: Vec<risingwave_meta_model::HummockSstableObjectId> =
hummock_sstable_info::Entity::find()
.filter(hummock_sstable_info::Column::ObjectId.is_in(batch))
Expand Down
4 changes: 4 additions & 0 deletions src/meta/src/manager/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ pub struct MetaOpts {
pub hummock_time_travel_sst_info_insert_batch_size: usize,
pub hummock_delta_log_delete_batch_size: usize,
pub hummock_time_travel_epoch_version_insert_batch_size: usize,
pub hummock_gc_history_insert_batch_size: usize,
pub hummock_time_travel_filter_out_objects_batch_size: usize,
/// The minimum delta log number a new checkpoint should compact, otherwise the checkpoint
/// attempt is rejected. Greater value reduces object store IO, meanwhile it results in
/// more loss of in memory `HummockVersionCheckpoint::stale_objects` state when meta node is
Expand Down Expand Up @@ -280,6 +282,8 @@ impl MetaOpts {
hummock_time_travel_sst_info_insert_batch_size: 10,
hummock_delta_log_delete_batch_size: 1000,
hummock_time_travel_epoch_version_insert_batch_size: 1000,
hummock_gc_history_insert_batch_size: 1000,
hummock_time_travel_filter_out_objects_batch_size: 1000,
min_delta_log_num_for_hummock_version_checkpoint: 1,
min_sst_retention_time_sec: 3600 * 24 * 7,
full_gc_interval_sec: 3600 * 24 * 7,
Expand Down

0 comments on commit 8c0cfb4

Please sign in to comment.