|
27 | 27 | #include "yb/rocksdb/port/stack_trace.h" |
28 | 28 | #include "yb/rocksdb/rate_limiter.h" |
29 | 29 | #include "yb/rocksdb/util/file_util.h" |
| 30 | +#include "yb/rocksdb/util/task_metrics.h" |
30 | 31 | #include "yb/rocksdb/util/testutil.h" |
31 | 32 |
|
32 | 33 | #include "yb/rocksutil/yb_rocksdb_logger.h" |
33 | 34 |
|
34 | 35 | #include "yb/util/backoff_waiter.h" |
| 36 | +#include "yb/util/metrics.h" |
35 | 37 | #include "yb/util/priority_thread_pool.h" |
36 | 38 | #include "yb/util/random_util.h" |
37 | 39 | #include "yb/util/sync_point.h" |
|
41 | 43 | DECLARE_bool(flush_rocksdb_on_shutdown); |
42 | 44 | DECLARE_bool(use_priority_thread_pool_for_compactions); |
43 | 45 | DECLARE_bool(use_priority_thread_pool_for_flushes); |
| 46 | +DECLARE_bool(rocksdb_allow_multiple_pending_compactions_for_priority_thread_pool); |
44 | 47 |
|
45 | 48 | using std::atomic; |
46 | 49 | using namespace std::literals; |
@@ -2948,6 +2951,206 @@ TEST_F_EX(DBCompactionTest, AbortManualCompactionOnShutdown, RocksDBTest) { |
2948 | 2951 | } |
2949 | 2952 | } |
2950 | 2953 |
|
| 2954 | +namespace { |
| 2955 | + |
| 2956 | +class DelayFilter : public CompactionFilter { |
| 2957 | + public: |
| 2958 | + explicit DelayFilter(std::atomic<int>* delay_ms_per_entry) |
| 2959 | + : delay_ms_per_entry_(delay_ms_per_entry) {} |
| 2960 | + |
| 2961 | + FilterDecision Filter(int level, const Slice& key, const Slice& value, |
| 2962 | + std::string* new_value, bool* value_changed) override { |
| 2963 | + auto delay_ms = delay_ms_per_entry_->load(); |
| 2964 | + if (delay_ms > 0) { |
| 2965 | + std::this_thread::sleep_for(delay_ms * 1ms); |
| 2966 | + } |
| 2967 | + return FilterDecision::kKeep; |
| 2968 | + } |
| 2969 | + |
| 2970 | + const char* Name() const override { return "KeepFilter"; } |
| 2971 | + |
| 2972 | + private: |
| 2973 | + std::atomic<int>* delay_ms_per_entry_; |
| 2974 | +}; |
| 2975 | + |
| 2976 | +class DelayFilterFactory : public CompactionFilterFactory { |
| 2977 | + public: |
| 2978 | + virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter( |
| 2979 | + const CompactionFilter::Context& context) override { |
| 2980 | + return std::unique_ptr<CompactionFilter>(new DelayFilter(&delay_ms_per_entry_)); |
| 2981 | + } |
| 2982 | + |
| 2983 | + void SetDelayMsPerEntry(int delay_ms_per_entry) { |
| 2984 | + delay_ms_per_entry_ = delay_ms_per_entry; |
| 2985 | + } |
| 2986 | + |
| 2987 | + const char* Name() const override { return "DelayFilterFactory"; } |
| 2988 | + |
| 2989 | + private: |
| 2990 | + std::atomic<int> delay_ms_per_entry_; |
| 2991 | +}; |
| 2992 | + |
| 2993 | +Result<std::pair<size_t, size_t>> CheckPendingCompactions(DBImpl* db) { |
| 2994 | + size_t num_small_pending_compactions; |
| 2995 | + size_t num_large_pending_compactions; |
| 2996 | + size_t num_small_not_started_compactions; |
| 2997 | + size_t num_large_not_started_compactions; |
| 2998 | + { |
| 2999 | + std::lock_guard db_lock(*db->TEST_mutex()); |
| 3000 | + std::lock_guard priority_thread_pool_lock( |
| 3001 | + *db->GetOptions().priority_thread_pool_for_compactions_and_flushes->TEST_mutex()); |
| 3002 | + |
| 3003 | + auto* cfd = pointer_cast<ColumnFamilyHandleImpl*>(db->DefaultColumnFamily())->cfd(); |
| 3004 | + num_small_pending_compactions = cfd->TEST_num_pending_compactions(CompactionSizeKind::kSmall); |
| 3005 | + num_large_pending_compactions = cfd->TEST_num_pending_compactions(CompactionSizeKind::kLarge); |
| 3006 | + |
| 3007 | + num_small_not_started_compactions = |
| 3008 | + db->TEST_NumNotStartedCompactionsUnlocked(CompactionSizeKind::kSmall); |
| 3009 | + num_large_not_started_compactions = |
| 3010 | + db->TEST_NumNotStartedCompactionsUnlocked(CompactionSizeKind::kLarge); |
| 3011 | + } |
| 3012 | + |
| 3013 | + LOG(INFO) << "num_small_pending_compactions: " << num_small_pending_compactions |
| 3014 | + << " num_large_pending_compactions: " << num_large_pending_compactions; |
| 3015 | + LOG(INFO) << "num_small_not_started_compactions: " << num_small_not_started_compactions |
| 3016 | + << " num_large_not_started_compactions: " << num_large_not_started_compactions; |
| 3017 | + |
| 3018 | + SCHECK_LE(num_small_not_started_compactions, num_small_pending_compactions, IllegalState, |
| 3019 | + "Pending compactions should include not started and paused."); |
| 3020 | + SCHECK_LE(num_large_not_started_compactions, num_large_pending_compactions, IllegalState, |
| 3021 | + "Pending compactions should include not started and paused."); |
| 3022 | + |
| 3023 | + // Probably we should abort not yet started compaction if pausing another one in order to limit |
| 3024 | + // number of pending compactions but this is non-trivial and should be addressed by |
| 3025 | + // https://github.com/yugabyte/yugabyte-db/issues/24541. |
| 3026 | + SCHECK_LE( |
| 3027 | + num_small_not_started_compactions, std::size_t{1}, IllegalState, |
| 3028 | + "Expected at most 1 not started small compaction."); |
| 3029 | + SCHECK_LE( |
| 3030 | + num_large_not_started_compactions, std::size_t{1}, IllegalState, |
| 3031 | + "Expected at most 1 not started large compaction."); |
| 3032 | + |
| 3033 | + SCHECK_LE( |
| 3034 | + num_small_pending_compactions, std::size_t{2}, IllegalState, |
| 3035 | + "Expected at most 2 pending small compaction."); |
| 3036 | + SCHECK_LE( |
| 3037 | + num_large_pending_compactions, std::size_t{2}, IllegalState, |
| 3038 | + "Expected at most 2 pending large compaction."); |
| 3039 | + return std::make_pair(num_small_pending_compactions, num_large_pending_compactions); |
| 3040 | +} |
| 3041 | + |
| 3042 | +} // namespace |
| 3043 | + |
| 3044 | +TEST_F(DBCompactionTest, LimitPendingCompactionTasks) { |
| 3045 | + ANNOTATE_UNPROTECTED_WRITE(FLAGS_use_priority_thread_pool_for_compactions) = true; |
| 3046 | + ANNOTATE_UNPROTECTED_WRITE( |
| 3047 | + FLAGS_rocksdb_allow_multiple_pending_compactions_for_priority_thread_pool) = false; |
| 3048 | + |
| 3049 | + constexpr auto kMaxBackgroundCompactions = 1; |
| 3050 | + constexpr auto kNumKeysPerSmallSstFile = 100; |
| 3051 | + constexpr auto kNumKeysPerLargeSstFile = 1000; |
| 3052 | + constexpr auto kNumSstFiles = 30; |
| 3053 | + constexpr auto kNumLargeSstFiles = 10; |
| 3054 | + constexpr auto kCompactionDelayMsPerEntry = 10000 * yb::kTimeMultiplier / kNumKeysPerSmallSstFile; |
| 3055 | + constexpr auto kTimeout = 10s * yb::kTimeMultiplier; |
| 3056 | + constexpr auto kMaxCompactFlushRate = 256_MB; |
| 3057 | + constexpr auto kValueSizeBytes = 1_KB; |
| 3058 | + constexpr auto kNumFilesCompactionTrigger = 5; |
| 3059 | + constexpr auto kMaxNumExpectedFiles = 2 * kNumFilesCompactionTrigger; |
| 3060 | + |
| 3061 | + // Static to avoid destruction before RocksDB is destroyed by owning test object. |
| 3062 | + static yb::PriorityThreadPool thread_pool(kMaxBackgroundCompactions); |
| 3063 | + |
| 3064 | + std::shared_ptr<RateLimiter> rate_limiter(NewGenericRateLimiter(kMaxCompactFlushRate)); |
| 3065 | + auto compaction_filter_factory = std::make_shared<DelayFilterFactory>(); |
| 3066 | + compaction_filter_factory->SetDelayMsPerEntry(kCompactionDelayMsPerEntry); |
| 3067 | + |
| 3068 | + rocksdb::BlockBasedTableOptions table_options; |
| 3069 | + table_options.block_size = 2_KB; |
| 3070 | + table_options.filter_block_size = 2_KB; |
| 3071 | + table_options.index_block_size = 2_KB; |
| 3072 | + |
| 3073 | + Options options; |
| 3074 | + options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); |
| 3075 | + // Setting write_buffer_size big enough, because we use manual flush in this test. |
| 3076 | + options.write_buffer_size = 128_MB; |
| 3077 | + options.arena_block_size = 4_KB; |
| 3078 | + options.num_levels = 1; |
| 3079 | + options.compaction_style = kCompactionStyleUniversal; |
| 3080 | + options.compaction_filter_factory = compaction_filter_factory; |
| 3081 | + options.level0_file_num_compaction_trigger = 5; |
| 3082 | + options.level0_stop_writes_trigger = kNumSstFiles * 2; |
| 3083 | + options.level0_slowdown_writes_trigger = options.level0_stop_writes_trigger; |
| 3084 | + options.max_background_compactions = kMaxBackgroundCompactions; |
| 3085 | + options.priority_thread_pool_for_compactions_and_flushes = &thread_pool; |
| 3086 | + options.info_log_level = InfoLogLevel::DEBUG_LEVEL; |
| 3087 | + options.info_log = std::make_shared<yb::YBRocksDBLogger>(options.log_prefix); |
| 3088 | + options.rate_limiter = rate_limiter; |
| 3089 | + options.create_if_missing = true; |
| 3090 | + options.compaction_size_threshold_bytes = 2 * kValueSizeBytes * kNumKeysPerLargeSstFile; |
| 3091 | + |
| 3092 | + auto& compaction_options_universal = options.compaction_options_universal; |
| 3093 | + compaction_options_universal.stop_style = |
| 3094 | + rocksdb::CompactionStopStyle::kCompactionStopStyleTotalSize; |
| 3095 | + compaction_options_universal.min_merge_width = 4; |
| 3096 | + compaction_options_universal.size_ratio = 20; |
| 3097 | + compaction_options_universal.always_include_size_threshold = |
| 3098 | + 2 * kValueSizeBytes * kNumKeysPerSmallSstFile; |
| 3099 | + |
| 3100 | + METRIC_DEFINE_entity(test_entity); |
| 3101 | + ROCKSDB_PRIORITY_THREAD_POOL_METRICS_DEFINE(test_entity); |
| 3102 | + yb::MetricRegistry registry; |
| 3103 | + auto entity = METRIC_ENTITY_test_entity.Instantiate(®istry, "task metrics"); |
| 3104 | + |
| 3105 | + auto priority_thread_pool_metrics = |
| 3106 | + std::make_shared<RocksDBPriorityThreadPoolMetrics>( |
| 3107 | + ROCKSDB_PRIORITY_THREAD_POOL_METRICS_INSTANCE(entity)); |
| 3108 | + options.priority_thread_pool_metrics = priority_thread_pool_metrics; |
| 3109 | + |
| 3110 | + DestroyAndReopen(options); |
| 3111 | + |
| 3112 | + ColumnFamilyData* cfd = pointer_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())->cfd(); |
| 3113 | + |
| 3114 | + int num_keys = 0; |
| 3115 | + |
| 3116 | + for (auto num = 0; num < kNumSstFiles; num++) { |
| 3117 | + const auto num_keys_per_file = |
| 3118 | + (num < kNumLargeSstFiles) ? kNumKeysPerLargeSstFile : kNumKeysPerSmallSstFile; |
| 3119 | + for (auto i = 0; i < num_keys_per_file; i++) { |
| 3120 | + auto key = Key(++num_keys); |
| 3121 | + ASSERT_OK(Put(key, yb::RandomHumanReadableString(kValueSizeBytes))); |
| 3122 | + } |
| 3123 | + ASSERT_OK(Flush()); |
| 3124 | + ASSERT_OK(ResultToStatus(CheckPendingCompactions(dbfull()))); |
| 3125 | + } |
| 3126 | + LOG(INFO) << "Waiting for flushes to complete..."; |
| 3127 | + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); |
| 3128 | + LOG(INFO) << "Waiting for flushes to complete - DONE"; |
| 3129 | + |
| 3130 | + auto num_pending_compactions = ASSERT_RESULT(CheckPendingCompactions(dbfull())); |
| 3131 | + // We should achieve at least 1 small and 1 large pending compactions (and we've verified that at |
| 3132 | + // most 2 is pending and at most 1 is not yet started in each category). |
| 3133 | + ASSERT_GE(num_pending_compactions.first, 1); |
| 3134 | + ASSERT_GE(num_pending_compactions.second, 1); |
| 3135 | + |
| 3136 | + compaction_filter_factory->SetDelayMsPerEntry(0); |
| 3137 | + |
| 3138 | + auto deadline = yb::CoarseMonoClock::Now() + kTimeout; |
| 3139 | + while (yb::CoarseMonoClock::Now() <= deadline) { |
| 3140 | + auto num_sst_files = db_->GetCurrentVersionNumSSTFiles(); |
| 3141 | + LOG(INFO) << "num_sst_files: " << num_sst_files; |
| 3142 | + if (std::cmp_less(num_sst_files, kMaxNumExpectedFiles + 1)) { |
| 3143 | + break; |
| 3144 | + } |
| 3145 | + ASSERT_OK(ResultToStatus(CheckPendingCompactions(dbfull()))); |
| 3146 | + std::this_thread::sleep_for(1s); |
| 3147 | + } |
| 3148 | + |
| 3149 | + ASSERT_LE(db_->GetCurrentVersionNumSSTFiles(), kMaxNumExpectedFiles); |
| 3150 | + |
| 3151 | + Close(); |
| 3152 | +} |
| 3153 | + |
2951 | 3154 | INSTANTIATE_TEST_CASE_P( |
2952 | 3155 | CompactionPriTest, CompactionPriTest, |
2953 | 3156 | ::testing::Values(CompactionPri::kByCompensatedSize, |
|
0 commit comments