Skip to content

Commit 56f7ef5

Browse files
hx235facebook-github-bot
authored andcommitted
Fix nullptr access and race to fault_fs_guard (#12799)
Summary: **Context/Summary:** There are a couple places where we forgot to check fault_fs_guard before accessing it. So we can see something like this occasionally ``` =138831==Hint: address points to the zero page. SCARINESS: 10 (null-deref) AddressSanitizer:DEADLYSIGNAL #0 0x18b9e0b in rocksdb::ThreadLocalPtr::Get() const fbcode/internal_repo_rocksdb/repo/util/thread_local.cc:503 #1 0x83d8b7 in rocksdb::StressTest::TestCompactRange(rocksdb::ThreadState*, long, rocksdb::Slice const&, rocksdb::ColumnFamilyHandle*) fbcode/internal_repo_rocksdb/repo/utilities/fault_injection_fs.h ``` Also accessing of `io_activties_exempted_from_fault_injection.find` not fully synced so we see the following ``` WARNING: ThreadSanitizer: data race (pid=90939) Write of size 8 at 0x7b4c000004d0 by thread T762 (mutexes: write M0): #0 std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>>::operator=(std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>> const&) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:208 (db_stress+0x411c32) (BuildId: b803e5aca22c6b080defed8e85b7bfec) #1 rocksdb::DbStressListener::OnErrorRecoveryCompleted(rocksdb::Status) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_set.h:298 (db_stress+0x4112e5) (BuildId: b803e5aca22c6b080defed8e85b7bfec) #2 rocksdb::EventHelpers::NotifyOnErrorRecoveryEnd(std::vector<std::shared_ptr<rocksdb::EventListener>, std::allocator<std::shared_ptr<rocksdb::EventListener>>> const&, rocksdb::Status const&, rocksdb::Status const&, rocksdb::InstrumentedMutex*) fbcode/internal_repo_rocksdb/repo/db/event_helpers.cc:239 (db_stress+0xa09d60) (BuildId: b803e5aca22c6b080defed8e85b7bfec) Previous read of size 8 at 0x7b4c000004d0 by thread T131 (mutexes: write M1): #0 rocksdb::FaultInjectionTestFS::MaybeInjectThreadLocalError(rocksdb::FaultInjectionIOType, rocksdb::IOOptions const&, rocksdb::FaultInjectionTestFS::ErrorOperation, rocksdb::Slice*, bool, char*, bool, bool*) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:798 (db_stress+0xf7d0f3) (BuildId: b803e5aca22c6b080defed8e85b7bfec) ``` Pull Request resolved: #12799 Test Plan: CI Reviewed By: jowlyzhang Differential Revision: D58917449 Pulled By: hx235 fbshipit-source-id: f24fc1acc2a7d91f9f285447a97ba41397f48dbd
1 parent e3f5125 commit 56f7ef5

File tree

6 files changed

+96
-53
lines changed

6 files changed

+96
-53
lines changed

db_stress_tool/cf_consistency_stress.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,10 @@ class CfConsistencyStressTest : public StressTest {
359359
if (fault_fs_guard) {
360360
fault_fs_guard->DisableThreadLocalErrorInjection(
361361
FaultInjectionIOType::kRead);
362-
363362
fault_fs_guard->DisableThreadLocalErrorInjection(
364363
FaultInjectionIOType::kMetadataRead);
365364
}
365+
366366
if (s.ok() || s.IsNotFound()) {
367367
const bool cmp_found = s.ok();
368368

db_stress_tool/db_stress_common.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
ROCKSDB_NAMESPACE::Env* db_stress_listener_env = nullptr;
2121
ROCKSDB_NAMESPACE::Env* db_stress_env = nullptr;
22-
// If non-null, injects read error at a rate specified by the
23-
// read_fault_one_in or write_fault_one_in flag
2422
std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
2523
std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> compressed_secondary_cache;
2624
std::shared_ptr<ROCKSDB_NAMESPACE::Cache> block_cache;

db_stress_tool/db_stress_test_base.cc

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,10 @@ void StressTest::OperateDb(ThreadState* thread) {
10401040
FaultInjectionIOType::kRead);
10411041
fault_fs_guard->DisableThreadLocalErrorInjection(
10421042
FaultInjectionIOType::kMetadataRead);
1043+
fault_fs_guard->DisableThreadLocalErrorInjection(
1044+
FaultInjectionIOType::kWrite);
1045+
fault_fs_guard->DisableThreadLocalErrorInjection(
1046+
FaultInjectionIOType::kMetadataWrite);
10431047
}
10441048

10451049
// Verify no writes during LockWAL
@@ -1098,6 +1102,10 @@ void StressTest::OperateDb(ThreadState* thread) {
10981102
FaultInjectionIOType::kRead);
10991103
fault_fs_guard->EnableThreadLocalErrorInjection(
11001104
FaultInjectionIOType::kMetadataRead);
1105+
fault_fs_guard->EnableThreadLocalErrorInjection(
1106+
FaultInjectionIOType::kWrite);
1107+
fault_fs_guard->EnableThreadLocalErrorInjection(
1108+
FaultInjectionIOType::kMetadataWrite);
11011109
}
11021110
}
11031111
}
@@ -2476,13 +2484,19 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
24762484
std::vector<std::string> files;
24772485

24782486
// Temporarily disable error injection to print debugging information
2479-
fault_fs_guard->DisableThreadLocalErrorInjection(
2480-
FaultInjectionIOType::kMetadataRead);
2487+
if (fault_fs_guard) {
2488+
fault_fs_guard->DisableThreadLocalErrorInjection(
2489+
FaultInjectionIOType::kMetadataRead);
2490+
}
2491+
24812492
Status my_s = db_stress_env->GetChildren(checkpoint_dir, &files);
2493+
24822494
// Enable back disable error injection disabled for printing debugging
24832495
// information
2484-
fault_fs_guard->EnableThreadLocalErrorInjection(
2485-
FaultInjectionIOType::kMetadataRead);
2496+
if (fault_fs_guard) {
2497+
fault_fs_guard->EnableThreadLocalErrorInjection(
2498+
FaultInjectionIOType::kMetadataRead);
2499+
}
24862500
assert(my_s.ok());
24872501

24882502
for (const auto& f : files) {
@@ -2964,19 +2978,33 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
29642978
uint32_t pre_hash = 0;
29652979
if (thread->rand.OneIn(2)) {
29662980
// Temporarily disable error injection to for validation
2981+
if (fault_fs_guard) {
2982+
fault_fs_guard->DisableThreadLocalErrorInjection(
2983+
FaultInjectionIOType::kMetadataRead);
2984+
fault_fs_guard->DisableThreadLocalErrorInjection(
2985+
FaultInjectionIOType::kRead);
2986+
fault_fs_guard->DisableThreadLocalErrorInjection(
2987+
FaultInjectionIOType::kMetadataWrite);
2988+
fault_fs_guard->DisableThreadLocalErrorInjection(
2989+
FaultInjectionIOType::kWrite);
2990+
}
2991+
29672992
// Declare a snapshot and compare the data before and after the compaction
2968-
fault_fs_guard->DisableThreadLocalErrorInjection(
2969-
FaultInjectionIOType::kMetadataRead);
2970-
fault_fs_guard->DisableThreadLocalErrorInjection(
2971-
FaultInjectionIOType::kRead);
29722993
pre_snapshot = db_->GetSnapshot();
29732994
pre_hash =
29742995
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
2996+
29752997
// Enable back error injection disabled for validation
2976-
fault_fs_guard->EnableThreadLocalErrorInjection(
2977-
FaultInjectionIOType::kMetadataRead);
2978-
fault_fs_guard->EnableThreadLocalErrorInjection(
2979-
FaultInjectionIOType::kRead);
2998+
if (fault_fs_guard) {
2999+
fault_fs_guard->EnableThreadLocalErrorInjection(
3000+
FaultInjectionIOType::kMetadataRead);
3001+
fault_fs_guard->EnableThreadLocalErrorInjection(
3002+
FaultInjectionIOType::kRead);
3003+
fault_fs_guard->EnableThreadLocalErrorInjection(
3004+
FaultInjectionIOType::kMetadataWrite);
3005+
fault_fs_guard->EnableThreadLocalErrorInjection(
3006+
FaultInjectionIOType::kWrite);
3007+
}
29803008
}
29813009
std::ostringstream compact_range_opt_oss;
29823010
compact_range_opt_oss << "exclusive_manual_compaction: "
@@ -3012,11 +3040,16 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
30123040

30133041
if (pre_snapshot != nullptr) {
30143042
// Temporarily disable error injection for validation
3015-
// Declaring a snapshot and compare the data before and after the compaction
3016-
fault_fs_guard->DisableThreadLocalErrorInjection(
3017-
FaultInjectionIOType::kMetadataRead);
3018-
fault_fs_guard->DisableThreadLocalErrorInjection(
3019-
FaultInjectionIOType::kRead);
3043+
if (fault_fs_guard) {
3044+
fault_fs_guard->DisableThreadLocalErrorInjection(
3045+
FaultInjectionIOType::kMetadataRead);
3046+
fault_fs_guard->DisableThreadLocalErrorInjection(
3047+
FaultInjectionIOType::kRead);
3048+
fault_fs_guard->DisableThreadLocalErrorInjection(
3049+
FaultInjectionIOType::kMetadataWrite);
3050+
fault_fs_guard->DisableThreadLocalErrorInjection(
3051+
FaultInjectionIOType::kWrite);
3052+
}
30203053
uint32_t post_hash =
30213054
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
30223055
if (pre_hash != post_hash) {
@@ -3032,11 +3065,17 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
30323065
thread->shared->SetVerificationFailure();
30333066
}
30343067
db_->ReleaseSnapshot(pre_snapshot);
3035-
// Enable back error injection disabled for validation
3036-
fault_fs_guard->EnableThreadLocalErrorInjection(
3037-
FaultInjectionIOType::kMetadataRead);
3038-
fault_fs_guard->EnableThreadLocalErrorInjection(
3039-
FaultInjectionIOType::kRead);
3068+
if (fault_fs_guard) {
3069+
// Enable back error injection disabled for validation
3070+
fault_fs_guard->EnableThreadLocalErrorInjection(
3071+
FaultInjectionIOType::kMetadataRead);
3072+
fault_fs_guard->EnableThreadLocalErrorInjection(
3073+
FaultInjectionIOType::kRead);
3074+
fault_fs_guard->EnableThreadLocalErrorInjection(
3075+
FaultInjectionIOType::kMetadataWrite);
3076+
fault_fs_guard->EnableThreadLocalErrorInjection(
3077+
FaultInjectionIOType::kWrite);
3078+
}
30403079
}
30413080
}
30423081

db_stress_tool/no_batched_ops_stress.cc

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,29 +1607,26 @@ class NonBatchedOpsStressTest : public StressTest {
16071607

16081608
if (FLAGS_verify_before_write) {
16091609
// Temporarily disable error injection for preparation
1610-
fault_fs_guard->DisableThreadLocalErrorInjection(
1611-
FaultInjectionIOType::kRead);
1612-
fault_fs_guard->DisableThreadLocalErrorInjection(
1613-
FaultInjectionIOType::kWrite);
1614-
fault_fs_guard->DisableThreadLocalErrorInjection(
1615-
FaultInjectionIOType::kMetadataRead);
1616-
fault_fs_guard->DisableThreadLocalErrorInjection(
1617-
FaultInjectionIOType::kMetadataWrite);
1610+
if (fault_fs_guard) {
1611+
fault_fs_guard->DisableThreadLocalErrorInjection(
1612+
FaultInjectionIOType::kRead);
1613+
fault_fs_guard->DisableThreadLocalErrorInjection(
1614+
FaultInjectionIOType::kMetadataRead);
1615+
}
1616+
16181617
std::string from_db;
16191618
Status s = db_->Get(read_opts, cfh, k, &from_db);
16201619
bool res = VerifyOrSyncValue(
16211620
rand_column_family, rand_key, read_opts, shared,
16221621
/* msg_prefix */ "Pre-Put Get verification", from_db, s);
16231622

16241623
// Enable back error injection disabled for preparation
1625-
fault_fs_guard->EnableThreadLocalErrorInjection(
1626-
FaultInjectionIOType::kRead);
1627-
fault_fs_guard->EnableThreadLocalErrorInjection(
1628-
FaultInjectionIOType::kWrite);
1629-
fault_fs_guard->EnableThreadLocalErrorInjection(
1630-
FaultInjectionIOType::kMetadataRead);
1631-
fault_fs_guard->EnableThreadLocalErrorInjection(
1632-
FaultInjectionIOType::kMetadataWrite);
1624+
if (fault_fs_guard) {
1625+
fault_fs_guard->EnableThreadLocalErrorInjection(
1626+
FaultInjectionIOType::kRead);
1627+
fault_fs_guard->EnableThreadLocalErrorInjection(
1628+
FaultInjectionIOType::kMetadataRead);
1629+
}
16331630
if (!res) {
16341631
return s;
16351632
}
@@ -1897,19 +1894,24 @@ class NonBatchedOpsStressTest : public StressTest {
18971894
std::ostringstream ingest_options_oss;
18981895

18991896
// Temporarily disable error injection for preparation
1900-
fault_fs_guard->DisableThreadLocalErrorInjection(
1901-
FaultInjectionIOType::kMetadataRead);
1902-
fault_fs_guard->DisableThreadLocalErrorInjection(
1903-
FaultInjectionIOType::kMetadataWrite);
1897+
if (fault_fs_guard) {
1898+
fault_fs_guard->DisableThreadLocalErrorInjection(
1899+
FaultInjectionIOType::kMetadataRead);
1900+
fault_fs_guard->DisableThreadLocalErrorInjection(
1901+
FaultInjectionIOType::kMetadataWrite);
1902+
}
1903+
19041904
if (db_stress_env->FileExists(sst_filename).ok()) {
19051905
// Maybe we terminated abnormally before, so cleanup to give this file
19061906
// ingestion a clean slate
19071907
s = db_stress_env->DeleteFile(sst_filename);
19081908
}
1909-
fault_fs_guard->EnableThreadLocalErrorInjection(
1910-
FaultInjectionIOType::kMetadataRead);
1911-
fault_fs_guard->EnableThreadLocalErrorInjection(
1912-
FaultInjectionIOType::kMetadataWrite);
1909+
if (fault_fs_guard) {
1910+
fault_fs_guard->EnableThreadLocalErrorInjection(
1911+
FaultInjectionIOType::kMetadataRead);
1912+
fault_fs_guard->EnableThreadLocalErrorInjection(
1913+
FaultInjectionIOType::kMetadataWrite);
1914+
}
19131915

19141916
SstFileWriter sst_file_writer(EnvOptions(options_), options_);
19151917
if (s.ok()) {

utilities/fault_injection_fs.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,8 +1228,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
12281228
ErrorContext* ctx =
12291229
static_cast<ErrorContext*>(injected_thread_local_read_error_.Get());
12301230
if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in ||
1231-
io_activties_exempted_from_fault_injection.find(io_options.io_activity) !=
1232-
io_activties_exempted_from_fault_injection.end()) {
1231+
ShouldIOActivtiesExemptFromFaultInjection(io_options.io_activity)) {
12331232
return IOStatus::OK();
12341233
}
12351234

@@ -1306,8 +1305,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalError(
13061305

13071306
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
13081307
if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in ||
1309-
io_activties_exempted_from_fault_injection.find(io_options.io_activity) !=
1310-
io_activties_exempted_from_fault_injection.end()) {
1308+
ShouldIOActivtiesExemptFromFaultInjection(io_options.io_activity)) {
13111309
return IOStatus::OK();
13121310
}
13131311

utilities/fault_injection_fs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,12 @@ class FaultInjectionTestFS : public FileSystemWrapper {
425425
io_activties_exempted_from_fault_injection = io_activties;
426426
}
427427

428+
bool ShouldIOActivtiesExemptFromFaultInjection(Env::IOActivity io_activty) {
429+
MutexLock l(&mutex_);
430+
return io_activties_exempted_from_fault_injection.find(io_activty) !=
431+
io_activties_exempted_from_fault_injection.end();
432+
}
433+
428434
void AssertNoOpenFile() { assert(open_managed_files_.empty()); }
429435

430436
IOStatus GetError() { return fs_error_; }

0 commit comments

Comments
 (0)