Skip to content

Commit e5baaf4

Browse files
authored
Merge pull request #24483 from WillemKauf/segment_appender_race_fix
[CORE-7058]: `storage`: fix race condition in `segment::release_appender_in_background()`
2 parents 1d386b6 + 24ec834 commit e5baaf4

File tree

5 files changed

+94
-22
lines changed

5 files changed

+94
-22
lines changed

src/v/storage/disk_log_impl.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1749,7 +1749,7 @@ void disk_log_impl::bg_checkpoint_offset_translator() {
17491749
ss::future<> disk_log_impl::force_roll(ss::io_priority_class iopc) {
17501750
auto roll_lock_holder = co_await _segments_rolling_lock.get_units();
17511751
auto t = term();
1752-
auto next_offset = offsets().dirty_offset + model::offset(1);
1752+
auto next_offset = model::next_offset(offsets().dirty_offset);
17531753
if (_segs.empty()) {
17541754
co_return co_await new_segment(next_offset, t, iopc);
17551755
}

src/v/storage/disk_log_impl.h

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include <absl/container/flat_hash_map.h>
3333

34+
struct storage_e2e_fixture;
3435
namespace storage {
3536

3637
/// \brief offset boundary type
@@ -248,6 +249,7 @@ class disk_log_impl final : public log {
248249
private:
249250
friend class disk_log_appender; // for multi-term appends
250251
friend class disk_log_builder; // for tests
252+
friend ::storage_e2e_fixture;
251253
friend std::ostream& operator<<(std::ostream& o, const disk_log_impl& d);
252254

253255
/// Compute file offset of the batch inside the segment

src/v/storage/segment.cc

+2
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,8 @@ ss::future<> segment::release_appender(readers_cache* readers_cache) {
297297
}
298298

299299
void segment::release_appender_in_background(readers_cache* readers_cache) {
300+
_gate.check();
301+
300302
auto a = std::exchange(_appender, nullptr);
301303
auto c = config::shard_local_cfg().release_cache_on_segment_roll()
302304
? std::exchange(_cache, std::nullopt)
+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Copyright 2024 Redpanda Data, Inc.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the file licenses/BSL.md
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0
9+
10+
#include "redpanda/tests/fixture.h"
11+
#include "storage/disk_log_impl.h"
12+
#include "storage/segment.h"
13+
#include "test_utils/scoped_config.h"
14+
15+
#include <seastar/core/future.hh>
16+
#include <seastar/core/shared_ptr.hh>
17+
18+
struct storage_e2e_fixture : public redpanda_thread_fixture {
19+
scoped_config test_local_cfg;
20+
21+
// Produces to the given fixture's partition for 10 seconds.
22+
ss::future<> produce_to_fixture(model::topic topic_name, int* incomplete) {
23+
tests::kafka_produce_transport producer(co_await make_kafka_client());
24+
co_await producer.start();
25+
const int cardinality = 10;
26+
auto now = ss::lowres_clock::now();
27+
while (ss::lowres_clock::now() < now + 5s) {
28+
for (int i = 0; i < cardinality; i++) {
29+
co_await producer.produce_to_partition(
30+
topic_name,
31+
model::partition_id(0),
32+
tests::kv_t::sequence(i, 1));
33+
}
34+
}
35+
*incomplete -= 1;
36+
}
37+
38+
ss::future<> remove_segment_permanently(
39+
storage::disk_log_impl* log, ss::lw_shared_ptr<storage::segment> seg) {
40+
return log->remove_segment_permanently(seg, "storage_e2e_fixture")
41+
.then([&, log, seg]() {
42+
auto& segs = log->segments();
43+
auto it = std::find(segs.begin(), segs.end(), seg);
44+
if (it == segs.end()) {
45+
return;
46+
}
47+
segs.erase(it, std::next(it));
48+
});
49+
}
50+
};

src/v/storage/tests/storage_e2e_fixture_test.cc

+39-21
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,13 @@
1010
#include "kafka/server/tests/produce_consume_utils.h"
1111
#include "model/fundamental.h"
1212
#include "random/generators.h"
13-
#include "redpanda/tests/fixture.h"
13+
#include "storage/disk_log_impl.h"
14+
#include "storage/segment.h"
15+
#include "storage/tests/storage_e2e_fixture.h"
1416
#include "test_utils/fixture.h"
15-
#include "test_utils/scoped_config.h"
1617

18+
#include <seastar/core/future.hh>
19+
#include <seastar/core/io_priority_class.hh>
1720
#include <seastar/core/lowres_clock.hh>
1821

1922
#include <boost/test/tools/old/interface.hpp>
@@ -23,27 +26,14 @@
2326

2427
using namespace std::chrono_literals;
2528

26-
struct storage_e2e_fixture : public redpanda_thread_fixture {
27-
scoped_config test_local_cfg;
28-
};
29-
3029
namespace {
31-
32-
// Produces to the given fixture's partition for 10 seconds.
33-
ss::future<> produce_to_fixture(
34-
storage_e2e_fixture* fix, model::topic topic_name, int* incomplete) {
35-
tests::kafka_produce_transport producer(co_await fix->make_kafka_client());
36-
co_await producer.start();
37-
const int cardinality = 10;
38-
auto now = ss::lowres_clock::now();
39-
while (ss::lowres_clock::now() < now + 5s) {
40-
for (int i = 0; i < cardinality; i++) {
41-
co_await producer.produce_to_partition(
42-
topic_name, model::partition_id(0), tests::kv_t::sequence(i, 1));
43-
}
30+
ss::future<> force_roll_log(storage::disk_log_impl* log) {
31+
try {
32+
co_await log->force_roll(ss::default_priority_class());
33+
} catch (...) {
4434
}
45-
*incomplete -= 1;
4635
}
36+
4737
} // namespace
4838

4939
FIXTURE_TEST(test_compaction_segment_ms, storage_e2e_fixture) {
@@ -69,7 +59,7 @@ FIXTURE_TEST(test_compaction_segment_ms, storage_e2e_fixture) {
6959
produces.reserve(5);
7060
int incomplete = 5;
7161
for (int i = 0; i < 5; i++) {
72-
auto fut = produce_to_fixture(this, topic_name, &incomplete);
62+
auto fut = produce_to_fixture(topic_name, &incomplete);
7363
produces.emplace_back(std::move(fut));
7464
}
7565
auto partition = app.partition_manager.local().get(ntp);
@@ -177,3 +167,31 @@ FIXTURE_TEST(test_concurrent_log_eviction_and_append, storage_e2e_fixture) {
177167
// final round of eviction.
178168
BOOST_REQUIRE_LE(log->segment_count(), 1);
179169
}
170+
171+
FIXTURE_TEST(test_concurrent_segment_roll_and_close, storage_e2e_fixture) {
172+
const auto topic_name = model::topic("tapioca");
173+
const auto ntp = model::ntp(model::kafka_namespace, topic_name, 0);
174+
175+
cluster::topic_properties props;
176+
add_topic({model::kafka_namespace, topic_name}, 1, props).get();
177+
wait_for_leader(ntp).get();
178+
179+
auto partition = app.partition_manager.local().get(ntp);
180+
auto* log = dynamic_cast<storage::disk_log_impl*>(partition->log().get());
181+
auto seg = log->segments().back();
182+
183+
// Hold a read lock, which will force release_appender() to go through
184+
// release_appender_in_background()
185+
auto read_lock_holder = seg->read_lock().get();
186+
187+
auto roll_fut = force_roll_log(log);
188+
auto release_holder_fut = ss::sleep(100ms).then(
189+
[read_locker_holder = std::move(read_lock_holder)] {});
190+
auto remove_segment_fut = remove_segment_permanently(log, seg);
191+
192+
ss::when_all(
193+
std::move(roll_fut),
194+
std::move(remove_segment_fut),
195+
std::move(release_holder_fut))
196+
.get();
197+
}

0 commit comments

Comments
 (0)