-
Notifications
You must be signed in to change notification settings - Fork 589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ct: dl_stm mvcc snapshot #23960
base: dev
Are you sure you want to change the base?
ct: dl_stm mvcc snapshot #23960
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright 2024 Redpanda Data, Inc. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.md | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0 | ||
|
||
#pragma once | ||
|
||
#include "cloud_topics/dl_overlay.h" | ||
#include "cloud_topics/dl_version.h" | ||
#include "container/fragmented_vector.h" | ||
#include "serde/envelope.h" | ||
|
||
namespace experimental::cloud_topics { | ||
|
||
struct dl_snapshot_id | ||
: serde:: | ||
envelope<dl_snapshot_id, serde::version<0>, serde::compat_version<0>> { | ||
dl_snapshot_id() noexcept = default; | ||
|
||
explicit dl_snapshot_id(dl_version version) noexcept | ||
: version(version) {} | ||
|
||
auto serde_fields() { return std::tie(version); } | ||
|
||
bool operator==(const dl_snapshot_id& other) const noexcept = default; | ||
|
||
/// Version for which the snapshot is created. | ||
dl_version version; | ||
}; | ||
|
||
struct dl_snapshot_payload | ||
: serde::checksum_envelope< | ||
dl_snapshot_id, | ||
serde::version<0>, | ||
serde::compat_version<0>> { | ||
/// Version for which the snapshot is created. | ||
dl_snapshot_id id; | ||
|
||
/// Overlays visible at the snapshot version. | ||
fragmented_vector<dl_overlay> overlays; | ||
}; | ||
|
||
}; // namespace experimental::cloud_topics |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,8 @@ | |
#include "serde/rw/uuid.h" | ||
#include "storage/record_batch_builder.h" | ||
|
||
#include <stdexcept> | ||
|
||
namespace experimental::cloud_topics { | ||
|
||
std::ostream& operator<<(std::ostream& o, dl_stm_api_errc errc) { | ||
|
@@ -68,4 +70,81 @@ std::optional<dl_overlay> dl_stm_api::lower_bound(kafka::offset offset) const { | |
return _stm->_state.lower_bound(offset); | ||
} | ||
|
||
ss::future<checked<dl_snapshot_id, dl_stm_api_errc>> | ||
dl_stm_api::start_snapshot() { | ||
model::term_id term = _stm->_raft->term(); | ||
|
||
vlog(_logger.debug, "Replicating dl_stm_cmd::start_snapshot_cmd"); | ||
|
||
storage::record_batch_builder builder( | ||
model::record_batch_type::dl_stm_command, model::offset(0)); | ||
builder.add_raw_kv( | ||
serde::to_iobuf(dl_stm_key::start_snapshot), | ||
serde::to_iobuf(start_snapshot_cmd())); | ||
|
||
auto batch = std::move(builder).build(); | ||
auto reader = model::make_memory_record_batch_reader(std::move(batch)); | ||
|
||
auto opts = raft::replicate_options(raft::consistency_level::quorum_ack); | ||
opts.set_force_flush(); | ||
auto res = co_await _stm->_raft->replicate(term, std::move(reader), opts); | ||
|
||
if (res.has_error()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It probably makes sense to return |
||
throw std::runtime_error( | ||
fmt::format("Failed to replicate snapshot: {}", res.error())); | ||
} | ||
|
||
// We abuse knowledge of implementation detail here to construct the | ||
// dl_snapshot_id without having to setup listeners and notifiers of command | ||
// apply. | ||
auto expected_id = dl_snapshot_id(dl_version(res.value().last_offset)); | ||
|
||
auto applied = co_await _stm->wait_no_throw( | ||
res.value().last_offset, model::timeout_clock::now() + 30s); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would that be a problem? PS. there are no locks taken here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, but imagine a situation when this fails. The caller is retrying the operation and replicating new command, then the original command is applied and then the retried command is applied. I guess that currently there is an implicit assumption that this is OK because the command itself is empty. It doesn't have any state on its own. If this is correct than this should be mentioned in the comment at least. Also, this may not always be the case. Eventually, some new fields could be added to the command and this implicit assumption may no longer be true. |
||
if (!applied) { | ||
co_return outcome::failure(dl_stm_api_errc::timeout); | ||
} | ||
|
||
// Ensure that the expected snapshot was created. | ||
vassert(_stm->_state.snapshot_exists(expected_id), "Snapshot not found"); | ||
nvartolomei marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
co_return outcome::success(expected_id); | ||
} | ||
|
||
std::optional<dl_snapshot_payload> | ||
dl_stm_api::read_snapshot(dl_snapshot_id id) { | ||
return _stm->_state.read_snapshot(id); | ||
} | ||
|
||
ss::future<checked<void, dl_stm_api_errc>> | ||
dl_stm_api::remove_snapshots_before(dl_version last_version_to_keep) { | ||
model::term_id term = _stm->_raft->term(); | ||
|
||
vlog(_logger.debug, "Replicating dl_stm_cmd::remove_snapshots_cmd"); | ||
|
||
storage::record_batch_builder builder( | ||
model::record_batch_type::dl_stm_command, model::offset(0)); | ||
builder.add_raw_kv( | ||
serde::to_iobuf(dl_stm_key::remove_snapshots_before_version), | ||
serde::to_iobuf( | ||
remove_snapshots_before_version_cmd(last_version_to_keep))); | ||
|
||
auto batch = std::move(builder).build(); | ||
auto reader = model::make_memory_record_batch_reader(std::move(batch)); | ||
|
||
auto opts = raft::replicate_options(raft::consistency_level::quorum_ack); | ||
opts.set_force_flush(); | ||
auto res = co_await _stm->_raft->replicate(term, std::move(reader), opts); | ||
|
||
if (res.has_error()) { | ||
throw std::runtime_error( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as previous |
||
fmt::format("Failed to replicate remove snapshots: {}", res.error())); | ||
} | ||
|
||
co_await _stm->wait_no_throw( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this workflow (replicate and then wait until the command is applied) can be extracted and reused |
||
res.value().last_offset, model::timeout_clock::now() + 30s); | ||
|
||
co_return outcome::success(); | ||
} | ||
|
||
}; // namespace experimental::cloud_topics |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,8 @@ | |
|
||
#include "base/outcome.h" | ||
#include "cloud_topics/dl_overlay.h" | ||
#include "cloud_topics/dl_snapshot.h" | ||
#include "cloud_topics/dl_version.h" | ||
|
||
#include <seastar/util/log.hh> | ||
|
||
|
@@ -41,6 +43,18 @@ class dl_stm_api { | |
/// available offset. | ||
std::optional<dl_overlay> lower_bound(kafka::offset offset) const; | ||
|
||
/// Request a new snapshot to be created. | ||
ss::future<checked<dl_snapshot_id, dl_stm_api_errc>> start_snapshot(); | ||
|
||
/// Read the payload of a snapshot. | ||
std::optional<dl_snapshot_payload> read_snapshot(dl_snapshot_id id); | ||
|
||
/// Remove all snapshots with version less than the given version. | ||
/// This must be called periodically as new snapshots are being created | ||
/// to avoid the state growing indefinitely. | ||
ss::future<checked<void, dl_stm_api_errc>> | ||
remove_snapshots_before(dl_version last_version_to_keep); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this class needs a gate and a shutdown code path if the object is not transient (only created to replicate one message) |
||
private: | ||
ss::logger& _logger; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is very similar to
command_builder
thatarchival_metadata_stm
uses. Maybe it makes sense to addget_api
method to thedl_stm
which would return an instance of this class?Right now the
dl_stm_api
is constructed separately but it's also a friend of thedl_stm
and is the only way to interact with it. So IMO it makes sense to make this more apparent by allowingdl_stm
to constructapi
objects.