Skip to content

Commit

Permalink
[#24574] docdb: TServers reject RBS requests for tablets they have de…
Browse files Browse the repository at this point in the history
…leted.

Summary:
This diff adds an in-memory map of tablets a tserver has processed delete tablet with tablet data state `DELETED` RPCs for to the `TSTabletManager`. Start remote bootstrap requests are checked against this map, and the tablet manager rejects requests to RBS a tablet it has previously deleted.

Note that delete tablet replica requests with table data state `TOMBSTONED` are not added to this map.
Jira: DB-13611

Test Plan:
```
% ./yb_build.sh release --cxx-test-filter-re remote_bootstrap-itest --cxx-test remote_bootstrap-itest --gtest_filter RemoteBootstrapITest.AcceptRBSAfterTabletTombstone
% ./yb_build.sh release --cxx-test-filter-re remote_bootstrap-itest --cxx-test remote_bootstrap-itest --gtest_filter RemoteBootstrapITest.RejectRBSAfterTabletDeletion
```

Reviewers: yyan, timur, bkolagani

Reviewed By: bkolagani

Subscribers: hsunder, slingam, ybase

Differential Revision: https://phorge.dev.yugabyte.com/D39618
  • Loading branch information
druzac committed Nov 5, 2024
1 parent 617bf6f commit 129c3db
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 2 deletions.
4 changes: 4 additions & 0 deletions src/yb/client/table_creator.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ class YBTableCreator {

std::string ResponseNoticeMessage() const { return resp_notice_message_; }

const std::string& get_table_id() const {
return table_id_;
}

private:
friend class YBClient;

Expand Down
1 change: 0 additions & 1 deletion src/yb/common/wire_protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ Status StatusFromPB(const LWAppStatusPB& pb);
void HostPortToPB(const HostPort& host_port, HostPortPB* host_port_pb);
HostPortPB HostPortToPB(const HostPort& host_port);


// Returns the HostPort created from the specified protobuf.
HostPort HostPortFromPB(const HostPortPB& host_port_pb);

Expand Down
208 changes: 207 additions & 1 deletion src/yb/integration-tests/remote_bootstrap-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

#include "yb/common/wire_protocol-test-util.h"

#include "yb/consensus/consensus.proxy.h"
#include "yb/consensus/log.h"
#include "yb/consensus/raft_consensus.h"

Expand All @@ -65,6 +66,7 @@

#include "yb/master/catalog_entity_info.h"
#include "yb/master/catalog_manager_if.h"
#include "yb/master/master_client.proxy.h"
#include "yb/master/master_cluster_client.h"
#include "yb/master/master_fwd.h"
#include "yb/master/mini_master.h"
Expand All @@ -80,6 +82,7 @@
#include "yb/tserver/tablet_server.h"
#include "yb/tserver/tserver.pb.h"
#include "yb/tserver/tserver_admin.proxy.h"
#include "yb/tserver/tserver_service.proxy.h"

#include "yb/util/backoff_waiter.h"
#include "yb/util/metrics.h"
Expand Down Expand Up @@ -212,6 +215,9 @@ class RemoteBootstrapITest : public CreateTableITestBase {

void RBSWithLazySuperblockFlush(int num_tables);

Result<std::string> SetUp3TabletServerClusterAndTable(
const std::string& db_name, const std::string& table_name);

void LongBootstrapTestSetUpAndVerify(
const vector<string>& tserver_flags = vector<string>(),
const vector<string>& master_flags = vector<string>(),
Expand All @@ -233,6 +239,16 @@ class RemoteBootstrapITest : public CreateTableITestBase {
void AddNewPeerWithDiskspaceCheck(const int num_tablet_servers);
void ReplacePeerWithDiskspaceCheck(const int num_tablet_servers);

Result<master::GetTableLocationsResponsePB> GetTableLocations(
const master::MasterClientProxy& proxy, const std::string& table_id,
uint32_t max_num_tablets = 100) const;

Result<tserver::ListTabletsResponsePB> ListTablets(
const tserver::TabletServerServiceProxy& proxy) const;

std::optional<std::reference_wrapper<const tablet::TabletStatusPB>> FindTablet(
const tserver::ListTabletsResponsePB& resp, const std::string& tablet_id) const;

MonoDelta crash_test_timeout_ = MonoDelta::FromSeconds(40);
const MonoDelta kWaitForCrashTimeout_ = 60s;
vector<string> crash_test_tserver_flags_;
Expand Down Expand Up @@ -1960,6 +1976,45 @@ void RemoteBootstrapITest::RBSWithLazySuperblockFlush(int num_tables) {
}
}

Result<master::GetTableLocationsResponsePB> RemoteBootstrapITest::GetTableLocations(
const master::MasterClientProxy& proxy, const std::string& table_id,
uint32_t max_num_tablets) const {
master::GetTableLocationsRequestPB req;
master::GetTableLocationsResponsePB resp;
rpc::RpcController rpc;
req.set_max_returned_locations(max_num_tablets);
req.mutable_table()->set_table_id(table_id);
RETURN_NOT_OK(proxy.GetTableLocations(req, &resp, &rpc));
if (resp.has_error()) {
return StatusFromPB(resp.error().status());
}
return resp;
}

Result<tserver::ListTabletsResponsePB> RemoteBootstrapITest::ListTablets(
const tserver::TabletServerServiceProxy& proxy) const {
tserver::ListTabletsRequestPB req;
tserver::ListTabletsResponsePB resp;
rpc::RpcController rpc;
RETURN_NOT_OK(proxy.ListTablets(req, &resp, &rpc));
if (resp.has_error()) {
return StatusFromPB(resp.error().status());
}
return resp;
}

std::optional<std::reference_wrapper<const tablet::TabletStatusPB>>
RemoteBootstrapITest::FindTablet(
const tserver::ListTabletsResponsePB& resp, const std::string& tablet_id) const {
auto it = std::find_if(
resp.status_and_schema().begin(), resp.status_and_schema().end(),
[&tablet_id](const auto& entry) { return tablet_id != entry.tablet_status().tablet_id(); });
if (it == resp.status_and_schema().end()) {
return std::nullopt;
}
return it->tablet_status();
}

TEST_F(RemoteBootstrapITest, TestRBSWithLazySuperblockFlush) {
vector<string> ts_flags;
// Enable lazy superblock flush.
Expand Down Expand Up @@ -1987,7 +2042,6 @@ TEST_F(RemoteBootstrapITest, TestRBSWithLazySuperblockFlush) {
RBSWithLazySuperblockFlush(/* num_tables */ 20);
}


TEST_F(RemoteBootstrapITest, TestRBSAddNewPeerWithDiskspaceCheck) {
AddNewPeerWithDiskspaceCheck(3);
}
Expand Down Expand Up @@ -2143,6 +2197,158 @@ void RemoteBootstrapITest::ReplacePeerWithDiskspaceCheck(const int num_tablet_se
workload.rows_inserted()));
}

TEST_F(RemoteBootstrapITest, RejectRBSAfterTabletDeletion) {
const auto waitfor_timeout = MonoDelta::FromSeconds(60);
const auto table_id = ASSERT_RESULT(SetUp3TabletServerClusterAndTable("test_db", "test_table"));
auto master = ASSERT_NOTNULL(cluster_->GetLeaderMaster());
auto master_client_proxy =
master::MasterClientProxy(&client_->proxy_cache(), master->bound_rpc_addr());
std::unordered_set<std::string> tablet_ids;
auto table_locations_resp = ASSERT_RESULT(GetTableLocations(master_client_proxy, table_id));
for (const auto& tablet_location : table_locations_resp.tablet_locations()) {
tablet_ids.insert(tablet_location.tablet_id());
}
ASSERT_GT(tablet_ids.size(), 0);
ASSERT_OK(client_->DeleteTable(table_id, /* wait */ true));
auto tservers = cluster_->tserver_daemons();
auto& proxy_cache = client_->proxy_cache();

ASSERT_OK(WaitFor(
[&]() -> Result<bool> {
for (const auto tserver : tservers) {
auto proxy = tserver::TabletServerServiceProxy(&proxy_cache, tserver->bound_rpc_addr());
auto resp = VERIFY_RESULT(ListTablets(proxy));
if (std::any_of(
resp.status_and_schema().begin(), resp.status_and_schema().end(),
[&tablet_ids](const auto& entry) {
return tablet_ids.contains(entry.tablet_status().tablet_id());
})) {
return false;
}
}
return true;
},
waitfor_timeout,
"Timed out waiting for all tablets to be deleted on all tservers"));
auto cluster_client =
master::MasterClusterClient(cluster_->GetLeaderMasterProxy<master::MasterClusterProxy>());
auto list_tablets_response = ASSERT_RESULT(cluster_client.ListTabletServers());
ASSERT_GE(list_tablets_response.servers().size(), 3);
auto& rbs_dest_entry = list_tablets_response.servers(0);
auto& rbs_source_entry = list_tablets_response.servers(1);
consensus::StartRemoteBootstrapRequestPB req;
consensus::StartRemoteBootstrapResponsePB resp;
rpc::RpcController rpc;
rpc.set_timeout(MonoDelta::FromSeconds(20));
req.set_dest_uuid(rbs_dest_entry.instance_id().permanent_uuid());
req.set_tablet_id(*tablet_ids.begin());
req.set_bootstrap_source_peer_uuid(rbs_source_entry.instance_id().permanent_uuid());
if (rbs_source_entry.registration().common().private_rpc_addresses().size() > 0) {
*req.add_bootstrap_source_private_addr() =
rbs_source_entry.registration().common().private_rpc_addresses(0);
}
if (rbs_source_entry.registration().common().broadcast_addresses().size() > 0) {
*req.add_bootstrap_source_broadcast_addr() =
rbs_source_entry.registration().common().broadcast_addresses(0);
}
*req.mutable_bootstrap_source_cloud_info() =
rbs_source_entry.registration().common().cloud_info();
req.set_caller_term(2);
auto status =
ts_map_[rbs_dest_entry.instance_id().permanent_uuid()]->consensus_proxy->StartRemoteBootstrap(
req, &resp, &rpc);
if (status.ok()) {
status = StatusFromPB(resp.error().status());
}
ASSERT_NOK(status);
ASSERT_TRUE(status.IsIllegalState());
ASSERT_STR_CONTAINS(
status.ToString(),
"Cannot bootstrap a new tablet replica for a previously deleted tablet replica");
}

TEST_F(RemoteBootstrapITest, AcceptRBSAfterTabletTombstone) {
const auto waitfor_timeout = MonoDelta::FromSeconds(60);
const auto table_id = ASSERT_RESULT(SetUp3TabletServerClusterAndTable("test_db", "test_table"));

auto master = ASSERT_NOTNULL(cluster_->GetLeaderMaster());
auto master_client_proxy =
master::MasterClientProxy(&client_->proxy_cache(), master->bound_rpc_addr());
std::unordered_set<std::string> tablet_ids;
auto table_locations_resp = ASSERT_RESULT(GetTableLocations(master_client_proxy, table_id));
for (const auto& tablet_location : table_locations_resp.tablet_locations()) {
tablet_ids.insert(tablet_location.tablet_id());
}
ASSERT_GT(tablet_ids.size(), 0);
const auto tablet_id = *tablet_ids.begin();
auto cluster_client =
master::MasterClusterClient(cluster_->GetLeaderMasterProxy<master::MasterClusterProxy>());

// Blacklist the "first" tserver.
auto tserver_to_blacklist = ASSERT_RESULT(cluster_client.ListTabletServers()).servers(0);
const auto& hp_to_blacklist =
tserver_to_blacklist.registration().common().private_rpc_addresses(0);
// Add a new tserver to host the tablet peer.
ASSERT_OK(cluster_->AddTabletServer());
ASSERT_OK(cluster_client.BlacklistHost(HostPortPB(hp_to_blacklist)));
ASSERT_OK(WaitFor(
[&]() -> Result<bool> {
auto proxy = tserver::TabletServerServiceProxy(
&cluster_->proxy_cache(), HostPortFromPB(hp_to_blacklist));
auto resp = VERIFY_RESULT(ListTablets(proxy));
auto tablet = FindTablet(resp, tablet_id);
if (!tablet.has_value()) {
return true;
}
return tablet->get().tablet_data_state() == tablet::TABLET_DATA_DELETED ||
tablet->get().tablet_data_state() == tablet::TABLET_DATA_TOMBSTONED;
},
waitfor_timeout,
"Timed out waiting for the tablet peer to be moved from the target tserver"));
// Now clear the blacklist and blacklist another server.
ASSERT_OK(cluster_client.ClearBlacklist());
auto new_tserver_to_blacklist = cluster_->tablet_server(3);
ASSERT_OK(cluster_client.BlacklistHost(HostPortToPB(new_tserver_to_blacklist->bound_rpc_addr())));
// Wait for the tablet to show up again.
ASSERT_OK(WaitFor(
[&]() -> Result<bool> {
auto proxy = tserver::TabletServerServiceProxy(
&cluster_->proxy_cache(), HostPortFromPB(hp_to_blacklist));
auto resp = VERIFY_RESULT(ListTablets(proxy));
auto tablet = FindTablet(resp, tablet_id);
return tablet.has_value() && tablet->get().tablet_data_state() == tablet::TABLET_DATA_READY;
},
waitfor_timeout,
"Timed out waiting for a new copy of the tablet replica to be added back to the target "
"tserver"));
}

Result<std::string> RemoteBootstrapITest::SetUp3TabletServerClusterAndTable(
const std::string& db_name, const std::string& table_name) {
StartCluster(
/* extra_tserver_flags */ {},
/* master_flags */ {},
/* num_tablet_servers */ 3, /* enable_ysql */ true);
auto yb_dbname = YBTableName(YQLDatabase::YQL_DATABASE_PGSQL);
yb_dbname.set_namespace_name(db_name);
RETURN_NOT_OK(
client_->CreateNamespaceIfNotExists(yb_dbname.namespace_name(), yb_dbname.namespace_type()));

auto table_creator = client_->NewTableCreator();
client::YBSchema client_schema(client::YBSchemaFromSchema(yb::GetSimpleTestSchema()));
table_creator->schema(&client_schema);
table_creator
->table_name(
YBTableName(YQLDatabase::YQL_DATABASE_PGSQL, yb_dbname.namespace_name(), table_name))
.table_type(YBTableType::PGSQL_TABLE_TYPE)
.num_tablets(1)
.wait(true);
RETURN_NOT_OK(table_creator->Create());
SCHECK(!table_creator->get_table_id().empty(),
IllegalState, "Table id should not be empty.");
return table_creator->get_table_id();
}

class RemoteBootstrapMiniClusterITest: public YBMiniClusterTestBase<MiniCluster> {
public:
RemoteBootstrapMiniClusterITest() : num_tablets_(1), num_tablet_servers_(3) {}
Expand Down
16 changes: 16 additions & 0 deletions src/yb/tserver/ts_tablet_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,14 @@ DEFINE_RUNTIME_bool(enable_copy_retryable_requests_from_parent, true,
"Whether to copy retryable requests from parent tablet when opening"
"the child tablet");

DEFINE_NON_RUNTIME_uint32(deleted_tablet_cache_max_size, 10000,
"Maximum size for the cache of recently deleted tablet ids. Used to "
"reject remote bootstrap requests for recently deleted tablets.");

DEFINE_RUNTIME_bool(
reject_rbs_for_deleted_tablet, true,
"Whether to reject a request to RBS a tablet that the receiving tserver has recently deleted.");

DEFINE_UNKNOWN_int32(flush_bootstrap_state_pool_max_threads, -1,
"The maximum number of threads used to flush retryable requests");

Expand Down Expand Up @@ -493,6 +501,7 @@ TSTabletManager::TSTabletManager(FsManager* fs_manager,
MetricRegistry* metric_registry)
: fs_manager_(fs_manager),
server_(server),
deleted_tablet_ids_(FLAGS_deleted_tablet_cache_max_size),
metric_registry_(metric_registry),
state_(MANAGER_INITIALIZING) {
ThreadPoolMetrics metrics = {
Expand Down Expand Up @@ -1558,6 +1567,12 @@ Status HandleReplacingStaleTablet(

Status TSTabletManager::StartRemoteBootstrap(const StartRemoteBootstrapRequestPB& req) {
const TabletId& tablet_id = req.tablet_id();
if (FLAGS_reject_rbs_for_deleted_tablet) {
SharedLock<RWMutex> shared_lock(mutex_);
SCHECK(
!deleted_tablet_ids_.contains(tablet_id), IllegalState,
"Cannot bootstrap a new tablet replica for a previously deleted tablet replica.");
}
const PeerId& bootstrap_peer_uuid = req.bootstrap_source_peer_uuid();
const auto& kLogPrefix = TabletLogPrefix(tablet_id);
const auto& private_addr = req.bootstrap_source_private_addr()[0].host();
Expand Down Expand Up @@ -1891,6 +1906,7 @@ Status TSTabletManager::DeleteTablet(
RETURN_NOT_OK(CheckRunningUnlocked(error_code));
CHECK_EQ(1, tablet_map_.erase(tablet_id)) << tablet_id;
dirty_tablets_.erase(tablet_id);
deleted_tablet_ids_.insert(tablet_id);
}

// We unregister TOMBSTONED tablets in addition to DELETED tablets because they do not have
Expand Down
5 changes: 5 additions & 0 deletions src/yb/tserver/ts_tablet_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@

#include "yb/util/status_fwd.h"
#include "yb/util/locks.h"
#include "yb/util/lru_cache.h"
#include "yb/util/rw_mutex.h"
#include "yb/util/shared_lock.h"
#include "yb/util/threadpool.h"
Expand Down Expand Up @@ -643,6 +644,10 @@ class TSTabletManager : public tserver::TabletPeerLookupIf, public tablet::Table

// Map from tablet ID to tablet
TabletMap tablet_map_ GUARDED_BY(mutex_);
// A cache of the most recently deleted tablets. Only includes tablets deleted with argument
// TABLET_DATA_DELETED. Used to reject certain requests on recently deleted tablets, such as
// StartRemoteBootstrap.
LRUCache<TabletId> deleted_tablet_ids_ GUARDED_BY(mutex_);

// Map from table ID to count of children in data and wal directories.
TableDiskAssignmentMap table_data_assignment_map_ GUARDED_BY(dir_assignment_mutex_);
Expand Down
4 changes: 4 additions & 0 deletions src/yb/util/lru_cache-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,13 @@ TEST(LRUCacheTest, Simple) {
cache.insert(2);
cache.insert(3);
ASSERT_EQ(AsString(cache), "[3, 2]");
ASSERT_TRUE(cache.contains(3));
ASSERT_FALSE(cache.contains(1));
ASSERT_EQ(0, cache.erase(1));
ASSERT_EQ(1, cache.erase(3));
ASSERT_EQ(AsString(cache), "[2]");
ASSERT_TRUE(cache.contains(2));
ASSERT_FALSE(cache.contains(3));
}

TEST(LRUCacheTest, Erase) {
Expand Down
5 changes: 5 additions & 0 deletions src/yb/util/lru_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ class LRUCache {
return erase(key);
}

template<class Key>
bool contains(const Key& key) const {
return impl_.template get<IdTag>().contains(key);
}

// Erase by usage order iterator
const_iterator erase(const_iterator pos) {
return impl_.erase(pos);
Expand Down

0 comments on commit 129c3db

Please sign in to comment.