Skip to content

Commit

Permalink
[#24575] docdb: RF1 changed to any RF>1 will break leaderless tablet …
Browse files Browse the repository at this point in the history
…endpoint

Summary:
Considering the following scenario:
Originally it's RF1, all tablet leaders will report max ht lease to the master
N1 (leader) : ht_lease: MAX_HT_LEASE
Master leader: {N1: ht_lease=MAX_HT_LEASE}

Edit the universe to make it RF3, the leader will report the normal ht lease to the master
N1 (leader) : ht_lease: 300ms
N2 (follower)
N3 (follower)
Master leader: {N1: ht_lease=MAX_HT_LEASE}

When leader N1 reports its leader status to master, 300 < MAX_HT_LEASE, so it's considered as stale and skipped.
Reports in the following 120seconds are all skipped, so that the tablet is reported as leaderless.

This diff fixes this issue by accepting the ht_lease when the existing one is MAX_HT_LEASE.
Also make the RF1 remaining ht_lease more readable, an RF1 tablet will show `+INF(RF1)`:
```
LEADER: 127.0.0.3 (HAS_LEASE)
Remaining ht_lease: +INF(RF1)
UUID: a335a08464a84de0991d51aefee9dee8
```
Jira: DB-13612

Test Plan:
Manual test:
1. Starts an RF3 universe and create a table with 1 tablet (assume tablet-1).
2. Remove two followers from the raft group of tablet-1 with `yb-admin change_config REMOVE_SERVER`.
3. Check the table page, see remaining ht_lease is showing as `+INF(RF1)`.
4. Add one follower back.
5. Check the table page, see remaining ht_lease becomes a normal value.
6. After 2mins, check leaderless tablet endpoint, shouldn't see tablet-1.

MasterPathHandlersLeaderlessITest.*
MasterPathHandlersLeaderlessRF3ITest.*
MasterPathHandlersLeaderlessRF1ITest.*
MasterPathHandlersItest.TestUndeletedParentTablet
MasterPathHandlersItest.TestHiddenSplitParentTablet

Reviewers: asrivastava

Reviewed By: asrivastava

Subscribers: esheng, ybase

Differential Revision: https://phorge.dev.yugabyte.com/D39725
  • Loading branch information
Huqicheng committed Nov 11, 2024
1 parent af8f3ae commit 24ed670
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 18 deletions.
3 changes: 3 additions & 0 deletions src/yb/common/common_consensus_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@
#include <string>

#include "yb/common/common_types.pb.h"
#include "yb/common/hybrid_time.h"

#include "yb/consensus/metadata.pb.h"

namespace yb::consensus {

const MicrosTime kInfiniteHybridTimeLeaseExpiration = HybridTime::kMax.GetPhysicalValueMicros();

bool IsRaftConfigMember(const std::string& tserver_uuid, const RaftConfigPB& config);

bool IsRaftConfigVoter(const std::string& tserver_uuid, const RaftConfigPB& config);
Expand Down
2 changes: 1 addition & 1 deletion src/yb/consensus/consensus_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1287,7 +1287,7 @@ MicrosTime PeerMessageQueue::HybridTimeLeaseExpirationWatermark() {
}

static result_type InfiniteWatermarkForLocalPeer() {
return HybridTime::kMax.GetPhysicalValueMicros();
return kInfiniteHybridTimeLeaseExpiration;
}

static result_type ExtractValue(const TrackedPeer& peer) {
Expand Down
68 changes: 65 additions & 3 deletions src/yb/integration-tests/master_path_handlers-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@
#include "yb/master/mini_master.h"

#include "yb/master/tasks_tracker.h"

#include "yb/rpc/messenger.h"

#include "yb/server/webui_util.h"

#include "yb/tablet/tablet_types.pb.h"
Expand Down Expand Up @@ -80,6 +83,7 @@ DECLARE_bool(TEST_skip_deleting_split_tablets);
DECLARE_uint32(leaderless_tablet_alert_delay_secs);
DECLARE_bool(TEST_assert_local_op);
DECLARE_bool(TEST_echo_service_enabled);
DECLARE_bool(enable_load_balancing);

namespace yb {
namespace master {
Expand Down Expand Up @@ -787,6 +791,64 @@ TEST_F_EX(
ASSERT_EQ(result.find(tablet->id()), string::npos);
}

class MasterPathHandlersLeaderlessITest : public MasterPathHandlersItest {
public:
void SetUp() override {
ANNOTATE_UNPROTECTED_WRITE(FLAGS_leaderless_tablet_alert_delay_secs) =
kLeaderlessTabletAlertDelaySecs;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_enable_load_balancing) = false;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_tserver_heartbeat_metrics_interval_ms) =
kMetricsHeartbeatIntervalMs;
MasterPathHandlersItest::SetUp();
}
protected:
const int kLeaderlessTabletAlertDelaySecs = 5;
const int kMetricsHeartbeatIntervalMs = 1000;
};

// A tablet changed from RF-1 to RF-3 shouldn't be shown as leaderless tablet.
TEST_F(MasterPathHandlersLeaderlessITest, TestRF1ChangedToRF3) {
const auto kLeaderlessTabletAlertDelaySecs = 5;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_leaderless_tablet_alert_delay_secs) =
kLeaderlessTabletAlertDelaySecs;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_enable_load_balancing) = false;

CreateTestTable(1 /* num_tablets */);
client::TableHandle table;
ASSERT_OK(table.Open(table_name, client_.get()));
auto& catalog_manager = ASSERT_RESULT(cluster_->GetLeaderMiniMaster())->catalog_manager();
auto tablet = ASSERT_RESULT(catalog_manager.GetTableInfo(table->id())->GetTablets())[0];
auto leader = ASSERT_RESULT(GetLeaderPeerForTablet(cluster_.get(), tablet->id()));
const auto proxy_cache = std::make_unique<rpc::ProxyCache>(client_->messenger());
auto ts_map = ASSERT_RESULT(itest::CreateTabletServerMap(
ASSERT_RESULT(cluster_->GetLeaderMasterProxy<master::MasterClusterProxy>()),
proxy_cache.get()));
auto leader_uuid = leader->permanent_uuid();
for (const auto& replica : ts_map) {
auto uuid = replica.second->uuid();
if (uuid == leader_uuid) {
continue;
}
ASSERT_OK(itest::RemoveServer(
ts_map[leader_uuid].get(), tablet->id(), ts_map[uuid].get(), boost::none, 10s));
}
SleepFor(kLeaderlessTabletAlertDelaySecs * yb::kTimeMultiplier * 1s);
string result = ASSERT_RESULT(GetLeaderlessTabletsString());
ASSERT_EQ(result.find(tablet->id()), string::npos);
for (const auto& replica : ts_map) {
auto uuid = replica.second->uuid();
if (uuid == leader_uuid) {
continue;
}
ASSERT_OK(itest::AddServer(
ts_map[leader_uuid].get(), tablet->id(), ts_map[uuid].get(),
consensus::PeerMemberType::PRE_VOTER, boost::none, 10s));
}
SleepFor(kLeaderlessTabletAlertDelaySecs * yb::kTimeMultiplier * 1s);
result = ASSERT_RESULT(GetLeaderlessTabletsString());
ASSERT_EQ(result.find(tablet->id()), string::npos);
}

class MasterPathHandlersExternalItest : public MasterPathHandlersBaseItest<ExternalMiniCluster> {
public:
void InitCluster() override {
Expand Down Expand Up @@ -1110,7 +1172,7 @@ TEST_F_EX(MasterPathHandlersItest, TestTablePlacementInfo, MasterPathHandlersExt
}

template <int RF>
class MasterPathHandlersLeaderlessITest : public MasterPathHandlersExternalItest {
class MasterPathHandlersExternalLeaderlessITest : public MasterPathHandlersExternalItest {
protected:
int num_tablet_servers() const override {
return RF;
Expand Down Expand Up @@ -1167,8 +1229,8 @@ class MasterPathHandlersLeaderlessITest : public MasterPathHandlersExternalItest
static constexpr int kTserverHeartbeatMetricsIntervalMs = 1000;
};

typedef MasterPathHandlersLeaderlessITest<3> MasterPathHandlersLeaderlessRF3ITest;
typedef MasterPathHandlersLeaderlessITest<1> MasterPathHandlersLeaderlessRF1ITest;
typedef MasterPathHandlersExternalLeaderlessITest<3> MasterPathHandlersLeaderlessRF3ITest;
typedef MasterPathHandlersExternalLeaderlessITest<1> MasterPathHandlersLeaderlessRF1ITest;

TEST_F(MasterPathHandlersLeaderlessRF3ITest, TestLeaderlessTabletEndpoint) {
ASSERT_OK(cluster_->SetFlagOnMasters("leaderless_tablet_alert_delay_secs", "5"));
Expand Down
17 changes: 13 additions & 4 deletions src/yb/master/catalog_entity_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

#include "yb/cdc/xcluster_types.h"
#include "yb/common/colocated_util.h"
#include "yb/common/common_consensus_util.h"
#include "yb/common/doc_hybrid_time.h"
#include "yb/dockv/partition.h"
#include "yb/common/schema_pbutil.h"
Expand Down Expand Up @@ -119,10 +120,18 @@ void TabletReplica::UpdateLeaderLeaseInfo(const TabletLeaderLeaseInfo& info) {
const bool initialized = leader_lease_info.initialized;
const auto old_lease_exp = leader_lease_info.ht_lease_expiration;
leader_lease_info = info;
leader_lease_info.ht_lease_expiration =
info.leader_lease_status == consensus::LeaderLeaseStatus::HAS_LEASE
? std::max(info.ht_lease_expiration, old_lease_exp)
: 0;
leader_lease_info.ht_lease_expiration = 0;
if (info.leader_lease_status == consensus::LeaderLeaseStatus::HAS_LEASE) {
if (old_lease_exp == consensus::kInfiniteHybridTimeLeaseExpiration) {
// It's originally RF-1, so there are two possibilities:
// 1. It's still RF-1, and the new lease expiration is the same as the old one.
// 2. It's changed to RF>1, and the new lease expiration is less than the old one.
// In both cases, we can accept the new lease expiration.
leader_lease_info.ht_lease_expiration = info.ht_lease_expiration;
} else {
leader_lease_info.ht_lease_expiration = std::max(info.ht_lease_expiration, old_lease_exp);
}
}
leader_lease_info.initialized = initialized || info.initialized;
}

Expand Down
19 changes: 13 additions & 6 deletions src/yb/master/master-path-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@

#include <boost/date_time/posix_time/time_formatters.hpp>

#include "yb/common/common_consensus_util.h"
#include "yb/common/xcluster_util.h"

#include "yb/common/common_types_util.h"
Expand Down Expand Up @@ -3347,12 +3348,18 @@ string MasterPathHandlers::RaftConfigToHtml(
: "UNKNOWN";
html << Format(" <li><b>LEADER: $0 ($1)</b></li>\n", location_html, leader_lease_status);
if (leader_lease_info.leader_lease_status == consensus::LeaderLeaseStatus::HAS_LEASE) {
// Get the remaining milliseconds of the current valid lease.
const auto now_usec = boost::posix_time::microseconds(
master_->clock()->Now().GetPhysicalValueMicros());
auto ht_lease_usec = boost::posix_time::microseconds(leader_lease_info.ht_lease_expiration);
auto diff = ht_lease_usec - now_usec;
html << Format("Remaining ht_lease (may be stale): $0 ms<br>\n", diff.total_milliseconds());
auto ht_lease_exp = leader_lease_info.ht_lease_expiration;
if (ht_lease_exp == consensus::kInfiniteHybridTimeLeaseExpiration) {
html << "Remaining ht_lease: +INF(RF1)<br>\n";
} else {
// Get the remaining milliseconds of the current valid lease.
const auto now_usec = boost::posix_time::microseconds(
master_->clock()->Now().GetPhysicalValueMicros());
auto ht_lease_usec = boost::posix_time::microseconds(ht_lease_exp);
auto diff = ht_lease_usec - now_usec;
html << Format("Remaining ht_lease (may be stale): $0 ms<br>\n",
diff.total_milliseconds());
}
} else if (leader_lease_info.heartbeats_without_leader_lease > 0) {
html << Format(
"Cannot replicate lease for past <b><font color='red'>$0</font></b> heartbeats<br>",
Expand Down
15 changes: 11 additions & 4 deletions src/yb/master/master_heartbeat_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1304,7 +1304,7 @@ void MasterHeartbeatServiceImpl::ProcessTabletMetadata(
return;
}
auto& tablet = *tablet_result;
MicrosTime ht_lease_exp = 0;
MicrosTime new_ht_lease_exp = 0;
uint64 new_heartbeats_without_leader_lease = 0;
consensus::LeaderLeaseStatus leader_lease_status =
consensus::LeaderLeaseStatus::NO_MAJORITY_REPLICATED_LEASE;
Expand All @@ -1318,13 +1318,20 @@ void MasterHeartbeatServiceImpl::ProcessTabletMetadata(
leader_lease_status = leader_info.leader_lease_status();
leader_lease_info_initialized = true;
if (leader_info.leader_lease_status() == consensus::LeaderLeaseStatus::HAS_LEASE) {
ht_lease_exp = leader_info.ht_lease_expiration();
auto ht_lease_exp = leader_info.ht_lease_expiration();
auto current_ht_lease_exp = existing_leader_lease_info->ht_lease_expiration;
// If the reported ht lease from the leader is expired for more than
// FLAGS_maximum_tablet_leader_lease_expired_secs, the leader shouldn't be treated
// as a valid leader.
if (ht_lease_exp >= existing_leader_lease_info->ht_lease_expiration &&
// If the tablet has been changed from RF1 to any RF>1, the newly reported ht_lease_exp
// is always less than that reported when it was RF1 (kInfiniteHybridTimeLeaseExpiration).
// We should also treat such ht_lease_exp as valid.
// See https://github.com/yugabyte/yugabyte-db/issues/24575.
if ((ht_lease_exp >= current_ht_lease_exp ||
current_ht_lease_exp == consensus::kInfiniteHybridTimeLeaseExpiration) &&
!IsHtLeaseExpiredForTooLong(
master_->clock()->Now().GetPhysicalValueMicros(), ht_lease_exp)) {
new_ht_lease_exp = ht_lease_exp;
tablet->UpdateLastTimeWithValidLeader();
}
} else {
Expand All @@ -1336,7 +1343,7 @@ void MasterHeartbeatServiceImpl::ProcessTabletMetadata(
TabletLeaderLeaseInfo leader_lease_info{
leader_lease_info_initialized,
leader_lease_status,
ht_lease_exp,
new_ht_lease_exp,
new_heartbeats_without_leader_lease};
TabletReplicaDriveInfo drive_info{
storage_metadata.sst_file_size(),
Expand Down

0 comments on commit 24ed670

Please sign in to comment.