Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NBS] Issue 2732: add change device/agent state buttons #2760

Merged
merged 13 commits into from
Jan 16, 2025
Merged
6 changes: 6 additions & 0 deletions cloud/blockstore/config/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1083,4 +1083,10 @@ message TStorageServiceConfig
// percentage, then the rejection of such agents does not occur - we assume
// a connectivity failure in the cluster.
optional double DiskRegistryInitialAgentRejectionThreshold = 396;

// Enable buttons for agent/device state changing.
optional bool EnableToChangeStatesFromDiskRegistryMonpage = 397;

// Enable buttons for changing the state of devices that are in the error state.
optional bool EnableToChangeErrorStatesFromDiskRegistryMonpage = 398;
}
2 changes: 2 additions & 0 deletions cloud/blockstore/libs/storage/core/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,8 @@ TDuration MSeconds(ui32 value)
xxx(EncryptionAtRestForDiskRegistryBasedDisksEnabled, bool, false )\
xxx(DisableFullPlacementGroupCountCalculation, bool, false )\
xxx(DiskRegistryInitialAgentRejectionThreshold, double, 50 )\
xxx(EnableToChangeStatesFromDiskRegistryMonpage, bool, false )\
xxx(EnableToChangeErrorStatesFromDiskRegistryMonpage, bool, false )\
// BLOCKSTORE_STORAGE_CONFIG_RW

#define BLOCKSTORE_STORAGE_CONFIG(xxx) \
Expand Down
3 changes: 3 additions & 0 deletions cloud/blockstore/libs/storage/core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,9 @@ class TStorageConfig

[[nodiscard]] bool GetDisableFullPlacementGroupCountCalculation() const;
[[nodiscard]] double GetDiskRegistryInitialAgentRejectionThreshold() const;
[[nodiscard]] bool GetEnableToChangeStatesFromDiskRegistryMonpage() const;
[[nodiscard]] bool
GetEnableToChangeErrorStatesFromDiskRegistryMonpage() const;
};

ui64 GetAllocationUnit(
Expand Down
10 changes: 10 additions & 0 deletions cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,16 @@ class TDiskRegistryActor final
const TCgiParameters& params,
TRequestInfoPtr requestInfo);

void HandleHttpInfo_ChangeDeviseState(
const NActors::TActorContext& ctx,
const TCgiParameters& params,
TRequestInfoPtr requestInfo);

void HandleHttpInfo_ChangeAgentState(
const NActors::TActorContext& ctx,
const TCgiParameters& params,
TRequestInfoPtr requestInfo);

void HandleHttpInfo_RenderDisks(
const NActors::TActorContext& ctx,
const TCgiParameters& params,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
#include "disk_registry_actor.h"

#include <cloud/blockstore/libs/storage/core/monitoring_utils.h>

namespace NCloud::NBlockStore::NStorage {

using namespace NActors;

using namespace NKikimr;

using namespace NMonitoringUtils;

namespace {

////////////////////////////////////////////////////////////////////////////////

// Helper actor behind the monpage "change agent state" action.
//
// On bootstrap it sends TEvChangeAgentStateRequest to Owner; when the response
// (or a poison pill) arrives it answers the pending HTTP request with a notify
// page, sends TEvOperationCompleted to Owner and dies.
class TChangeAgentStateActor final
    : public TActorBootstrapped<TChangeAgentStateActor>
{
private:
    // Receives the change-state request and the completion notification.
    const TActorId Owner;
    // Put into the redirect URL of the notify page.
    const ui64 TabletID;
    // The HTTP request that is answered with the notify page.
    const TRequestInfoPtr RequestInfo;
    // Agent whose state is being changed.
    const TString AgentID;
    // State sent in the change request.
    const NProto::EAgentState NewState;
    // State the agent had when the action was issued; used in the failure
    // message only.
    const NProto::EAgentState OldState;

public:
    TChangeAgentStateActor(
        const TActorId& owner,
        ui64 tabletID,
        TRequestInfoPtr requestInfo,
        TString agentId,
        NProto::EAgentState newState,
        NProto::EAgentState oldState);

    // Sends the request to Owner and switches to StateWork.
    void Bootstrap(const TActorContext& ctx);

private:
    STFUNC(StateWork);

    // Event handlers dispatched from StateWork.
    void HandlePoisonPill(
        const TEvents::TEvPoisonPill::TPtr& ev,
        const TActorContext& ctx);

    void HandleChangeAgentStateResponse(
        const TEvDiskRegistry::TEvChangeAgentStateResponse::TPtr& ev,
        const TActorContext& ctx);

    // Replies to RequestInfo with a notify page that redirects to the agent
    // page.
    void Notify(
        const TActorContext& ctx,
        TString message,
        EAlertLevel alertLevel);

    // Reports the outcome via Notify, tells Owner the operation is over and
    // terminates the actor.
    void ReplyAndDie(const TActorContext& ctx, NProto::TError error);
};

////////////////////////////////////////////////////////////////////////////////

// Stores everything the actor needs; no work is done until Bootstrap.
TChangeAgentStateActor::TChangeAgentStateActor(
        const TActorId& owner,
        ui64 tabletID,
        TRequestInfoPtr requestInfo,
        TString agentId,
        NProto::EAgentState newState,
        NProto::EAgentState oldState)
    : Owner(owner)
    , TabletID(tabletID)
    , RequestInfo(std::move(requestInfo))   // sink argument, moved in
    , AgentID(std::move(agentId))           // sink argument, moved in
    , NewState(newState)
    , OldState(oldState)
{
}

// Builds a TEvChangeAgentStateRequest for AgentID/NewState, sends it to Owner
// and then switches to StateWork to wait for the response.
void TChangeAgentStateActor::Bootstrap(const TActorContext& ctx)
{
    auto changeRequest =
        std::make_unique<TEvDiskRegistry::TEvChangeAgentStateRequest>();

    auto& record = changeRequest->Record;
    record.SetAgentId(AgentID);
    record.SetAgentState(NewState);
    record.SetReason("monpage action");

    NCloud::Send(ctx, Owner, std::move(changeRequest));

    Become(&TThis::StateWork);
}

void TChangeAgentStateActor::Notify(
const TActorContext& ctx,
TString message,
const EAlertLevel alertLevel)
{
TStringStream out;

BuildNotifyPageWithRedirect(
out,
std::move(message),
TStringBuilder() << "./app?action=agent&TabletId=" << TabletID
<< "&AgentID=" << AgentID,
alertLevel);

auto response = std::make_unique<NMon::TEvRemoteHttpInfoRes>(out.Str());
NCloud::Reply(ctx, *RequestInfo, std::move(response));
}

void TChangeAgentStateActor::ReplyAndDie(
const TActorContext& ctx,
NProto::TError error)
{
if (SUCCEEDED(error.GetCode())) {
vladstepanyuk marked this conversation as resolved.
Show resolved Hide resolved
Notify(ctx, "Operation successfully completed", EAlertLevel::SUCCESS);
} else {
Notify(
ctx,
TStringBuilder()
<< "failed to change agent[" << AgentID.Quote()
<< "] state from " << EAgentState_Name(OldState) << " to "
<< EAgentState_Name(NewState) << ": " << FormatError(error),
EAlertLevel::DANGER);
}

NCloud::Send(
ctx,
Owner,
std::make_unique<TEvDiskRegistryPrivate::TEvOperationCompleted>());

Die(ctx);
}

////////////////////////////////////////////////////////////////////////////////

// A poison pill ends the operation with an E_REJECTED "tablet is dead" error;
// the event payload itself is not inspected.
void TChangeAgentStateActor::HandlePoisonPill(
    const TEvents::TEvPoisonPill::TPtr& ev,
    const TActorContext& ctx)
{
    Y_UNUSED(ev);

    auto error = MakeTabletIsDeadError(E_REJECTED, __LOCATION__);
    ReplyAndDie(ctx, std::move(error));
}

// Forwards the error carried by the change-state response to ReplyAndDie,
// which reports it and finishes the actor.
void TChangeAgentStateActor::HandleChangeAgentStateResponse(
    const TEvDiskRegistry::TEvChangeAgentStateResponse::TPtr& ev,
    const TActorContext& ctx)
{
    ReplyAndDie(ctx, ev->Get()->GetError());
}

////////////////////////////////////////////////////////////////////////////////

// The only state of the actor after Bootstrap: waits for the change-state
// response or a poison pill; any other event is reported as unexpected.
STFUNC(TChangeAgentStateActor::StateWork)
{
    switch (ev->GetTypeRewrite()) {
        HFunc(
            TEvDiskRegistry::TEvChangeAgentStateResponse,
            HandleChangeAgentStateResponse);

        HFunc(TEvents::TEvPoisonPill, HandlePoisonPill);

        default:
            HandleUnexpectedEvent(
                ev,
                TBlockStoreComponents::DISK_REGISTRY_WORKER);
            break;
    }
}

} // namespace

////////////////////////////////////////////////////////////////////////////////

// Monpage handler for the "change agent state" action.
//
// Validates the request and, if it is acceptable, registers a
// TChangeAgentStateActor that performs the change and replies to the HTTP
// request. Checks, in order:
//   1. the feature flag EnableToChangeStatesFromDiskRegistryMonpage is set;
//   2. the "NewState" and "AgentID" CGI parameters are present;
//   3. "NewState" parses as EAgentState and is in the new-state allowlist;
//   4. the agent is known to State;
//   5. the agent's current state is in the old-state allowlist.
// Every failed check is answered with RejectHttpRequest and stops processing,
// so a rejected request is never replied to twice and never changes state.
//
// params      - CGI parameters of the HTTP request.
// requestInfo - request to reply to; ownership is handed to the spawned actor
//               on success.
void TDiskRegistryActor::HandleHttpInfo_ChangeAgentState(
    const TActorContext& ctx,
    const TCgiParameters& params,
    TRequestInfoPtr requestInfo)
{
    if (!Config->GetEnableToChangeStatesFromDiskRegistryMonpage()) {
        RejectHttpRequest(ctx, *requestInfo, "Can't change state from monpage");
        return;
    }

    const auto& newStateRaw = params.Get("NewState");
    const auto& agentId = params.Get("AgentID");

    if (!newStateRaw) {
        RejectHttpRequest(ctx, *requestInfo, "No new state is given");
        return;
    }

    if (!agentId) {
        RejectHttpRequest(ctx, *requestInfo, "No agent id is given");
        return;
    }

    NProto::EAgentState newState;
    if (!EAgentState_Parse(newStateRaw, &newState)) {
        RejectHttpRequest(ctx, *requestInfo, "Invalid new state");
        return;
    }

    // Target states that may be requested from the monpage.
    static const THashSet<NProto::EAgentState> NewStateAllowlist{
        NProto::EAgentState::AGENT_STATE_ONLINE,
        NProto::EAgentState::AGENT_STATE_WARNING,
    };

    if (!NewStateAllowlist.contains(newState)) {
        RejectHttpRequest(ctx, *requestInfo, "Invalid new state");
        return;
    }

    const auto agentState = State->GetAgentState(agentId);
    if (agentState.Empty()) {
        RejectHttpRequest(ctx, *requestInfo, "Unknown agent");
        return;
    }

    // Current states from which a monpage-initiated change is permitted.
    static const THashSet<NProto::EAgentState> OldStateAllowlist = {
        NProto::EAgentState::AGENT_STATE_ONLINE,
        NProto::EAgentState::AGENT_STATE_WARNING,
    };

    if (!OldStateAllowlist.contains(*agentState.Get())) {
        RejectHttpRequest(
            ctx,
            *requestInfo,
            "Can't change agent state from " +
                EAgentState_Name(*agentState.Get()));
        // Stop here: without this return the rejected request would still
        // spawn the actor, change the state and be replied to a second time.
        return;
    }

    LOG_INFO(
        ctx,
        TBlockStoreComponents::DISK_REGISTRY,
        "Change state of agent[%s] on monitoring page from %s to %s",
        agentId.Quote().c_str(),
        EAgentState_Name(*agentState.Get()).c_str(),
        EAgentState_Name(newState).c_str());

    auto actor = NCloud::Register<TChangeAgentStateActor>(
        ctx,
        SelfId(),
        TabletID(),
        std::move(requestInfo),
        agentId,
        newState,
        *agentState.Get());

    // Track the helper actor in Actors alongside the tablet's other workers.
    Actors.insert(actor);
}

} // namespace NCloud::NBlockStore::NStorage
Loading
Loading