1515#include < ydb/core/base/hive.h>
1616#include < ydb/core/base/path.h>
1717#include < ydb/core/base/statestorage.h>
18+ #include < ydb/core/base/statestorage_impl.h>
1819#include < ydb/core/base/tablet_pipe.h>
1920#include < ydb/core/cms/console/configs_dispatcher.h>
2021#include < ydb/core/mon/mon.h>
@@ -155,6 +156,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
155156 QuotaUsage,
156157 BridgeGroupState,
157158 PileComputeState,
159+ StateStorage,
160+ StateStorageRing,
161+ StateStorageNode,
158162 };
159163
160164 enum ETimeoutTag {
@@ -316,7 +320,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
316320 if (issueLog.status () != Ydb::Monitoring::StatusFlag::UNSPECIFIED) {
317321 id << Ydb::Monitoring::StatusFlag_Status_Name (issueLog.status ()) << ' -' ;
318322 }
319- id << crc16 (issueLog.message ());
323+ id << crc16 (TStringBuilder () << issueLog.message () << issueLog. type ());
320324 if (location.database ().name ()) {
321325 id << ' -' << crc32 (location.database ().name ());
322326 }
@@ -363,6 +367,15 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
363367 if (location.compute ().schema ().path ()) {
364368 id << ' -' << crc32 (location.compute ().schema ().path ());
365369 }
370+ if (location.compute ().state_storage ().pile ().name ()) {
371+ id << ' -' << location.compute ().state_storage ().pile ().name ();
372+ }
373+ if (location.compute ().state_storage ().ring ()) {
374+ id << ' -' << location.compute ().state_storage ().ring ();
375+ }
376+ if (location.compute ().state_storage ().node ().id ()) {
377+ id << ' -' << location.compute ().state_storage ().node ().id ();
378+ }
366379 return id.Str ();
367380 }
368381
@@ -392,10 +405,10 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
392405 if (Location.ByteSizeLong () > 0 ) {
393406 issueLog.mutable_location ()->CopyFrom (Location);
394407 }
395- issueLog.set_id (GetIssueId (issueLog));
396408 if (Type) {
397409 issueLog.set_type (Type);
398410 }
411+ issueLog.set_id (GetIssueId (issueLog));
399412 issueLog.set_level (Level);
400413 if (!reason.empty ()) {
401414 for (const TString& r : reason) {
@@ -675,6 +688,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
675688 std::optional<TRequestResponse<TEvSysView::TEvGetPDisksResponse>> PDisks;
676689 std::optional<TRequestResponse<TEvNodeWardenStorageConfig>> NodeWardenStorageConfig;
677690 std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> DatabaseBoardInfo;
691+ std::optional<TRequestResponse<TEvStateStorage::TEvListStateStorageResult>> StateStorageInfo;
692+ std::optional<TRequestResponse<TEvStateStorage::TEvListSchemeBoardResult>> SchemeBoardInfo;
693+ std::optional<TRequestResponse<TEvStateStorage::TEvListBoardResult>> BoardInfo;
678694 THashSet<TNodeId> UnknownStaticGroups;
679695
680696 const NKikimrConfig::THealthCheckConfig& HealthCheckConfig;
@@ -837,6 +853,16 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
837853 NodeWardenStorageConfig = RequestStorageConfig ();
838854 }
839855
856+ if (!IsSpecificDatabaseFilter ()) {
857+ StateStorageInfo = TRequestResponse<TEvStateStorage::TEvListStateStorageResult>(Span.CreateChild (TComponentTracingLevels::TTablet::Detailed, " TEvStateStorage::TEvListStateStorageResult" ));
858+ Send (MakeStateStorageProxyID (), new TEvStateStorage::TEvListStateStorage (), 0 /* flags*/ , 0 /* cookie*/ , Span.GetTraceId ());
859+ SchemeBoardInfo = TRequestResponse<TEvStateStorage::TEvListSchemeBoardResult>(Span.CreateChild (TComponentTracingLevels::TTablet::Detailed, " TEvStateStorage::TEvListSchemeBoardResult" ));
860+ Send (MakeStateStorageProxyID (), new TEvStateStorage::TEvListSchemeBoard (false ), 0 /* flags*/ , 0 /* cookie*/ , Span.GetTraceId ());
861+ BoardInfo = TRequestResponse<TEvStateStorage::TEvListBoardResult>(Span.CreateChild (TComponentTracingLevels::TTablet::Detailed, " TEvStateStorage::TEvListBoardResult" ));
862+ Send (MakeStateStorageProxyID (), new TEvStateStorage::TEvListBoard (), 0 /* flags*/ , 0 /* cookie*/ , Span.GetTraceId ());
863+ Requests += 3 ;
864+ }
865+
840866
841867 NodesInfo = TRequestResponse<TEvInterconnect::TEvNodesInfo>(Span.CreateChild (TComponentTracingLevels::TTablet::Detailed, " TEvInterconnect::TEvListNodes" ));
842868 Send (GetNameserviceActorId (), new TEvInterconnect::TEvListNodes (), 0 /* flags*/ , 0 /* cookie*/ , Span.GetTraceId ());
@@ -921,6 +947,37 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
921947 }
922948 }
923949
950+ void RequestNodes (TIntrusiveConstPtr<TStateStorageInfo> info) {
951+ for (const auto & group : info->RingGroups ) {
952+ for (const auto & ring : group.Rings ) {
953+ for (const auto & replica : ring.Replicas ) {
954+ RequestGenericNode (replica.NodeId ());
955+ }
956+ }
957+ }
958+ }
959+
960+ void Handle (TEvStateStorage::TEvListStateStorageResult::TPtr& ev) {
961+ if (StateStorageInfo->Set (std::move (ev))) {
962+ RequestNodes (StateStorageInfo->Get ()->Info );
963+ RequestDone (" TEvListStateStorageResult" );
964+ }
965+ }
966+
967+ void Handle (TEvStateStorage::TEvListSchemeBoardResult::TPtr& ev) {
968+ if (SchemeBoardInfo->Set (std::move (ev))) {
969+ RequestNodes (SchemeBoardInfo->Get ()->Info );
970+ RequestDone (" TEvListSсhemeBoardResult" );
971+ }
972+ }
973+
974+ void Handle (TEvStateStorage::TEvListBoardResult::TPtr& ev) {
975+ if (BoardInfo->Set (std::move (ev))) {
976+ RequestNodes (BoardInfo->Get ()->Info );
977+ RequestDone (" TEvListBoardResult" );
978+ }
979+ }
980+
924981 STATEFN (StateWait) {
925982 switch (ev->GetTypeRewrite ()) {
926983 hFunc (TEvents::TEvUndelivered, Handle);
@@ -946,6 +1003,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
9461003 hFunc (TEvStateStorage::TEvBoardInfo, Handle);
9471004 hFunc (TEvents::TEvWakeup, HandleTimeout);
9481005 hFunc (TEvNodeWardenStorageConfig, Handle);
1006+ hFunc (TEvStateStorage::TEvListStateStorageResult, Handle);
1007+ hFunc (TEvStateStorage::TEvListSchemeBoardResult, Handle);
1008+ hFunc (TEvStateStorage::TEvListBoardResult, Handle);
9491009 }
9501010 }
9511011
@@ -2999,6 +3059,18 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
29993059 message = std::regex_replace (message.c_str (), std::regex (" ^PDisk " ), " PDisks " );
30003060 break ;
30013061 }
3062+ case ETags::StateStorageRing: {
3063+ message = std::regex_replace (message.c_str (), std::regex (" ^Ring has " ), " Rings have " );
3064+ message = std::regex_replace (message.c_str (), std::regex (" ^Ring is " ), " Rings are " );
3065+ message = std::regex_replace (message.c_str (), std::regex (" ^Ring " ), " Rings " );
3066+ break ;
3067+ }
3068+ case ETags::StateStorageNode: {
3069+ message = std::regex_replace (message.c_str (), std::regex (" ^Ring has " ), " Rings have " );
3070+ message = std::regex_replace (message.c_str (), std::regex (" ^Ring is " ), " Rings are " );
3071+ message = std::regex_replace (message.c_str (), std::regex (" ^Ring " ), " Rings " );
3072+ break ;
3073+ }
30023074 default :
30033075 break ;
30043076 }
@@ -3054,6 +3126,10 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
30543126 isSimilar = it->IssueLog .location ().storage ().pool ().group ().pile ().name ()
30553127 == similar.begin ()->IssueLog .location ().storage ().pool ().group ().pile ().name ();
30563128 }
3129+ if (isSimilar && similar.begin ()->IssueLog .location ().compute ().state_storage ().has_pile ()) {
3130+ isSimilar = it->IssueLog .location ().compute ().state_storage ().pile ().name ()
3131+ == similar.begin ()->IssueLog .location ().compute ().state_storage ().pile ().name ();
3132+ }
30573133 if (isSimilar) {
30583134 auto move = it++;
30593135 similar.splice (similar.end (), records, move);
@@ -3295,6 +3371,8 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
32953371 MergeLevelRecords (mergeContext, ETags::VDiskState, ETags::BridgeGroupState);
32963372 MergeLevelRecords (mergeContext, ETags::VDiskState, ETags::GroupState);
32973373 MergeLevelRecords (mergeContext, ETags::PDiskState, ETags::VDiskState);
3374+ MergeLevelRecords (mergeContext, ETags::StateStorageRing);
3375+ MergeLevelRecords (mergeContext, ETags::StateStorageNode, ETags::StateStorageRing);
32983376 }
32993377 mergeContext.FillRecords (records);
33003378 }
@@ -3463,13 +3541,72 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
34633541 }
34643542 }
34653543
3544+ void FillStateStorage (TOverallStateContext& context, TString type, TIntrusiveConstPtr<TStateStorageInfo> info) {
3545+ TSelfCheckResult ssContext;
3546+ ssContext.Type = type;
3547+ for (const auto & ringGroup : info->RingGroups ) {
3548+ if (ringGroup.State != ERingGroupState::PRIMARY && ringGroup.State != ERingGroupState::SYNCHRONIZED) {
3549+ continue ;
3550+ }
3551+ TSelfCheckResult* currentContext = &ssContext;
3552+ TSelfCheckContext pileContext (&ssContext, TStringBuilder () << " PILE_" << type);
3553+ if ((bool )ringGroup.BridgePileId && NodeWardenStorageConfig && NodeWardenStorageConfig->IsOk ()) {
3554+ const auto & pileName = NodeWardenStorageConfig->Get ()->BridgeInfo ->GetPile (ringGroup.BridgePileId )->Name ;
3555+ pileContext.Location .mutable_compute ()->mutable_state_storage ()->mutable_pile ()->set_name (pileName);
3556+ currentContext = &pileContext;
3557+ }
3558+ ui32 disabledRings = 0 ;
3559+ ui32 badRings = 0 ;
3560+ for (size_t ringIdx = 0 ; ringIdx < ringGroup.Rings .size (); ++ringIdx) {
3561+ const auto & ring = ringGroup.Rings [ringIdx];
3562+ TSelfCheckContext ringContext (currentContext, TStringBuilder () << type << " _RING" );
3563+ ringContext.Location .mutable_compute ()->mutable_state_storage ()->set_ring (ringIdx + 1 );
3564+ if (ring.IsDisabled ) {
3565+ ++disabledRings;
3566+ continue ;
3567+ }
3568+ for (const auto & replica : ring.Replicas ) {
3569+ const auto node = replica.NodeId ();
3570+ if (!NodeSystemState[node].IsOk ()) {
3571+ TSelfCheckContext nodeContext (&ringContext, TStringBuilder () << type << " _NODE" );
3572+ nodeContext.Location .mutable_compute ()->mutable_state_storage ()->mutable_node ()->set_id (node);
3573+ nodeContext.ReportStatus (Ydb::Monitoring::StatusFlag::RED, " Node is not available" , ETags::StateStorageNode);
3574+ }
3575+ }
3576+ ringContext.ReportWithMaxChildStatus (" Ring has unavailable nodes" , ETags::StateStorageRing, {ETags::StateStorageNode});
3577+ if (ringContext.GetOverallStatus () == Ydb::Monitoring::StatusFlag::RED) {
3578+ ++badRings;
3579+ }
3580+ }
3581+ if (disabledRings + badRings > (ringGroup.NToSelect - 1 ) / 2 ) {
3582+ currentContext->ReportStatus (Ydb::Monitoring::StatusFlag::RED, " There is not enough functional rings" , ETags::StateStorage);
3583+ } else if (badRings > 1 ) {
3584+ currentContext->ReportStatus (Ydb::Monitoring::StatusFlag::YELLOW, " Multiple rings have unavailable replicas" , ETags::StateStorage);
3585+ } else if (badRings > 0 ) {
3586+ currentContext->ReportStatus (Ydb::Monitoring::StatusFlag::BLUE, " One ring has unavailable replicas" , ETags::StateStorage);
3587+ }
3588+ }
3589+ MergeRecords (ssContext.IssueRecords );
3590+ context.UpdateMaxStatus (ssContext.GetOverallStatus ());
3591+ context.AddIssues (ssContext.IssueRecords );
3592+ }
3593+
34663594 void FillResult (TOverallStateContext context) {
34673595 if (IsSpecificDatabaseFilter ()) {
34683596 FillDatabaseResult (context, FilterDatabase, DatabaseState[FilterDatabase]);
34693597 } else {
34703598 for (auto & [path, state] : DatabaseState) {
34713599 FillDatabaseResult (context, path, state);
34723600 }
3601+ if (StateStorageInfo && StateStorageInfo->IsOk ()) {
3602+ FillStateStorage (context, " STATE_STORAGE" , StateStorageInfo->Get ()->Info );
3603+ }
3604+ if (SchemeBoardInfo && SchemeBoardInfo->IsOk ()) {
3605+ FillStateStorage (context, " SCHEME_BOARD" , SchemeBoardInfo->Get ()->Info );
3606+ }
3607+ if (BoardInfo && BoardInfo->IsOk ()) {
3608+ FillStateStorage (context, " BOARD" , BoardInfo->Get ()->Info );
3609+ }
34733610 }
34743611 if (DatabaseState.empty ()) {
34753612 Ydb::Monitoring::DatabaseStatus& databaseStatus (*context.Result ->add_database_status ());
0 commit comments