Skip to content

Commit

Permalink
issue-2737: fix NodeIndexCache population race (#2741)
Browse files Browse the repository at this point in the history
* issue-2737: fix NodeIndexCache population race

* fix review issues

* add same stuff for readahead

* better logic for createhandle

---------

Co-authored-by: Maxim Deb Natkh <[email protected]>
  • Loading branch information
debnatkh and Maxim Deb Natkh authored Dec 20, 2024
1 parent 39c5f12 commit e3f97f5
Show file tree
Hide file tree
Showing 14 changed files with 330 additions and 3 deletions.
3 changes: 2 additions & 1 deletion cloud/filestore/libs/storage/tablet/model/node_index_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class TNodeIndexCache
THashMap<TNodeIndexCacheKey, NProto::TNodeAttr, TNodeIndexCacheKeyHash>
AttrByParentNodeId;

ui32 MaxNodes;
ui32 MaxNodes = 0;

public:
explicit TNodeIndexCache(IAllocator* allocator);
Expand All @@ -69,6 +69,7 @@ class TNodeIndexCache
void InvalidateCache(ui64 parentNodeId, const TString& name);

void InvalidateCache(ui64 nodeId);

void RegisterGetNodeAttrResult(
ui64 parentNodeId,
const TString& name,
Expand Down
4 changes: 4 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor_addblob.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,10 @@ void TIndexTabletActor::CompleteTx_AddBlob(
const TActorContext& ctx,
TTxIndexTablet::TAddBlob& args)
{
for (auto [nodeId, _]: args.WriteRanges) {
InvalidateNodeCaches(nodeId);
}

// log request
FinalizeProfileLogRequestInfo(
std::move(args.ProfileLogRequest),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ void TIndexTabletActor::CompleteTx_AllocateData(
const TActorContext& ctx,
TTxIndexTablet::TAllocateData& args)
{
InvalidateNodeCaches(args.NodeId);

RemoveTransaction(*args.RequestInfo);

auto response = std::make_unique<TEvService::TEvAllocateDataResponse>(args.Error);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ void TIndexTabletActor::ExecuteTx_CreateHandle(
args.WriteCommitId,
parent,
args.ParentNode->Attrs);
args.UpdatedNodes.push_back(args.ParentNode->NodeId);

} else if (args.ShardId.empty()
&& HasFlag(args.Flags, NProto::TCreateHandleRequest::E_TRUNCATE))
Expand All @@ -361,6 +362,7 @@ void TIndexTabletActor::ExecuteTx_CreateHandle(
args.WriteCommitId,
attrs,
args.TargetNode->Attrs);
args.UpdatedNodes.push_back(args.TargetNodeId);
}

if (args.ShardId.empty()) {
Expand Down Expand Up @@ -417,6 +419,10 @@ void TIndexTabletActor::CompleteTx_CreateHandle(
const TActorContext& ctx,
TTxIndexTablet::TCreateHandle& args)
{
for (auto nodeId: args.UpdatedNodes) {
InvalidateNodeCaches(nodeId);
}

if (args.Error.GetCode() == E_ARGUMENT) {
// service actor sent something inappropriate, we'd better log it
LOG_ERROR(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,12 @@ void TIndexTabletActor::CompleteTx_CreateNode(
const TActorContext& ctx,
TTxIndexTablet::TCreateNode& args)
{
InvalidateNodeCaches(args.TargetNodeId);
InvalidateNodeCaches(args.ChildNodeId);
if (args.ParentNode) {
InvalidateNodeCaches(args.ParentNode->NodeId);
}

if (args.OpLogEntry.HasCreateNodeRequest() && !HasError(args.Error)) {
LOG_DEBUG(ctx, TFileStoreComponents::TABLET,
"%s Creating node in shard upon CreateNode: %s, %s",
Expand Down
15 changes: 15 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor_renamenode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,21 @@ void TIndexTabletActor::CompleteTx_RenameNode(
const TActorContext& ctx,
TTxIndexTablet::TRenameNode& args)
{
InvalidateNodeCaches(args.ParentNodeId);
InvalidateNodeCaches(args.NewParentNodeId);
if (args.ChildRef) {
InvalidateNodeCaches(args.ChildRef->ChildNodeId);
}
if (args.NewChildRef) {
InvalidateNodeCaches(args.NewChildRef->ChildNodeId);
}
if (args.ParentNode) {
InvalidateNodeCaches(args.ParentNode->NodeId);
}
if (args.NewParentNode) {
InvalidateNodeCaches(args.NewParentNode->NodeId);
}

if (!HasError(args.Error) && !args.ChildRef) {
auto message = ReportChildRefIsNull(TStringBuilder()
<< "RenameNode: " << args.Request.ShortDebugString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ void TIndexTabletActor::CompleteTx_SetNodeAttr(
const TActorContext& ctx,
TTxIndexTablet::TSetNodeAttr& args)
{
InvalidateNodeCaches(args.NodeId);

RemoveTransaction(*args.RequestInfo);

auto response = std::make_unique<TEvService::TEvSetNodeAttrResponse>(args.Error);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,13 @@ void TIndexTabletActor::CompleteTx_UnlinkNode(
args.SessionId.c_str(),
FormatError(args.Error).c_str());

if (args.ParentNodeId != InvalidNodeId) {
InvalidateNodeCaches(args.ParentNodeId);
}
if (args.ChildNode && args.ChildNode->NodeId != InvalidNodeId) {
InvalidateNodeCaches(args.ChildNode->NodeId);
}

if (!HasError(args.Error) && !args.ChildRef) {
auto message = ReportChildRefIsNull(TStringBuilder()
<< "UnlinkNode: " << args.Request.ShortDebugString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,8 @@ void TIndexTabletActor::CompleteTx_WriteData(
const TActorContext& ctx,
TTxIndexTablet::TWriteData& args)
{
InvalidateNodeCaches(args.NodeId);

RemoveTransaction(*args.RequestInfo);

auto reply = [&] (
Expand Down
11 changes: 10 additions & 1 deletion cloud/filestore/libs/storage/tablet/tablet_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -1290,11 +1290,20 @@ FILESTORE_DUPCACHE_REQUESTS(FILESTORE_DECLARE_DUPCACHE)
ui64 commitId,
const TByteRange& range);

public:

////////////////////////////////////////////////////////////////////////////
// Caching: ReadAhead, NodeIndexCache, InMemoryIndexState
////////////////////////////////////////////////////////////////////////////

// Upon any completion of the RW operation this function is supposed to be
// called in order to invalidate potentially cached data
void InvalidateNodeCaches(ui64 nodeId);

//
// ReadAhead.
//

public:
bool TryFillDescribeResult(
ui64 nodeId,
ui64 handle,
Expand Down
8 changes: 8 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_state_nodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,4 +640,12 @@ TInMemoryIndexStateStats TIndexTabletState::GetInMemoryIndexStateStats() const
return Impl->InMemoryIndexState.GetStats();
}

////////////////////////////////////////////////////////////////////////////////

void TIndexTabletState::InvalidateNodeCaches(ui64 nodeId)
{
InvalidateReadAheadCache(nodeId);
InvalidateNodeIndexCache(nodeId);
}

} // namespace NCloud::NFileStore::NStorage
2 changes: 2 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_tx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,7 @@ struct TTxIndexTablet
bool IsNewShardNode = false;
TMaybe<IIndexTabletDatabase::TNode> TargetNode;
TMaybe<IIndexTabletDatabase::TNode> ParentNode;
TVector<ui64> UpdatedNodes;

NProto::TOpLogEntry OpLogEntry;

Expand Down Expand Up @@ -1243,6 +1244,7 @@ struct TTxIndexTablet
OpLogEntry.Clear();

Response.Clear();
UpdatedNodes.clear();
}
};

Expand Down
98 changes: 98 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_ut_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4541,6 +4541,104 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_Data)
}
}

// See #2737 for more details
TABLET_TEST_16K(ReadAheadCacheShouldNotBeAffectedByConcurrentModifications)
{
NProto::TStorageConfig storageConfig;
storageConfig.SetReadAheadCacheRangeSize(1_MB);
storageConfig.SetReadAheadCacheMaxResultsPerNode(32);
TTestEnv env({}, storageConfig);
env.CreateSubDomain("nfs");
auto registry = env.GetRegistry();

ui32 nodeIdx = env.CreateNode("nfs");
ui64 tabletId = env.BootIndexTablet(nodeIdx);

TIndexTabletClient tablet(
env.GetRuntime(),
nodeIdx,
tabletId,
tabletConfig);
tablet.InitSession("client", "session");

auto id = CreateNode(tablet, TCreateNodeArgs::File(RootNodeId, "test"));

ui64 handle = CreateHandle(tablet, id);
for (ui32 i = 0; i < 2; ++i) {
tablet.WriteData(handle, i * 1_MB, 1_MB, '0');
}

for (ui32 i = 0; i < 7; ++i) {
const ui64 len = 128_KB;
const ui64 offset = i * len;
auto response = tablet.DescribeData(handle, offset, len);
}
// The next describe data will trigger ReadAhead cache population

auto& runtime = env.GetRuntime();
TAutoPtr<IEventHandle> rwTxPutRequest;

runtime.SetEventFilter(
[&](auto& runtime, auto& event)
{
Y_UNUSED(runtime);
switch (event->GetTypeRewrite()) {
case TEvBlobStorage::EvPut:
if (!rwTxPutRequest) {
rwTxPutRequest = std::move(event);
return true;
}
}
return false;
});

// Execute stage of RW tx will produce a TEvPut request, which is
// dropped to postpone the completion of the transaction
tablet.SendSetNodeAttrRequest(TSetNodeAttrArgs(RootNodeId).SetUid(2));
runtime.DispatchEvents(TDispatchOptions{
.CustomFinalCondition = [&]()
{
return rwTxPutRequest != nullptr;
}});

// Now the DescribeData operation is supposed to start a new transaction
// and hang because it accesses the same data as the previous one. This
// operation has a potential to populate the node attributes cache
tablet.SendDescribeDataRequest(handle, 1_MB - 128_KB, 128_KB);

tablet.AssertSetNodeAttrNoResponse();
tablet.AssertDescribeDataNoResponse();

// However, Prepare stages are already completed, and GetNodeAttr has
// stale data in its structure

runtime.SetEventFilter(TTestActorRuntimeBase::DefaultFilterFunc);

// Now let's start the new transaction that will clear the file by
// setting its size to 0
tablet.SendSetNodeAttrRequest(TSetNodeAttrArgs(id).SetSize(0));

runtime.DispatchEvents(TDispatchOptions(), TDuration::Seconds(2));

// Let us complete the initial transaction that will release the lock
// and let the DescribeData operation to proceed
runtime.Send(rwTxPutRequest.Release(), nodeIdx);

// Now the initial SetNodeAttr should complete
tablet.RecvSetNodeAttrResponse();
// The DescribeData operation should also complete
tablet.RecvDescribeDataResponse();
// The SetNodeAttr operation should also complete
tablet.RecvSetNodeAttrResponse();

// The ReadAhead cache should not give faulty results. If the cache is
// not invalidated, the DescribeData operation will return stale data
auto response = tablet.DescribeData(handle, 1_MB, 128_KB);

UNIT_ASSERT_VALUES_EQUAL(0, response->Record.GetFileSize());
UNIT_ASSERT_VALUES_EQUAL(0, response->Record.FreshDataRangesSize());
}

void DoTestWriteRequestCancellationOnTabletReboot(
bool writeBatchEnabled,
const TFileSystemConfig& tabletConfig)
Expand Down
Loading

0 comments on commit e3f97f5

Please sign in to comment.