@@ -1294,8 +1294,21 @@ size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
12941294}
12951295
12961296template  <typename  CacheTrait>
1297- void  CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1297+ bool  CacheAllocator<CacheTrait>::moveRegularItemWithSync(
12981298    Item& oldItem, WriteHandle& newItemHdl) {
1299+   // on function exit - the new item handle is no longer moving
1300+   // and other threads may access it - but in case where
1301+   // we failed to replace in access container we can give the
1302+   // new item back to the allocator
1303+   auto  guard = folly::makeGuard ([&]() {
1304+     auto  ref = newItemHdl->unmarkMoving ();
1305+     if  (UNLIKELY (ref == 0 )) {
1306+       const  auto  res =
1307+           releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1308+       XDCHECK (res == ReleaseRes::kReleased );
1309+     }
1310+   });
1311+ 
12991312  XDCHECK (oldItem.isMoving ());
13001313  XDCHECK (!oldItem.isExpired ());
13011314  //  TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
@@ -1326,6 +1339,22 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13261339
13271340  auto  replaced = accessContainer_->replaceIf (oldItem, *newItemHdl,
13281341                                   predicate);
1342+   //  another thread may have called insertOrReplace which could have
1343+   //  marked this item as unaccessible causing the replaceIf
1344+   //  in the access container to fail - in this case we want
1345+   //  to abort the move since the item is no longer valid
1346+   if  (!replaced) {
1347+       return  false ;
1348+   }
1349+   //  what if another thread calls insertOrReplace now when
1350+   //  the item is moving and already replaced in the hash table?
1351+   //  1. it succeeds in updating the hash table - so there is
1352+   //     no guarantee that isAccessible() is true
1353+   //  2. it will then try to remove from MM container
1354+   //      - this operation will wait for newItemHdl to
1355+   //        be unmarkedMoving via the waitContext
1356+   //  3. replaced handle is returned and eventually drops
1357+   //     ref to 0 and the item is recycled back to allocator.
13291358
13301359  if  (config_.moveCb ) {
13311360    //  Execute the move callback. We cannot make any guarantees about the
@@ -1367,14 +1396,7 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13671396    XDCHECK (newItemHdl->hasChainedItem ());
13681397  }
13691398  newItemHdl.unmarkNascent ();
1370-   auto  ref = newItemHdl->unmarkMoving ();
1371-   // remove because there is a chance the new item was not
1372-   // added to the access container
1373-   if  (UNLIKELY (ref == 0 )) {
1374-     const  auto  res =
1375-         releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1376-     XDCHECK (res == ReleaseRes::kReleased );
1377-   }
1399+   return  true ;
13781400}
13791401
13801402template  <typename  CacheTrait>
@@ -1529,7 +1551,6 @@ template <typename CacheTrait>
15291551void  CacheAllocator<CacheTrait>::unlinkItemForEviction(Item& it) {
15301552  XDCHECK (it.isMarkedForEviction ());
15311553  XDCHECK (it.getRefCount () == 0 );
1532- 
15331554  accessContainer_->remove (it);
15341555  removeFromMMContainer (it);
15351556
@@ -1624,28 +1645,43 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
16241645    auto  evictedToNext = lastTier ? nullptr 
16251646        : tryEvictToNextMemoryTier (*candidate, false );
16261647    if  (!evictedToNext) {
1627-       if  (!token.isValid ()) {
1648+       // if insertOrReplace was called during move
1649+       // then candidate will not be accessible (failed replace during tryEvict)
1650+       //  - therefore this was why we failed to
1651+       //    evict to the next tier and insertOrReplace
1652+       //    will remove from NVM cache
1653+       // however, if candidate is accessible
1654+       // that means the allocation in the next
1655+       // tier failed - so we will continue to
1656+       // evict the item to NVM cache
1657+       bool  failedToReplace = !candidate->isAccessible ();
1658+       if  (!token.isValid () && !failedToReplace) {
16281659        token = createPutToken (*candidate);
16291660      }
1630-       //  tryEvictToNextMemoryTier should only fail if allocation of the new item fails
1631-       //  in that case, it should be still possible to mark item as exclusive.
1661+       //  tryEvictToNextMemoryTier can fail if:
1662+       //     a) allocation of the new item fails in that case,
1663+       //        it should be still possible to mark item for eviction.
1664+       //     b) another thread calls insertOrReplace and the item
1665+       //        is no longer accessible
16321666      // 
16331667      //  in case that we are on the last tier, we would have already marked
16341668      //  as exclusive since we will not be moving the item to the next tier
16351669      //  but rather just evicting all together, no need to
1636-       //  markExclusiveWhenMoving 
1670+       //  markForEvictionWhenMoving 
16371671      auto  ret = lastTier ? true  : candidate->markForEvictionWhenMoving ();
16381672      XDCHECK (ret);
16391673
16401674      unlinkItemForEviction (*candidate);
1675+       
1676+       if  (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)
1677+               && !failedToReplace) {
1678+         nvmCache_->put (*candidate, std::move (token));
1679+       }
16411680      //  wake up any readers that wait for the move to complete
16421681      //  it's safe to do now, as we have the item marked exclusive and
16431682      //  no other reader can be added to the waiters list
16441683      wakeUpWaiters (*candidate, {});
16451684
1646-       if  (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)) {
1647-         nvmCache_->put (*candidate, std::move (token));
1648-       }
16491685    } else  {
16501686      XDCHECK (!evictedToNext->isMarkedForEviction () && !evictedToNext->isMoving ());
16511687      XDCHECK (!candidate->isMarkedForEviction () && !candidate->isMoving ());
@@ -1756,7 +1792,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
17561792
17571793    if  (newItemHdl) {
17581794      XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1759-       moveRegularItemWithSync (item, newItemHdl);
1795+       if  (!moveRegularItemWithSync (item, newItemHdl)) {
1796+           return  WriteHandle{};
1797+       }
1798+       XDCHECK_EQ (newItemHdl->getKey (),item.getKey ());
17601799      item.unmarkMoving ();
17611800      return  newItemHdl;
17621801    } else  {
@@ -1795,7 +1834,9 @@ CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
17951834
17961835    if  (newItemHdl) {
17971836      XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1798-       moveRegularItemWithSync (item, newItemHdl);
1837+       if  (!moveRegularItemWithSync (item, newItemHdl)) {
1838+           return  WriteHandle{};
1839+       }
17991840      item.unmarkMoving ();
18001841      return  newItemHdl;
18011842    } else  {
@@ -3148,9 +3189,23 @@ bool CacheAllocator<CacheTrait>::tryMovingForSlabRelease(
31483189      //  TODO: add support for chained items
31493190      return  false ;
31503191    } else  {
3151-       moveRegularItemWithSync (oldItem, newItemHdl);
3152-       removeFromMMContainer (oldItem);
3153-       return  true ;
3192+       // move can fail if another thread calls insertOrReplace;
3193+       // in this case oldItem is no longer valid (not accessible),
3194+       // it gets removed from MMContainer and evictForSlabRelease
3195+       // will send it back to the allocator
3196+       bool  ret = moveRegularItemWithSync (oldItem, newItemHdl);
3197+       if  (!ret) {
3198+           // we failed to move - newItemHdl was released back to allocator
3199+           // by the moveRegularItemWithSync but oldItem is not accessible
3200+           // and no longer valid - we need to clean it up here
3201+           XDCHECK (!oldItem.isAccessible ());
3202+           oldItem.markForEvictionWhenMoving ();
3203+           unlinkItemForEviction (oldItem);
3204+           wakeUpWaiters (oldItem, {});
3205+       } else  {
3206+         removeFromMMContainer (oldItem);
3207+       }
3208+       return  ret;
31543209    }
31553210  }
31563211}
0 commit comments