diff --git a/src/Access/AeolusAccessUtil.h b/src/Access/AeolusAccessUtil.h index 856baf98aff..c6ffb0144ef 100644 --- a/src/Access/AeolusAccessUtil.h +++ b/src/Access/AeolusAccessUtil.h @@ -7,7 +7,7 @@ namespace DB { -bool aeolusCheck(const Context & context, const String & full_table_name) +inline bool aeolusCheck(const Context & context, const String & full_table_name) { String access_table_names = context.getSettingsRef().access_table_names; diff --git a/src/Analyzers/SubstituteSelectItemToAnyFunction.cpp b/src/Analyzers/SubstituteSelectItemToAnyFunction.cpp index 4ff06e59cd5..0bcdc6714e7 100644 --- a/src/Analyzers/SubstituteSelectItemToAnyFunction.cpp +++ b/src/Analyzers/SubstituteSelectItemToAnyFunction.cpp @@ -149,7 +149,9 @@ void SubstituteSelectItemToAnyFunction::visit(ASTSelectQuery * select_query) // process having and order by if (!processed_identifier_qualified_names.empty()) { - SubstituteIdentifierToAnyFunction::Data expression_data{{}, processed_identifier_qualified_names, {}, context, false, false}; + QualifiedNames empty_qualified_names; + NameSet empty_aliases; + SubstituteIdentifierToAnyFunction::Data expression_data{empty_qualified_names, processed_identifier_qualified_names, empty_aliases, context, false, false}; SubstituteIdentifierToAnyFunction expression_visitor(expression_data); if (select_query->having()) expression_visitor.visit(select_query->refHaving()); diff --git a/src/Catalog/Catalog.cpp b/src/Catalog/Catalog.cpp index aca8e07d0fc..f517f9dd5e0 100644 --- a/src/Catalog/Catalog.cpp +++ b/src/Catalog/Catalog.cpp @@ -976,9 +976,13 @@ namespace Catalog } StoragePtr storage; - if (auto query_context = CurrentThread::getGroup()->query_context.lock()) - storage = tryGetTableByUUID(*query_context, UUIDHelpers::UUIDToString(uuid), TxnTimestamp::maxTS()); - else + auto thread_group = CurrentThread::getGroup(); + if (thread_group) + { + if (auto query_context = thread_group->query_context.lock()) + storage = tryGetTableByUUID(*query_context, 
UUIDHelpers::UUIDToString(uuid), TxnTimestamp::maxTS()); + } + if (!storage) storage = tryGetTableByUUID(context, UUIDHelpers::UUIDToString(uuid), TxnTimestamp::maxTS()); if (auto pcm = context.getPartCacheManager(); pcm && storage) @@ -4262,7 +4266,7 @@ namespace Catalog return; start_key.clear(); - auto it = meta_proxy->getAllTransactionRecord(name_space, start_key, max_result_number); + it = meta_proxy->getAllTransactionRecord(name_space, start_key, max_result_number); if (!it->next()) return; } diff --git a/src/CloudServices/CnchMergeMutateThread.cpp b/src/CloudServices/CnchMergeMutateThread.cpp index 87360a13d72..9bcb39ebc80 100644 --- a/src/CloudServices/CnchMergeMutateThread.cpp +++ b/src/CloudServices/CnchMergeMutateThread.cpp @@ -221,16 +221,22 @@ FutureManipulationTask::~FutureManipulationTask() /// Add source parts (include invisible parts) to merging_mutating_parts. FutureManipulationTask & FutureManipulationTask::tagSourceParts(ServerDataPartsVector && parts_) { - auto check_and_add = [&](const auto & part_name) { - if (parent.currently_merging_mutating_parts.count(part_name)) - throw Exception("Part '" + part_name + "' was already in other Task, cancel merge.", ErrorCodes::ABORTED); - parent.currently_merging_mutating_parts.emplace(part_name); - }; - if (!record->try_execute) { std::lock_guard lock(parent.currently_merging_mutating_parts_mutex); + std::vector added; + auto check_and_add = [&](const auto & part_name) { + if (parent.currently_merging_mutating_parts.count(part_name)) + { + for (const auto & n : added) + parent.currently_merging_mutating_parts.erase(n); + throw Exception("Part '" + part_name + "' was already in other Task, cancel merge.", ErrorCodes::ABORTED); + } + parent.currently_merging_mutating_parts.emplace(part_name); + added.push_back(part_name); + }; + for (const auto & p : parts_) { check_and_add(p->name()); diff --git a/src/CloudServices/CnchServerClient.cpp b/src/CloudServices/CnchServerClient.cpp index 
f5a5d4be66d..b21ff86e3c7 100644 --- a/src/CloudServices/CnchServerClient.cpp +++ b/src/CloudServices/CnchServerClient.cpp @@ -1034,7 +1034,10 @@ brpc::CallId CnchServerClient::submitPreloadTask(const MergeTreeMetaBase & stora auto * cntl = new brpc::Controller(); auto call_id = cntl->call_id(); if (parts.empty()) + { + delete cntl; return call_id; + } Protos::SubmitPreloadTaskReq request; request.set_ts(time(nullptr)); diff --git a/src/CloudServices/CnchServerServiceImpl.cpp b/src/CloudServices/CnchServerServiceImpl.cpp index 01dcec4391f..02c816e4e12 100644 --- a/src/CloudServices/CnchServerServiceImpl.cpp +++ b/src/CloudServices/CnchServerServiceImpl.cpp @@ -751,49 +751,17 @@ void CnchServerServiceImpl::fetchPartitions( ASTPtr query_ptr = deserializeAST(rb); /// We should to add `database` into AST before calling `buildSelectQueryInfoForQuery`. { - StoragePtr storage = gc->getCnchCatalog()->getTable(*gc, request->database(), request->table(), TxnTimestamp::maxTS()); - - auto calculated_host - = gc->getCnchTopologyMaster() - ->getTargetServer(UUIDHelpers::UUIDToString(storage->getStorageUUID()), storage->getServerVwName(), true) - .getRPCAddress(); - - if (request->remote_host() != calculated_host) - throw Exception( - "Fetch partitions failed because of inconsistent view of topology in remote server, remote_host: " - + request->remote_host() + ", calculated_host: " + calculated_host, - ErrorCodes::LOGICAL_ERROR); - - Names column_names; - for (const auto & name : request->column_name_filter()) - column_names.push_back(name); - auto session_context = Context::createCopy(gc); - session_context->setCurrentDatabase(request->database()); - ReadBufferFromString rb(request->predicate()); - ASTPtr query_ptr = deserializeAST(rb); - /// We should to add `database` into AST before calling `buildSelectQueryInfoForQuery`. 
- { - ASTSelectQuery * select_query = query_ptr->as(); - if (!select_query) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected AST type found in buildSelectQueryInfoForQuery"); - select_query->replaceDatabaseAndTable(request->database(), request->table()); - } - SelectQueryInfo query_info = buildSelectQueryInfoForQuery(query_ptr, session_context); - - session_context->setTemporaryTransaction( - TxnTimestamp(request->has_txnid() ? request->txnid() : session_context->getTimestamp()), 0, false); - auto required_partitions = gc->getCnchCatalog()->getPartitionsByPredicate( - session_context, storage, query_info, column_names, request->has_ignore_ttl() && request->ignore_ttl()); - - response->set_total_size(required_partitions.total_partition_number); - auto & mutable_partitions = *response->mutable_partitions(); - for (auto & partition : required_partitions.partitions) - *mutable_partitions.Add() = std::move(partition); + ASTSelectQuery * select_query = query_ptr->as(); + if (!select_query) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected AST type found in buildSelectQueryInfoForQuery"); + select_query->replaceDatabaseAndTable(request->database(), request->table()); } SelectQueryInfo query_info = buildSelectQueryInfoForQuery(query_ptr, session_context); - session_context->setTemporaryTransaction(TxnTimestamp(request->has_txnid() ? request->txnid() : session_context->getTimestamp()), 0, false); - auto required_partitions = gc->getCnchCatalog()->getPartitionsByPredicate(session_context, storage, query_info, column_names, request->has_ignore_ttl() && request->ignore_ttl()); + session_context->setTemporaryTransaction( + TxnTimestamp(request->has_txnid() ? 
request->txnid() : session_context->getTimestamp()), 0, false); + auto required_partitions = gc->getCnchCatalog()->getPartitionsByPredicate( + session_context, storage, query_info, column_names, request->has_ignore_ttl() && request->ignore_ttl()); response->set_total_size(required_partitions.total_partition_number); auto & mutable_partitions = *response->mutable_partitions(); diff --git a/src/CloudServices/CnchWorkerClient.cpp b/src/CloudServices/CnchWorkerClient.cpp index 67796bf8cad..7d911b7b54e 100644 --- a/src/CloudServices/CnchWorkerClient.cpp +++ b/src/CloudServices/CnchWorkerClient.cpp @@ -175,6 +175,60 @@ std::vector CnchWorkerClient::getManipulationTasksStatus() return res; } +std::vector CnchWorkerClient::getTTLCacheStats() +{ + brpc::Controller cntl; + Protos::GetTTLCacheStatsReq request; + Protos::GetTTLCacheStatsResp response; + + stub->getTTLCacheStats(&cntl, &request, &response, nullptr); + + assertController(cntl); + RPCHelpers::checkResponse(response); + + std::vector res; + res.reserve(response.tables_size()); + for (const auto & t : response.tables()) + res.push_back(t); + return res; +} + +std::vector CnchWorkerClient::getTTLCachePartitionStats() +{ + brpc::Controller cntl; + Protos::GetTTLCachePartitionStatsReq request; + Protos::GetTTLCachePartitionStatsResp response; + + stub->getTTLCachePartitionStats(&cntl, &request, &response, nullptr); + + assertController(cntl); + RPCHelpers::checkResponse(response); + + std::vector res; + res.reserve(response.partitions_size()); + for (const auto & p : response.partitions()) + res.push_back(p); + return res; +} + +std::vector CnchWorkerClient::getPreloadStats() +{ + brpc::Controller cntl; + Protos::GetPreloadStatsReq request; + Protos::GetPreloadStatsResp response; + + stub->getPreloadStats(&cntl, &request, &response, nullptr); + + assertController(cntl); + RPCHelpers::checkResponse(response); + + std::vector res; + res.reserve(response.partitions_size()); + for (const auto & p : response.partitions()) 
+ res.push_back(p); + return res; +} + void CnchWorkerClient::submitMvRefreshTask( const StorageMaterializedView & , const ManipulationTaskParams & params, TxnTimestamp txn_id) { diff --git a/src/CloudServices/CnchWorkerClient.h b/src/CloudServices/CnchWorkerClient.h index 8aa8e4a6f6c..bf19a0dc82d 100644 --- a/src/CloudServices/CnchWorkerClient.h +++ b/src/CloudServices/CnchWorkerClient.h @@ -43,6 +43,9 @@ namespace DB namespace Protos { class CnchWorkerService_Stub; + class TTLCacheTableStats; + class TTLCachePartitionStats; + class PreloadPartitionStats; } namespace IngestColumnCnch @@ -81,6 +84,9 @@ class CnchWorkerClient : public RpcClientBase void shutdownManipulationTasks(const UUID & table_uuid, const Strings & task_ids = Strings{}); std::unordered_set touchManipulationTasks(const UUID & table_uuid, const Strings & tasks_id); std::vector getManipulationTasksStatus(); + std::vector getTTLCacheStats(); + std::vector getTTLCachePartitionStats(); + std::vector getPreloadStats(); void submitMvRefreshTask( const StorageMaterializedView & storage, const ManipulationTaskParams & params, TxnTimestamp txn_id); diff --git a/src/CloudServices/CnchWorkerServiceImpl.cpp b/src/CloudServices/CnchWorkerServiceImpl.cpp index 53cb069af0c..4e05d4750bc 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.cpp +++ b/src/CloudServices/CnchWorkerServiceImpl.cpp @@ -34,6 +34,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -605,10 +608,23 @@ void CnchWorkerServiceImpl::preloadDataParts( } else { + // Group parts by partition and register with PreloadRegistry before scheduling + // so in-flight counts are visible immediately. 
+ auto & registry = PreloadRegistry::instance(); + String table_name = cloud_merge_tree.getStorageID().getFullNameNotQuoted(); + String table_uuid_str = toString(cloud_merge_tree.getStorageUUID()); + std::unordered_map partition_counts; + for (const auto & part : data_parts) + partition_counts[part->info.partition_id]++; + for (const auto & [pid, cnt] : partition_counts) + registry.registerParts(table_name, table_uuid_str, pid, cnt, preload_level); + ThreadPool * preload_thread_pool = &(IDiskCache::getPreloadPool()); for (const auto & part : data_parts) { - preload_thread_pool->scheduleOrThrowOnError([part, preload_level, submit_ts, read_injection, storage] { + String pid = part->info.partition_id; + preload_thread_pool->scheduleOrThrowOnError([part, preload_level, submit_ts, read_injection, storage, table_uuid_str, pid, ®istry] { + SCOPE_EXIT({ registry.partFinished(table_uuid_str, pid); }); part->remote_fs_read_failed_injection = read_injection; part->disk_cache_mode = DiskCacheMode::SKIP_DISK_CACHE;// avoid getCheckum & getIndex re-cache part->preload(preload_level, submit_ts); @@ -1317,6 +1333,104 @@ void CnchWorkerServiceImpl::getCloudMergeTreeStatus( { } +void CnchWorkerServiceImpl::getTTLCacheStats( + google::protobuf::RpcController *, + const Protos::GetTTLCacheStatsReq *, + Protos::GetTTLCacheStatsResp * response, + google::protobuf::Closure * done) +{ + SUBMIT_THREADPOOL({ + auto ttl_caches = DiskCacheFactory::instance().getAllTableTTLCaches(); + LOG_INFO(log, "getTTLCacheStats: {} TTL cache(s) in registry", ttl_caches.size()); + for (const auto & [uuid, cache_ptr] : ttl_caches) + { + auto * ttl_cache = dynamic_cast(cache_ptr.get()); + if (!ttl_cache) + continue; + + auto stats = ttl_cache->getStats(); + LOG_INFO(log, "getTTLCacheStats: returning stats for table={} uuid={}", ttl_cache->getName(), stats.table_uuid); + auto * t = response->add_tables(); + t->set_table_name(ttl_cache->getName()); + t->set_table_uuid(stats.table_uuid); + 
t->set_ttl_minutes(ttl_cache->getTTLMinutes()); + t->set_max_size_bytes(ttl_cache->getMaxSizeBytes()); + t->set_last_eviction_run(stats.last_eviction_run); + t->set_evicted_expired(stats.evicted_expired); + t->set_evicted_size_limit(stats.evicted_size_limit); + t->set_async_triggered_evicted(stats.async_eviction_triggered); + t->set_async_skipped_rate_limit_evicted(stats.async_eviction_skipped_rate_limit); + t->set_rejected_non_time_partition(stats.rejected_non_time_partition); + t->set_rejected_too_old(stats.rejected_too_old); + t->set_count_preload(stats.cached_from_preload); + t->set_count_query(stats.cached_from_query); + t->set_bytes_preload(stats.cached_bytes_preload); + t->set_bytes_query(stats.cached_bytes_query); + t->set_count_restored(stats.cached_from_restored); + t->set_bytes_restored(stats.cached_bytes_restored); + t->set_idx_count_preload(stats.cached_idx_from_preload); + t->set_idx_bytes_preload(stats.cached_idx_bytes_preload); + t->set_idx_count_query(stats.cached_idx_from_query); + t->set_idx_bytes_query(stats.cached_idx_bytes_query); + t->set_data_hits(stats.data_hits); + t->set_data_misses(stats.data_misses); + t->set_idx_hits(stats.idx_hits); + t->set_idx_misses(stats.idx_misses); + } + }) +} + +void CnchWorkerServiceImpl::getTTLCachePartitionStats( + google::protobuf::RpcController *, + const Protos::GetTTLCachePartitionStatsReq *, + Protos::GetTTLCachePartitionStatsResp * response, + google::protobuf::Closure * done) +{ + SUBMIT_THREADPOOL({ + auto ttl_caches = DiskCacheFactory::instance().getAllTableTTLCaches(); + LOG_DEBUG(log, "getTTLCachePartitionStats: {} TTL cache(s) in registry", ttl_caches.size()); + for (const auto & [uuid, cache_ptr] : ttl_caches) + { + auto * ttl_cache = dynamic_cast(cache_ptr.get()); + if (!ttl_cache) + continue; + + auto table_stats = ttl_cache->getStats(); + LOG_DEBUG(log, "getTTLCachePartitionStats: returning partition stats for table={} uuid={}", ttl_cache->getName(), table_stats.table_uuid); + for (const auto 
& ps : ttl_cache->getPartitionStats()) + { + auto * p = response->add_partitions(); + p->set_table_name(ttl_cache->getName()); + p->set_table_uuid(table_stats.table_uuid); + p->set_partition(ps.partition_id); + p->set_entry_count(ps.entry_count); + p->set_bytes(ps.total_bytes); + } + } + }) +} + +void CnchWorkerServiceImpl::getPreloadStats( + google::protobuf::RpcController *, + const Protos::GetPreloadStatsReq *, + Protos::GetPreloadStatsResp * response, + google::protobuf::Closure * done) +{ + SUBMIT_THREADPOOL({ + for (const auto & snap : PreloadRegistry::instance().getSnapshot()) + { + auto * p = response->add_partitions(); + p->set_table_name(snap.table_name); + p->set_table_uuid(snap.table_uuid); + p->set_partition_id(snap.partition_id); + p->set_parts_in_flight(snap.parts_in_flight); + p->set_parts_submitted(snap.parts_submitted); + p->set_elapsed_ms(snap.elapsed_ms); + p->set_preload_level(snap.preload_level); + } + }) +} + #if defined(__clang__) # pragma clang diagnostic pop #else diff --git a/src/CloudServices/CnchWorkerServiceImpl.h b/src/CloudServices/CnchWorkerServiceImpl.h index 49578d86bca..fc5d169a529 100644 --- a/src/CloudServices/CnchWorkerServiceImpl.h +++ b/src/CloudServices/CnchWorkerServiceImpl.h @@ -67,6 +67,24 @@ class CnchWorkerServiceImpl : protected WithMutableContext, public DB::Protos::C Protos::GetManipulationTasksStatusResp * response, google::protobuf::Closure * done) override; + void getTTLCacheStats( + google::protobuf::RpcController * cntl, + const Protos::GetTTLCacheStatsReq * request, + Protos::GetTTLCacheStatsResp * response, + google::protobuf::Closure * done) override; + + void getTTLCachePartitionStats( + google::protobuf::RpcController * cntl, + const Protos::GetTTLCachePartitionStatsReq * request, + Protos::GetTTLCachePartitionStatsResp * response, + google::protobuf::Closure * done) override; + + void getPreloadStats( + google::protobuf::RpcController * cntl, + const Protos::GetPreloadStatsReq * request, + 
Protos::GetPreloadStatsResp * response, + google::protobuf::Closure * done) override; + void GetPreallocatedStatus( google::protobuf::RpcController *, const Protos::GetPreallocatedStatusReq * request, diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ba91347891b..e88ec334052 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -106,6 +106,10 @@ M(CreatedReadBufferMMapFailed, "") \ M(DiskReadElapsedMicroseconds, "Total time spent waiting for read syscall. This include reads from page cache.") \ M(DiskWriteElapsedMicroseconds, "Total time spent waiting for write syscall. This include writes to page cache.") \ + M(DiskCacheDecompressMicroseconds, "Time spent decompressing blocks on disk cache hit path.") \ + M(DiskCacheDiskReadMicroseconds, "Time spent in pread/read syscall on disk cache hit path (excludes decompression).") \ + M(DiskCacheUncompressedHit, "UncompressedCache hit: no disk I/O or decompression needed.") \ + M(DiskCacheUncompressedMiss, "UncompressedCache miss: disk read + decompression performed.") \ M(NetworkReceiveElapsedMicroseconds, \ "Total time spent waiting for data to receive or receiving data from network. 
Only ClickHouse-related network interaction is " \ "included, not by 3rd party libraries.") \ @@ -444,6 +448,10 @@ M(DiskCacheGetTotalOps, "Total count of disk cache get operations") \ M(DiskCacheSetTotalOps, "Total count of disk cache set operations") \ M(DiskCacheSetTotalBytes, "Total of disk cache set operations") \ + M(DiskCacheDataHits, "TTL cache hits for data column segments") \ + M(DiskCacheDataMisses, "TTL cache misses for data column segments") \ + M(DiskCacheIdxHits, "TTL cache hits for skip-index segments") \ + M(DiskCacheIdxMisses, "TTL cache misses for skip-index segments") \ M(DiskCacheDeviceBytesWritten, "Total bytes written of disk cache device") \ M(DiskCacheDeviceBytesRead, "Total bytes read of disk cache device") \ M(DiskCacheDeviceWriteIOErrors, "Total errors of disk cache device write io") \ diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index 56a976be25a..0c63c95a00c 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -26,6 +26,19 @@ #include "IO/BufferWithOwnMemory.h" #include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event DiskCacheDecompressMicroseconds; + extern const Event DiskCacheDiskReadMicroseconds; + extern const Event DiskCacheUncompressedHit; + extern const Event DiskCacheUncompressedMiss; +} + +static Poco::Logger * getLog() { return &Poco::Logger::get("CachedCompressedReadBuffer"); } namespace DB @@ -69,9 +82,13 @@ bool CachedCompressedReadBuffer::nextImpl() /// Let's check for the presence of a decompressed block in the cache, grab the ownership of this block, if it exists. 
UInt128 key = cache->hash(path, file_pos); + bool cache_miss = false; owned_cell = cache->getOrSet(key, [&]() { + cache_miss = true; initInput(); + + Stopwatch io_sw; file_in->seek(file_pos, SEEK_SET); auto cell = std::make_shared(); @@ -79,21 +96,33 @@ bool CachedCompressedReadBuffer::nextImpl() size_t size_decompressed; size_t size_compressed_without_checksum; cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum, false); + const auto io_us = io_sw.elapsedMicroseconds(); + ProfileEvents::increment(ProfileEvents::DiskCacheDiskReadMicroseconds, io_us); if (cell->compressed_size) { - // * a little bit hack here for reducing memory copy - // * allocate 12 more bytes to store {size_decompressed} and {size_decompressed}, padding at the end of the data cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer(); auto buffer = HybridCache::Buffer{size_decompressed + cell->additional_bytes + sizeof(cell->compressed_size) + sizeof(cell->additional_bytes)}; cell->data = std::move(buffer); cell->data.shrink(size_decompressed + cell->additional_bytes); + + Stopwatch decompress_sw; decompressTo(reinterpret_cast(cell->data.data()), size_decompressed, size_compressed_without_checksum); + const auto decompress_us = decompress_sw.elapsedMicroseconds(); + ProfileEvents::increment(ProfileEvents::DiskCacheDecompressMicroseconds, decompress_us); + + LOG_DEBUG(getLog(), "[cache-perf] path={} compressed={}B decompressed={}B disk_read={}us decompress={}us", + path, cell->compressed_size, size_decompressed, io_us, decompress_us); } return cell; }); + if (cache_miss) + ProfileEvents::increment(ProfileEvents::DiskCacheUncompressedMiss); + else + ProfileEvents::increment(ProfileEvents::DiskCacheUncompressedHit); + if (owned_cell->data.size() == 0) return false; diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index c3e4a3185fe..4ebefd9af92 100644 --- 
a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -26,7 +26,17 @@ #include #include #include +#include +#include +#include +namespace ProfileEvents +{ + extern const Event DiskCacheDiskReadMicroseconds; + extern const Event DiskCacheDecompressMicroseconds; +} + +static Poco::Logger * getLog() { return &Poco::Logger::get("CompressedReadBufferFromFile"); } namespace DB { @@ -48,7 +58,11 @@ bool CompressedReadBufferFromFile::nextImpl() size_t size_decompressed = 0; size_t size_compressed_without_checksum; + Stopwatch io_sw; size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); + const auto io_us = io_sw.elapsedMicroseconds(); + ProfileEvents::increment(ProfileEvents::DiskCacheDiskReadMicroseconds, io_us); + if (!size_compressed) return false; @@ -60,7 +74,13 @@ bool CompressedReadBufferFromFile::nextImpl() memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); + Stopwatch decomp_sw; decompress(working_buffer, size_decompressed, size_compressed_without_checksum); + const auto decomp_us = decomp_sw.elapsedMicroseconds(); + ProfileEvents::increment(ProfileEvents::DiskCacheDecompressMicroseconds, decomp_us); + + LOG_DEBUG(getLog(), "[cache-perf] path={} compressed={}B decompressed={}B disk_read={}us decompress={}us", + file_in.getFileName(), size_compressed, size_decompressed, io_us, decomp_us); /// nextimpl_working_buffer_offset is set in the seek function (lazy seek). So we have to /// check that we are not seeking beyond working buffer. 
@@ -163,7 +183,11 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) size_t size_decompressed = 0; size_t size_compressed_without_checksum = 0; + Stopwatch io_sw2; size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); + const auto io_us2 = io_sw2.elapsedMicroseconds(); + ProfileEvents::increment(ProfileEvents::DiskCacheDiskReadMicroseconds, io_us2); + size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer. if (!new_size_compressed) return bytes_read; @@ -174,7 +198,14 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) /// need to skip some bytes in decompressed data (seek happened before readBig call). if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) { + Stopwatch decomp_sw2; decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum); + const auto decomp_us2 = decomp_sw2.elapsedMicroseconds(); + ProfileEvents::increment(ProfileEvents::DiskCacheDecompressMicroseconds, decomp_us2); + + LOG_DEBUG(getLog(), "[cache-perf] path={} compressed={}B decompressed={}B disk_read={}us decompress={}us", + file_in.getFileName(), new_size_compressed, size_decompressed, io_us2, decomp_us2); + bytes_read += size_decompressed; bytes += size_decompressed; } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7a25004c3ab..1931f26a736 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -220,6 +220,7 @@ enum PreloadLevelSettings : UInt64 M(UInt64, background_gc_schedule_pool_size, 16, "Number of threads performing data removing related background tasks.", 0) \ M(UInt64, local_disk_cache_thread_pool_size, 16, "Number of threads perforrming background tasks from cache segments from cloud storage to local disk. 
Only has meaning at server startup.", 0) \ M(UInt64, local_disk_cache_evict_thread_pool_size, 16, "Number of threads perforrming asynchronous remove disk cache file.", 0) \ + M(UInt64, local_disk_cache_preload_thread_pool_size, 16, "Number of threads for preloading parts into local disk cache. Only has meaning at server startup.", 0) \ M(UInt64, \ max_bandwidth_for_disk_cache, \ 0, \ diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index ea90a803dbd..0b543e65cf5 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -136,7 +136,7 @@ void PocoHTTPClient::makeRequestInternal( Poco::Logger * log = &Poco::Logger::get("AWSClient"); auto uri = request.GetUri().GetURIString(); - LOG_DEBUG(log, "Make request to: {}", uri); + LOG_TRACE(log, "Make request to: {}", uri); enum class S3MetricType { @@ -296,7 +296,7 @@ void PocoHTTPClient::makeRequestInternal( request.GetContentBody()->seekg(0); auto size = Poco::StreamCopier::copyStream(*request.GetContentBody(), request_body_stream); - LOG_DEBUG(log, "Written {} bytes to request body", size); + LOG_TRACE(log, "Written {} bytes to request body", size); } LOG_TRACE(log, "Receiving response..."); @@ -306,14 +306,14 @@ void PocoHTTPClient::makeRequestInternal( ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds()); int status_code = static_cast(poco_response.getStatus()); - LOG_DEBUG(log, "Response status: {}, {}", status_code, poco_response.getReason()); + LOG_TRACE(log, "Response status: {}, {}", status_code, poco_response.getReason()); if (poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT) { auto location = poco_response.get("location"); remote_host_filter.checkURL(Poco::URI(location)); uri = location; - LOG_DEBUG(log, "Redirecting request to new location: {}", location); + LOG_TRACE(log, "Redirecting request to new location: {}", location); ProfileEvents::increment(select_metric(S3MetricType::Redirects)); @@ 
-328,7 +328,7 @@ void PocoHTTPClient::makeRequestInternal( response->AddHeader(header_name, header_value); headers_ss << header_name << ": " << header_value << "; "; } - LOG_DEBUG(log, "Received headers: {}", headers_ss.str()); + LOG_TRACE(log, "Received headers: {}", headers_ss.str()); if (status_code == 429 || status_code == 503) { // API throttling diff --git a/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp b/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp index 435dc34db81..a5c4d1cc802 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentExecutor.cpp @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -296,8 +298,7 @@ void fillPlanSegmentProfile( ContextPtr context, PlanSegment * plan_segment) { - AddressInfo current_address = getLocalAddress(*context); - segment_profile->worker_address = extractExchangeHostPort(current_address); + segment_profile->worker_address = getWorkerID(context); if (query_status) { auto query_status_info = query_status->getInfo(true, context->getSettingsRef().log_profile_events); @@ -324,18 +325,6 @@ void fillPlanSegmentProfile( auto step_profile = GroupedProcessorProfile::aggregateOperatorProfileToStepLevel(grouped_profiles); for (auto & [step_id, profile] : step_profile) segment_profile->profiles.emplace(step_id, profile); - auto & plan = plan_segment->getQueryPlan(); - for (auto & node : plan.getNodes()) - { - if (!node.step->getAttributeDescriptions().empty() && segment_profile->profiles.contains(node.id)) - { - for (auto & att : node.step->getAttributeDescriptions()) - { - auto attribute_ptr = std::make_shared(att.second); - segment_profile->profiles.at(node.id)->attributes.emplace(att.first, attribute_ptr); - } - } - } } } @@ -491,7 +480,7 @@ void PlanSegmentExecutor::doExecute() 
PlanSegmentDescription::getPlanSegmentDescription(plan_segment_instance->plan_segment, true) ->jsonPlanSegmentDescriptionAsString(collectStepRuntimeProfiles(pipeline))); } - if (context->getSettingsRef().report_segment_profiles && plan_segment) + if ((context->getSettingsRef().report_segment_profiles || context->getSettingsRef().log_segment_profiles) && plan_segment) { segment_profile = std::make_shared(query_log_element->client_info.initial_query_id, plan_segment->getPlanSegmentId()); fillPlanSegmentProfile( @@ -501,14 +490,41 @@ void PlanSegmentExecutor::doExecute() if (context->getSettingsRef().log_processors_profiles) { auto processors_profile_log = context->getProcessorsProfileLog(); + if (processors_profile_log) + processors_profile_log->addLogs(pipeline.get(), + context->getClientInfo().initial_query_id, + std::chrono::system_clock::now(), + plan_segment->getPlanSegmentId()); + } - if (!processors_profile_log) - return; - - processors_profile_log->addLogs(pipeline.get(), - context->getClientInfo().initial_query_id, - std::chrono::system_clock::now(), - plan_segment->getPlanSegmentId()); + // Collect post-execution attributes (e.g. CacheStats from TTL disk cache) into + // attribute_descriptions on TableScanStep, then propagate all attribute_descriptions + // from every plan node into the segment profile. 
+ if (segment_profile && plan_segment) + { + auto & plan = plan_segment->getQueryPlan(); + for (auto & node : plan.getNodes()) + { + if (auto * ts = dynamic_cast(node.step.get())) + { + LOG_DEBUG(logger, "Collecting post-execution attributes for TableScanStep node {}", node.id); + ts->collectPostExecutionAttributes(); + } + auto & descs = node.step->getAttributeDescriptions(); + if (descs.empty()) + continue; + LOG_DEBUG(logger, "Propagating {} attribute(s) from node {} ({}) into segment profile", + descs.size(), node.id, node.step->getName()); + if (!segment_profile->profiles.contains(node.id)) + { + auto m = std::make_shared(); + m->id = node.id; + segment_profile->profiles.emplace(node.id, m); + } + for (auto & [k, v] : descs) + segment_profile->profiles.at(node.id)->attributes.insert_or_assign( + k, std::make_shared(v)); + } } } diff --git a/src/Interpreters/DistributedStages/PlanSegmentReport.cpp b/src/Interpreters/DistributedStages/PlanSegmentReport.cpp index 5e4ca6f3b50..e82a7996196 100644 --- a/src/Interpreters/DistributedStages/PlanSegmentReport.cpp +++ b/src/Interpreters/DistributedStages/PlanSegmentReport.cpp @@ -136,7 +136,7 @@ PlanSegmentExecutor::ExecutionResult convertSuccessPlanSegmentStatusToResult( result.runtime_segment_status.message = "execute success"; result.runtime_segment_status.metrics.final_progress = final_progress.toProto(); result.sender_metrics = senderMetricsToProto(plan_segment_outputs, sender_metrics, execution_address); - if (query_context->getSettingsRef().report_segment_profiles && segment_profile) + if ((query_context->getSettingsRef().report_segment_profiles || query_context->getSettingsRef().log_segment_profiles) && segment_profile) result.segment_profile = segment_profile; return result; diff --git a/src/Interpreters/SegmentScheduler.cpp b/src/Interpreters/SegmentScheduler.cpp index 5bf0a7e4f11..8f9026e1bd6 100644 --- a/src/Interpreters/SegmentScheduler.cpp +++ b/src/Interpreters/SegmentScheduler.cpp @@ -96,7 +96,8 @@ 
SegmentScheduler::insertPlanSegments(const String & query_id, PlanSegmentTree * } { - if (query_context->isExplainQuery() && query_context->getSettingsRef().report_segment_profiles) + if ((query_context->isExplainQuery() && query_context->getSettingsRef().report_segment_profiles) + || query_context->getSettingsRef().log_segment_profiles) { std::unique_lock lock(segment_profile_mutex); segment_profile_map[query_id]; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 7fd8db382f4..20503784c48 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -91,6 +91,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include @@ -794,6 +799,161 @@ void interpretSettings(ASTPtr query, ContextMutablePtr context) } } +static String buildRuntimeStatsJSON(const std::unordered_map & profiles_map) +{ + struct StageAgg + { + String type, name, condition, keys; + UInt64 parts_after = 0, granules_after = 0; + }; + struct StepAgg + { + UInt64 total_parts = 0; + std::vector stages; + }; + + bool has_cache = false; + UInt64 cache_hit_segs = 0, cache_miss_segs = 0, steal_segs = 0, s3_fallback_segs = 0; + UInt64 cache_bytes = 0, s3_bytes = 0, cache_read_ms = 0, s3_read_ms = 0; + UInt64 idx_hit_segs = 0, idx_miss_segs = 0, idx_cache_bytes = 0, idx_s3_bytes = 0; + UInt64 idx_cache_read_ms = 0, idx_s3_read_ms = 0; + std::map index_by_step; + + for (const auto & [seg_id, seg_profiles] : profiles_map) + { + for (const auto & profile : seg_profiles) + { + for (const auto & [step_id, metric] : profile->profiles) + { + if (metric->attributes.count(RuntimeAttributeKeys::CacheStats)) + { + try + { + Poco::JSON::Parser parser; + auto obj = parser.parse(metric->attributes.at(RuntimeAttributeKeys::CacheStats)->description) + .extract(); + has_cache = true; + cache_hit_segs += obj->getValue("cache_hit_segs"); + cache_miss_segs += obj->getValue("cache_miss_segs"); + steal_segs += 
obj->getValue("steal_segs"); + s3_fallback_segs += obj->getValue("s3_fallback_segs"); + cache_bytes += obj->getValue("cache_bytes"); + s3_bytes += obj->getValue("s3_bytes"); + cache_read_ms += obj->getValue("cache_read_ms"); + s3_read_ms += obj->getValue("s3_read_ms"); + idx_hit_segs += obj->getValue("idx_hit_segs"); + idx_miss_segs += obj->getValue("idx_miss_segs"); + idx_cache_bytes += obj->getValue("idx_cache_bytes"); + idx_s3_bytes += obj->getValue("idx_s3_bytes"); + idx_cache_read_ms += obj->getValue("idx_cache_read_ms"); + idx_s3_read_ms += obj->getValue("idx_s3_read_ms"); + } + catch (...) {} + } + + if (metric->attributes.count(RuntimeAttributeKeys::Indexes)) + { + const auto & additional = metric->attributes.at(RuntimeAttributeKeys::Indexes)->additional; + if (additional.empty()) + continue; + try + { + Poco::JSON::Parser parser; + auto obj = parser.parse(additional).extract(); + auto & step_agg = index_by_step[step_id]; + step_agg.total_parts += obj->getValue("total_parts"); + auto stages = obj->getArray("stages"); + if (step_agg.stages.empty()) + { + for (size_t i = 0; i < stages->size(); ++i) + { + auto s = stages->getObject(i); + StageAgg agg; + agg.type = s->getValue("type"); + if (s->has("name")) agg.name = s->getValue("name"); + if (s->has("condition")) agg.condition = s->getValue("condition"); + if (s->has("keys")) agg.keys = s->getValue("keys"); + agg.parts_after = s->getValue("parts_after"); + agg.granules_after = s->getValue("granules_after"); + step_agg.stages.push_back(std::move(agg)); + } + } + else + { + for (size_t i = 0; i < std::min(stages->size(), step_agg.stages.size()); ++i) + { + auto s = stages->getObject(i); + step_agg.stages[i].parts_after += s->getValue("parts_after"); + step_agg.stages[i].granules_after += s->getValue("granules_after"); + } + } + } + catch (...) 
{} + } + } + } + } + + if (!has_cache && index_by_step.empty()) + return ""; + + auto runtime_stats = std::make_unique(); + + if (has_cache) + { + auto cache_obj = std::make_unique(); + cache_obj->add("cache_hit_segs", cache_hit_segs); + cache_obj->add("cache_miss_segs", cache_miss_segs); + cache_obj->add("steal_segs", steal_segs); + cache_obj->add("s3_fallback_segs", s3_fallback_segs); + cache_obj->add("cache_bytes", cache_bytes); + cache_obj->add("s3_bytes", s3_bytes); + cache_obj->add("cache_read_ms", cache_read_ms); + cache_obj->add("s3_read_ms", s3_read_ms); + cache_obj->add("idx_hit_segs", idx_hit_segs); + cache_obj->add("idx_miss_segs", idx_miss_segs); + cache_obj->add("idx_cache_bytes", idx_cache_bytes); + cache_obj->add("idx_s3_bytes", idx_s3_bytes); + cache_obj->add("idx_cache_read_ms", idx_cache_read_ms); + cache_obj->add("idx_s3_read_ms", idx_s3_read_ms); + runtime_stats->add(RuntimeAttributeKeys::CacheStats, std::move(cache_obj)); + } + + if (!index_by_step.empty()) + { + auto idx_arr = std::make_unique(); + for (auto & [step_id, step_agg] : index_by_step) + { + auto step_obj = std::make_unique(); + step_obj->add("total_parts", step_agg.total_parts); + auto stages_arr = std::make_unique(); + for (const auto & stage : step_agg.stages) + { + auto s = std::make_unique(); + s->add("type", stage.type); + if (!stage.name.empty()) s->add("name", stage.name); + if (!stage.condition.empty()) s->add("condition", stage.condition); + if (!stage.keys.empty()) s->add("keys", stage.keys); + s->add("parts_after", stage.parts_after); + s->add("granules_after", stage.granules_after); + stages_arr->add(std::move(s)); + } + step_obj->add("stages", std::move(stages_arr)); + idx_arr->add(std::move(step_obj)); + } + runtime_stats->add("IndexUsage", std::move(idx_arr)); + } + + auto outer = std::make_unique(); + outer->add("RuntimeStats", std::move(runtime_stats)); + + WriteBufferFromOwnString buf; + JSONBuilder::FormatSettings json_fmt{.settings = {}}; + 
JSONBuilder::FormatContext fmt_ctx{.out = buf}; + outer->format(json_fmt, fmt_ctx); + return buf.str(); +} + static std::tuple executeQueryImpl( const char * begin, const char * end, @@ -1553,6 +1713,7 @@ static std::tuple executeQueryImpl( log_queries_min_type = settings.log_queries_min_type, log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), log_processors_profiles = settings.log_processors_profiles, + log_segment_profiles = settings.log_segment_profiles, status_info_to_query_log, query_id, finish_current_transaction]( @@ -1697,6 +1858,23 @@ static std::tuple executeQueryImpl( elem.used_table_functions = factories_info.table_functions; elem.partition_ids = context->getPartitionIds(); + if (log_segment_profiles) + { + if (auto scheduler = context->getSegmentScheduler()) + { + auto seg_profiles = scheduler->getSegmentsProfile(elem.client_info.current_query_id); + if (!seg_profiles.empty()) + { + auto runtime_stats = buildRuntimeStatsJSON(seg_profiles); + if (!runtime_stats.empty()) + { + if (!elem.segment_profiles) + elem.segment_profiles = std::make_shared>(); + elem.segment_profiles->emplace_back(std::move(runtime_stats)); + } + } + } + } if (log_queries && elem.type >= log_queries_min_type && Int64(elem.query_duration_ms) >= log_queries_min_query_duration_ms) logQuery(context, elem); diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.cpp b/src/MergeTreeCommon/MergeTreeMetaBase.cpp index 51d33f22812..854e6f01519 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.cpp +++ b/src/MergeTreeCommon/MergeTreeMetaBase.cpp @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -202,6 +203,13 @@ void MergeTreeMetaBase::setRelativeDataPath(StorageLocation location, const Stri relative_data_path = rel_path; } +IDiskCachePtr MergeTreeMetaBase::getDiskCache() const +{ + // Default implementation: return global LRU cache + // Override in StorageCloudMergeTree for per-table TTL cache support + return 
DiskCacheFactory::instance().get(DiskCacheType::MergeTree); +} + static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name, bool allow_nullable_key) { for (const auto & action : expr.getActions()) diff --git a/src/MergeTreeCommon/MergeTreeMetaBase.h b/src/MergeTreeCommon/MergeTreeMetaBase.h index 106dbd295b4..fc7fa960203 100644 --- a/src/MergeTreeCommon/MergeTreeMetaBase.h +++ b/src/MergeTreeCommon/MergeTreeMetaBase.h @@ -37,6 +37,8 @@ namespace DB { class MutationCommands; +class IDiskCache; +using IDiskCachePtr = std::shared_ptr; class MergeTreeMetaBase : public IStorage, public WithMutableContext, public MergeTreeDataPartTypeHelper { @@ -171,6 +173,11 @@ class MergeTreeMetaBase : public IStorage, public WithMutableContext, public Mer virtual const String& getRelativeDataPath(StorageLocation location) const; void setRelativeDataPath(StorageLocation location, const String & rel_path); + /// Get disk cache (TTL cache if enabled, otherwise global LRU) + /// Override in subclasses to provide per-table TTL cache support + virtual IDiskCachePtr getDiskCache() const; + + bool supportsFinal() const override { return merging_params.mode == MergingParams::Collapsing diff --git a/src/Processors/Transforms/ExplainAnalyzeTransform.cpp b/src/Processors/Transforms/ExplainAnalyzeTransform.cpp index a0c35ee4e5f..c1c71afad2d 100644 --- a/src/Processors/Transforms/ExplainAnalyzeTransform.cpp +++ b/src/Processors/Transforms/ExplainAnalyzeTransform.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -52,6 +53,31 @@ void ExplainAnalyzeTransform::transform(Chunk & chunk) break; } + // Wait for segment profiles to arrive. Profiles are sent before status over separate RPCs, + // but server-side RPC thread scheduling can process status before profile, causing a race. 
+ if (context->getSettingsRef().report_segment_profiles || context->getSettingsRef().log_segment_profiles) + { + size_t expected_profiles = 0; + for (auto & desc : segment_descriptions) + if (desc->segment_id != 0) + expected_profiles += desc->parallel; + + auto profile_wait_start = std::chrono::steady_clock::now(); + while (expected_profiles > 0) + { + auto now = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(now - profile_wait_start).count() >= 100) + break; + auto current_map = scheduler->getSegmentsProfile(context->getCurrentQueryId()); + size_t received = 0; + for (auto & [seg_id, seg_profiles] : current_map) + received += seg_profiles.size(); + if (received >= expected_profiles) + break; + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + } + } + auto profiles_map = scheduler->getSegmentsProfile(context->getCurrentQueryId()); String explain; if ((kind == ASTExplainQuery::ExplainKind::LogicalAnalyze || kind == ASTExplainQuery::ExplainKind::DistributedAnalyze)) diff --git a/src/Protos/cnch_worker_rpc.proto b/src/Protos/cnch_worker_rpc.proto index 71da188d569..138d4e8a19c 100644 --- a/src/Protos/cnch_worker_rpc.proto +++ b/src/Protos/cnch_worker_rpc.proto @@ -183,6 +183,85 @@ message GetManipulationTasksStatusResp repeated ManipulationTask tasks = 2; } +message TTLCacheTableStats +{ + optional string table_name = 1; + optional string table_uuid = 2; + optional uint64 ttl_minutes = 3; + optional uint64 max_size_bytes = 4; + optional uint64 last_eviction_run = 5; + optional uint64 evicted_expired = 6; + optional uint64 evicted_size_limit = 7; + optional uint64 async_triggered_evicted = 8; + optional uint64 async_skipped_rate_limit_evicted = 9; + optional uint64 rejected_non_time_partition = 12; + optional uint64 rejected_too_old = 13; + optional uint64 count_preload = 14; + optional uint64 count_query = 15; + optional uint64 bytes_preload = 16; + optional uint64 bytes_query = 17; + optional uint64 count_restored = 18; + optional 
uint64 bytes_restored = 19; + optional uint64 idx_count_preload = 20; + optional uint64 idx_bytes_preload = 21; + optional uint64 idx_count_query = 22; + optional uint64 idx_bytes_query = 23; + optional uint64 data_hits = 24; + optional uint64 data_misses = 25; + optional uint64 idx_hits = 26; + optional uint64 idx_misses = 27; +} + +message GetTTLCacheStatsReq +{ +} + +message GetTTLCacheStatsResp +{ + optional string exception = 1; + repeated TTLCacheTableStats tables = 2; +} + +message TTLCachePartitionStats +{ + optional string table_name = 1; + optional string table_uuid = 2; + optional string partition = 3; + optional uint64 entry_count = 4; + optional uint64 bytes = 5; +} + +message GetTTLCachePartitionStatsReq +{ +} + +message GetTTLCachePartitionStatsResp +{ + optional string exception = 1; + repeated TTLCachePartitionStats partitions = 2; +} + +message PreloadPartitionStats +{ + optional string table_name = 1; + optional string table_uuid = 2; + optional string partition_id = 3; + optional uint64 parts_in_flight = 4; + optional uint64 parts_submitted = 5; + optional uint64 elapsed_ms = 6; + optional uint64 preload_level = 7; +} + +message GetPreloadStatsReq +{ +} + +message GetPreloadStatsResp +{ + optional string exception = 1; + repeated PreloadPartitionStats partitions = 2; +} + message GetPreallocatedStatusReq { required UUID storage_id = 1; @@ -616,4 +695,8 @@ service CnchWorkerService rpc dropPartDiskCache(DropPartDiskCacheReq) returns (DropPartDiskCacheResp); rpc executeDedupTask(ExecuteDedupTaskReq) returns (ExecuteDedupTaskResp); + + rpc getTTLCacheStats(GetTTLCacheStatsReq) returns (GetTTLCacheStatsResp); + rpc getTTLCachePartitionStats(GetTTLCachePartitionStatsReq) returns (GetTTLCachePartitionStatsResp); + rpc getPreloadStats(GetPreloadStatsReq) returns (GetPreloadStatsResp); } diff --git a/src/QueryPlan/IQueryPlanStep.h b/src/QueryPlan/IQueryPlanStep.h index 1425adaafc0..e6015f60f95 100644 --- a/src/QueryPlan/IQueryPlanStep.h +++ 
b/src/QueryPlan/IQueryPlanStep.h @@ -130,6 +130,14 @@ struct RuntimeAttributeDescription void toProto(Protos::RuntimeAttributeDescription & proto) const; }; +namespace RuntimeAttributeKeys +{ + static constexpr const char * Indexes = "Indexes"; + static constexpr const char * SelectParts = "SelectParts"; + static constexpr const char * TableScanDescription = "TableScanDescription"; + static constexpr const char * CacheStats = "CacheStats"; +} + /// Single step of query plan. class IQueryPlanStep diff --git a/src/QueryPlan/PlanPrinter.cpp b/src/QueryPlan/PlanPrinter.cpp index 9bda33ccf4c..bcb8f260810 100644 --- a/src/QueryPlan/PlanPrinter.cpp +++ b/src/QueryPlan/PlanPrinter.cpp @@ -686,33 +686,40 @@ String PlanPrinter::TextPrinter::printAttributes(PlanNodeBase & plan, const Text size_t step_id = plan.getId(); if (!profiles.contains(step_id) || profiles.at(step_id)->address_to_attributes.empty()) return ""; - if (!settings.query_plan_options.indexes && !settings.selected_parts) + const auto & address_to_attributes = profiles.at(step_id)->address_to_attributes; + bool has_priority_attrs = std::any_of(address_to_attributes.begin(), address_to_attributes.end(), + [](const auto & p) { + return p.second.count(RuntimeAttributeKeys::CacheStats) + || p.second.count(RuntimeAttributeKeys::Indexes); + }); + if (!settings.query_plan_options.indexes && !settings.selected_parts && !has_priority_attrs) return ""; std::stringstream out; - const auto & address_to_attributes = profiles.at(step_id)->address_to_attributes; if (plan.getStep()->getType() == IQueryPlanStep::Type::TableScan) { - String space; for (const auto & [address, attribute] : address_to_attributes) { - if (address_to_attributes.size() > 1) - { - out << intent.detailIntent() << address; - space = " "; - } - if (settings.query_plan_options.indexes && attribute.contains("Indexes")) + String space = " "; + out << intent.detailIntent() << address; + if (settings.query_plan_options.indexes && 
attribute.contains(RuntimeAttributeKeys::Indexes)) { out << intent.detailIntent() << space << "Indexes:"; - auto index_desc = attribute.at("Indexes"); + auto index_desc = attribute.at(RuntimeAttributeKeys::Indexes); for (const auto & desc : index_desc->name_and_detail) out << intent.detailIntent() << space << " " << desc.second; } if (settings.selected_parts) { - if (attribute.contains("SelectParts")) - out << intent.detailIntent() << space << attribute.at("SelectParts")->description; - if (attribute.contains("TableScanDescription")) - out << intent.detailIntent() << space << attribute.at("TableScanDescription")->description; + if (attribute.contains(RuntimeAttributeKeys::SelectParts)) + out << intent.detailIntent() << space << attribute.at(RuntimeAttributeKeys::SelectParts)->description; + if (attribute.contains(RuntimeAttributeKeys::TableScanDescription)) + out << intent.detailIntent() << space << attribute.at(RuntimeAttributeKeys::TableScanDescription)->description; + } + if (attribute.contains(RuntimeAttributeKeys::CacheStats)) + { + out << intent.detailIntent() << space << "CacheStats:"; + for (const auto & desc : attribute.at(RuntimeAttributeKeys::CacheStats)->name_and_detail) + out << intent.detailIntent() << space << " " << desc.second; } } return out.str(); diff --git a/src/QueryPlan/ReadFromMergeTree.cpp b/src/QueryPlan/ReadFromMergeTree.cpp index cd3934c832d..ac51fc02ab1 100644 --- a/src/QueryPlan/ReadFromMergeTree.cpp +++ b/src/QueryPlan/ReadFromMergeTree.cpp @@ -32,7 +32,10 @@ #include #include #include +#include #include +#include +#include #include #include "Storages/MergeTree/MergeTreeIOSettings.h" #include @@ -46,6 +49,9 @@ namespace ProfileEvents extern const Event SelectedParts; extern const Event SelectedRanges; extern const Event SelectedMarks; + extern const Event IndexGranuleSeekTime; + extern const Event IndexGranuleReadTime; + extern const Event IndexGranuleCalcTime; } namespace DB @@ -172,7 +178,8 @@ static bool isSamePartition(const 
RangesInDataPart & lhs, const RangesInDataPart static bool canReadInPartitionOrder( const StorageInMemoryMetadata & metadata, const InputOrderInfo & input_order_info, - const ASTSelectQuery & select) + const ASTSelectQuery & select, + ContextPtr context) { if (!metadata.isPartitionKeyDefined() || !metadata.isSortingKeyDefined()) return false; @@ -191,8 +198,27 @@ static bool canReadInPartitionOrder( /// sorting columns should contain partition column auto partition_column_it = std::find(sorting_columns.begin(), sorting_columns.end(), partition_column); + + /// If partition_column is a MATERIALIZED alias (e.g. `date MATERIALIZED toDate(timestamp)`) + /// it won't appear directly in sorting columns. Expand it and retry. + ExpressionActionsPtr expanded_expr; if (partition_column_it == sorting_columns.end()) - return false; + { + auto col_default = metadata.getColumns().getDefault(partition_column); + if (!col_default || col_default->kind != ColumnDefaultKind::Materialized || !col_default->expression) + return false; + + auto mat_key = KeyDescription::getKeyFromAST(col_default->expression, metadata.getColumns(), context); + Names mat_required = mat_key.expression->getRequiredColumns(); + if (mat_required.size() != 1) + return false; + + partition_column_it = std::find(sorting_columns.begin(), sorting_columns.end(), mat_required[0]); + if (partition_column_it == sorting_columns.end()) + return false; + + expanded_expr = mat_key.expression; + } /// Allow table "partition by c order by (a, b, c)" for query "where a={} and b={} order by c", /// where all sorting columns before partition column match single value, @@ -227,9 +253,11 @@ static bool canReadInPartitionOrder( if (partition_key.column_names.front() == *partition_column_it) return true; - /// Allow "partition by func(x) order by (x)" where func is monotonic nondecreasing + /// Allow "partition by func(x) order by (x)" where func is monotonic nondecreasing. 
+ /// For MATERIALIZED columns use the expanded expression; otherwise use the partition key expression. + const ExpressionActions & expr_for_monotonicity = expanded_expr ? *expanded_expr : *partition_key.expression; IFunction::Monotonicity monotonicity; - for (const auto & action : partition_key.expression->getActions()) + for (const auto & action : expr_for_monotonicity.getActions()) { if (action.node->type != ActionsDAG::ActionType::FUNCTION) { @@ -1397,7 +1425,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build result.selected_marks, result.selected_ranges); - if (context->getSettingsRef().report_segment_profiles) + if (context->getSettingsRef().report_segment_profiles || context->getSettingsRef().log_segment_profiles) fillRuntimeAttributeDescriptions(result); ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); @@ -1475,7 +1503,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build auto sorting_key_prefix_expr = ExpressionAnalyzer(order_key_prefix_ast, syntax_result, context).getActionsDAG(false); can_read_in_partition_order = (settings.optimize_read_in_partition_order || settings.force_read_in_partition_order) - && canReadInPartitionOrder(*metadata_for_reading, *input_order_info, query_info.query->as()); + && canReadInPartitionOrder(*metadata_for_reading, *input_order_info, query_info.query->as(), context); if (can_read_in_partition_order && result.selected_partitions > 1) { @@ -1759,54 +1787,160 @@ std::shared_ptr ReadFromMergeTree::copy(ContextPtr) const void ReadFromMergeTree::fillRuntimeAttributeDescriptions(const ReadFromMergeTree::AnalysisResult & result) { - auto index_stats = result.index_stats; - if (!result.index_stats.empty()) + const auto & index_stats = result.index_stats; + if (!index_stats.empty()) { RuntimeAttributeDescription index_desc; - for (size_t i = 0; i < index_stats.size(); ++i) + auto stages_array = std::make_unique(); + UInt64 prev_parts = 0; + 
UInt64 prev_granules = 0; + bool has_prev = false; + for (const auto & stat : index_stats) { - const auto & stat = index_stats[i]; if (stat.type == IndexType::None) continue; - std::stringstream out; - out << "Type: " << indexTypeToString(stat.type) << ";"; + String entry = fmt::format("Type: {};", indexTypeToString(stat.type)); if (!stat.name.empty()) - out << " Name: " << stat.name << ";"; + entry += fmt::format(" Name: {};", stat.name); if (!stat.description.empty()) - out << " Description: " << stat.description << ";"; + entry += fmt::format(" Description: {};", stat.description); if (!stat.used_keys.empty()) - { - String keys = fmt::format("{}", fmt::join(stat.used_keys, ",")); - out << " Keys: " << keys << ";"; - } + entry += fmt::format(" Keys: {};", fmt::join(stat.used_keys, ",")); if (!stat.condition.empty()) - out << " Condition: " << stat.condition << ";"; - out << " Parts: " << stat.num_parts_after; - if (i) - out << '/' << index_stats[i - 1].num_parts_after; - out << ";"; - out << " Granules: " << stat.num_granules_after; - if (i) - out << '/' << index_stats[i - 1].num_granules_after; - out << ";"; - index_desc.name_and_detail.emplace_back(indexTypeToString(stat.type), out.str()); + entry += fmt::format(" Condition: {};", stat.condition); + if (has_prev) + entry += fmt::format(" Parts: {}/{};", stat.num_parts_after, prev_parts); + else + entry += fmt::format(" Parts: {};", stat.num_parts_after); + if (has_prev) + entry += fmt::format(" Granules: {}/{};", stat.num_granules_after, prev_granules); + else + entry += fmt::format(" Granules: {};", stat.num_granules_after); + index_desc.name_and_detail.emplace_back(indexTypeToString(stat.type), std::move(entry)); + + auto stage = std::make_unique(); + stage->add("type", indexTypeToString(stat.type)); + if (!stat.name.empty()) + stage->add("name", stat.name); + if (!stat.condition.empty()) + stage->add("condition", stat.condition); + if (!stat.used_keys.empty()) + stage->add("keys", 
fmt::to_string(fmt::join(stat.used_keys, ","))); + stage->add("parts_after", stat.num_parts_after); + stage->add("granules_after", stat.num_granules_after); + stages_array->add(std::move(stage)); + + prev_parts = stat.num_parts_after; + prev_granules = stat.num_granules_after; + has_prev = true; } - index_desc.description = "Indexes"; - attribute_descriptions.emplace(index_desc.description, std::move(index_desc)); + + auto idx_json = std::make_unique(); + idx_json->add("total_parts", result.total_parts); + idx_json->add("stages", std::move(stages_array)); + WriteBufferFromOwnString idx_buf; + JSONBuilder::FormatSettings idx_fmt{.settings = {}}; + JSONBuilder::FormatContext idx_ctx{.out = idx_buf}; + idx_json->format(idx_fmt, idx_ctx); + index_desc.additional = idx_buf.str(); + + index_desc.description = RuntimeAttributeKeys::Indexes; + attribute_descriptions.insert_or_assign(RuntimeAttributeKeys::Indexes, std::move(index_desc)); } RuntimeAttributeDescription parts_desc; - String selected_parts_info = fmt::format( - "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", + parts_desc.description = fmt::format( + "Selected {}/{} parts by partition key ({} partitions), {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", result.parts_before_pk, result.total_parts, + result.selected_partitions, result.selected_parts, result.selected_marks_pk, result.total_marks_pk, result.selected_marks, result.selected_ranges); - parts_desc.description = selected_parts_info; - attribute_descriptions.emplace("SelectParts", std::move(parts_desc)); + attribute_descriptions.insert_or_assign(RuntimeAttributeKeys::SelectParts, std::move(parts_desc)); + +} + +void ReadFromMergeTree::collectCacheStats() +{ + auto query_id = CurrentThread::getQueryId().toString(); + LOG_DEBUG(log, "collectCacheStats: query_id={}", query_id); + if (query_id.empty()) + return; + auto cache_stats = 
DiskCacheFactory::instance().consumeQueryCacheStats(query_id); + if (!cache_stats) + { + LOG_DEBUG(log, "collectCacheStats: no stats found for query_id={}", query_id); + return; + } + LOG_DEBUG(log, "collectCacheStats: data hit={} miss={} steal={} s3_fallback={} cache_bytes={} s3_bytes={} idx hit={} miss={} idx_cache_bytes={} idx_s3_bytes={}", + cache_stats->cache_hit_segs, cache_stats->cache_miss_segs, + cache_stats->steal_segs, cache_stats->s3_fallback_segs, + cache_stats->cache_bytes, cache_stats->s3_bytes, + cache_stats->idx_hit_segs, cache_stats->idx_miss_segs, + cache_stats->idx_cache_bytes, cache_stats->idx_s3_bytes); + JSONBuilder::JSONMap cache_map; + cache_map.add("cache_hit_segs", cache_stats->cache_hit_segs); + cache_map.add("cache_miss_segs", cache_stats->cache_miss_segs); + cache_map.add("steal_segs", cache_stats->steal_segs); + cache_map.add("s3_fallback_segs", cache_stats->s3_fallback_segs); + cache_map.add("cache_bytes", cache_stats->cache_bytes); + cache_map.add("s3_bytes", cache_stats->s3_bytes); + cache_map.add("cache_read_ms", cache_stats->cache_read_ms); + cache_map.add("cache_read_ms_max", cache_stats->cache_read_ms_max); + cache_map.add("cache_read_ms_min", cache_stats->cache_read_ms_min); + cache_map.add("s3_read_ms", cache_stats->s3_read_ms); + cache_map.add("idx_hit_segs", cache_stats->idx_hit_segs); + cache_map.add("idx_miss_segs", cache_stats->idx_miss_segs); + cache_map.add("idx_cache_bytes", cache_stats->idx_cache_bytes); + cache_map.add("idx_s3_bytes", cache_stats->idx_s3_bytes); + cache_map.add("idx_cache_read_ms", cache_stats->idx_cache_read_ms); + cache_map.add("idx_s3_read_ms", cache_stats->idx_s3_read_ms); + WriteBufferFromOwnString buf; + JSONBuilder::FormatSettings json_fmt{.settings = {}}; + JSONBuilder::FormatContext fmt_ctx{.out = buf}; + cache_map.format(json_fmt, fmt_ctx); + RuntimeAttributeDescription cache_desc; + cache_desc.description = buf.str(); + uint64_t cache_wall_ms = cache_stats->reader_count > 0 + ? 
cache_stats->cache_read_ms / cache_stats->reader_count + : cache_stats->cache_read_ms; + uint64_t s3_wall_ms = cache_stats->reader_count > 0 + ? cache_stats->s3_read_ms / cache_stats->reader_count + : cache_stats->s3_read_ms; + cache_desc.name_and_detail.emplace_back("data", + fmt::format("data: hit={} miss={} steal={} s3={} cache={:.1f}MB ReadTime: {}ms[max={}ms, min={}ms] s3={:.1f}MB/{}ms", + cache_stats->cache_hit_segs, cache_stats->cache_miss_segs, + cache_stats->steal_segs, cache_stats->s3_fallback_segs, + cache_stats->cache_bytes / (1024.0 * 1024.0), + cache_wall_ms, cache_stats->cache_read_ms_max, cache_stats->cache_read_ms_min, + cache_stats->s3_bytes / (1024.0 * 1024.0), s3_wall_ms)); + uint64_t idx_s3_wall_ms = cache_stats->idx_reader_count > 0 + ? cache_stats->idx_s3_read_ms / cache_stats->idx_reader_count + : cache_stats->idx_s3_read_ms; + uint64_t idx_cache_wall_ms = cache_stats->idx_reader_count > 0 + ? cache_stats->idx_cache_read_ms / cache_stats->idx_reader_count + : cache_stats->idx_cache_read_ms; + cache_desc.name_and_detail.emplace_back("idx", + fmt::format("idx: hit={} miss={} cache={:.1f}MB/{}ms s3={:.1f}MB/{}ms", + cache_stats->idx_hit_segs, cache_stats->idx_miss_segs, + cache_stats->idx_cache_bytes / (1024.0 * 1024.0), idx_cache_wall_ms, + cache_stats->idx_s3_bytes / (1024.0 * 1024.0), idx_s3_wall_ms)); + + if (auto * tg = CurrentThread::getGroup().get()) + { + auto seek_us = tg->performance_counters[ProfileEvents::IndexGranuleSeekTime].load(); + auto read_us = tg->performance_counters[ProfileEvents::IndexGranuleReadTime].load(); + auto calc_us = tg->performance_counters[ProfileEvents::IndexGranuleCalcTime].load(); + if (seek_us > 0 || read_us > 0 || calc_us > 0) + cache_desc.name_and_detail.emplace_back("idx_eval", + fmt::format("idx_eval: seek={}ms read={}ms calc={}ms", + seek_us / 1000, read_us / 1000, calc_us / 1000)); + } + + attribute_descriptions.insert_or_assign(RuntimeAttributeKeys::CacheStats, std::move(cache_desc)); } bool 
MergeTreeDataSelectAnalysisResult::error() const diff --git a/src/QueryPlan/ReadFromMergeTree.h b/src/QueryPlan/ReadFromMergeTree.h index 947754eaf25..1265d5cef69 100644 --- a/src/QueryPlan/ReadFromMergeTree.h +++ b/src/QueryPlan/ReadFromMergeTree.h @@ -149,6 +149,7 @@ class ReadFromMergeTree final : public ISourceStep std::shared_ptr copy(ContextPtr ptr) const override; void fillRuntimeAttributeDescriptions(const ReadFromMergeTree::AnalysisResult & result); + void collectCacheStats(); StorageID getStorageID() const { return data.getStorageID(); } UInt64 getSelectedParts() const { return selected_parts; } diff --git a/src/QueryPlan/TableScanStep.cpp b/src/QueryPlan/TableScanStep.cpp index 1d164347395..39ed06fceec 100644 --- a/src/QueryPlan/TableScanStep.cpp +++ b/src/QueryPlan/TableScanStep.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1341,17 +1342,17 @@ void TableScanStep::initializePipeline(QueryPipeline & pipeline, const BuildQuer QueryPlanOptimizationSettings::fromContext(build_context.context), BuildQueryPipelineSettings::fromContext(build_context.context)); + for (auto & node : storage_plan.getNodes()) { - for (auto & node : storage_plan.getNodes()) + if (!read_step && dynamic_cast(node.step.get())) + read_step = node.step; + auto & att_descs = node.step->getAttributeDescriptions(); + if (att_descs.empty()) + continue; + for (auto & desc : att_descs) { - auto & att_descs = node.step->getAttributeDescriptions(); - if (att_descs.empty()) - continue; - for (auto & desc : att_descs) - { - if (!attribute_descriptions.contains(desc.first)) - attribute_descriptions.emplace(desc.first, desc.second); - } + if (!attribute_descriptions.contains(desc.first)) + attribute_descriptions.emplace(desc.first, desc.second); } } @@ -1645,7 +1646,7 @@ void TableScanStep::initializePipeline(QueryPipeline & pipeline, const BuildQuer setStepDescription(step_desc.str()); RuntimeAttributeDescription tablescan_desc; tablescan_desc.description = 
step_desc.str(); - attribute_descriptions.emplace("TableScanDescription", tablescan_desc); + attribute_descriptions.emplace(RuntimeAttributeKeys::TableScanDescription, tablescan_desc); LOG_DEBUG(log, "init pipeline total run time: {} ms, table scan descriptiion: {}", total_watch.elapsedMillisecondsAsDouble(), step_desc.str()); } @@ -2086,4 +2087,18 @@ void TableScanStep::fillQueryInfoV2(ContextPtr context) query_info.index_context = std::make_shared(); } +void TableScanStep::collectPostExecutionAttributes() +{ + auto * rmt = dynamic_cast(read_step.get()); + if (!rmt) + return; + rmt->collectCacheStats(); + auto & rmt_descs = rmt->getAttributeDescriptions(); + LOG_DEBUG(log, "collectPostExecutionAttributes: collected {} attribute(s) from ReadFromMergeTree, has_cache_stats={}", + rmt_descs.size(), rmt_descs.contains(RuntimeAttributeKeys::CacheStats)); + for (auto & [k, v] : rmt_descs) + attribute_descriptions.insert_or_assign(k, v); + read_step.reset(); +} + } diff --git a/src/QueryPlan/TableScanStep.h b/src/QueryPlan/TableScanStep.h index ce71efae9ae..f571bc4bba0 100644 --- a/src/QueryPlan/TableScanStep.h +++ b/src/QueryPlan/TableScanStep.h @@ -87,6 +87,7 @@ class TableScanStep : public ISourceStep Type getType() const override { return Type::TableScan; } void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + void collectPostExecutionAttributes(); void toProto(Protos::TableScanStep & proto, bool for_hash_equals = false) const; static std::shared_ptr fromProto(const Protos::TableScanStep & proto, ContextPtr context); @@ -219,6 +220,9 @@ class TableScanStep : public ISourceStep // Only for worker. bool is_null_source{false}; + // Kept alive after initializePipeline to allow collectPostExecutionAttributes + // to harvest CacheStats after pipeline execution. 
+ std::shared_ptr read_step; // Optimises the where clauses for a bucket table by rewriting the IN clause and hence reducing the IN set size void rewriteInForBucketTable(ContextPtr context) const; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 6c88fa3ce46..e113f6f4390 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -534,6 +534,16 @@ void HTTPHandler::processQuery( /// see also https://github.com/ClickHouse/ClickHouse/pull/26864 context = Context::createCopy(session->context); context->setSessionContext(session->context); + + // Re-apply per-request database/tenant_id: they were set on the old context + // before the session copy replaced it, so they must be restored explicitly. + if (!database.empty()) + context->setCurrentDatabase(database); + if (!tenant_id.empty()) + { + context->setSetting("tenant_id", tenant_id); + context->setTenantId(tenant_id); + } } SCOPE_EXIT({ diff --git a/src/Server/ServerPrometheusMetricsWriter.cpp b/src/Server/ServerPrometheusMetricsWriter.cpp index 16503b45882..c8827f5b062 100644 --- a/src/Server/ServerPrometheusMetricsWriter.cpp +++ b/src/Server/ServerPrometheusMetricsWriter.cpp @@ -7,7 +7,10 @@ #include #include #include +#include +#include #include +#include #include #include #include @@ -16,6 +19,7 @@ #include #include +#include namespace DB { @@ -570,6 +574,79 @@ void ServerPrometheusMetricsWriter::writePartMetrics(WriteBuffer & wb) } +void ServerPrometheusMetricsWriter::writeTTLCacheMetrics(WriteBuffer & wb) +{ + auto caches = DiskCacheFactory::instance().getAllTableTTLCaches(); + if (caches.empty()) + return; + + const String worker_id = getWorkerID(context); + + static constexpr auto PREFIX = "byconity_ttl_cache_"; + + // Emit one gauge or counter line. TYPE/HELP are written once per metric name across all + // tables, so we track which names we've already emitted the header for. 
+ std::unordered_set headers_written; + + auto emit = [&](const char * name, const char * type, const char * help, + const MetricLabels & labels, size_t value) + { + String key{PREFIX}; + key += name; + if (headers_written.insert(key).second) + { + writeOutLine(wb, "# HELP", key, help); + writeOutLine(wb, "# TYPE", key, type); + } + writeOutLine(wb, key + getLabel(labels), value); + }; + + for (auto & [uuid, cache_ptr] : caches) + { + auto * ttl = dynamic_cast(cache_ptr.get()); + if (!ttl) + continue; + + auto s = ttl->getStats(); + const String table_name = cache_ptr->getName(); + MetricLabels base{{"table_name", table_name}, {"worker_id", worker_id}}; + + // gauges — current state, can go up or down + emit("entries", GAUGE_TYPE, "Segments currently cached on disk", base, s.total_entries); + emit("bytes", GAUGE_TYPE, "Bytes currently cached on disk", base, s.total_bytes); + emit("ttl_minutes", GAUGE_TYPE, "Configured TTL window in minutes", base, ttl->getTTLMinutes()); + emit("max_size_bytes", GAUGE_TYPE, "Per-table size cap in bytes (0 = unlimited)", base, ttl->getMaxSizeBytes()); + + // counters — monotonically increasing, use rate() in Prometheus + auto base_q = base; base_q.insert({"write_type", "query"}); + auto base_p = base; base_p.insert({"write_type", "preload"}); + emit("segments_written_total", COUNTER_TYPE, "Segments written to TTL cache", base_q, s.cached_from_query); + emit("segments_written_total", COUNTER_TYPE, "", base_p, s.cached_from_preload); + emit("bytes_written_total", COUNTER_TYPE, "Bytes written to TTL cache", base_q, s.cached_bytes_query); + emit("bytes_written_total", COUNTER_TYPE, "", base_p, s.cached_bytes_preload); + + auto base_exp = base; base_exp.insert({"eviction_type", "expired"}); + auto base_size = base; base_size.insert({"eviction_type", "size_limit"}); + emit("evictions_total", COUNTER_TYPE, "Segments evicted from TTL cache", base_exp, s.evicted_expired); + emit("evictions_total", COUNTER_TYPE, "", base_size, 
s.evicted_size_limit); + + emit("async_evictions_triggered_total", COUNTER_TYPE, "Async eviction trigger count", base, s.async_eviction_triggered); + + auto base_old = base; base_old.insert({"reason", "too_old"}); + auto base_ntime = base; base_ntime.insert({"reason", "non_time_partition"}); + emit("rejections_total", COUNTER_TYPE, "Segments rejected from TTL cache", base_old, s.rejected_too_old); + emit("rejections_total", COUNTER_TYPE, "", base_ntime, s.rejected_non_time_partition); + + emit("hits_total", COUNTER_TYPE, "Cache segment read hits", base, s.total_hits); + emit("misses_total", COUNTER_TYPE, "Cache segment read misses", base, s.total_misses); + } + + // global gauges — no table label + MetricLabels wlabel{{"worker_id", worker_id}}; + emit("global_bytes", GAUGE_TYPE, "Total bytes across all TTL caches on this worker", wlabel, DiskCacheFactory::instance().getGlobalTTLUsage()); + emit("global_limit_bytes", GAUGE_TYPE, "Global TTL cache limit on this worker", wlabel, DiskCacheFactory::instance().getGlobalTTLLimit()); +} + void ServerPrometheusMetricsWriter::write(WriteBuffer & wb) { writeConfigMetrics(wb); @@ -590,5 +667,7 @@ void ServerPrometheusMetricsWriter::write(WriteBuffer & wb) /// Export the parts related metrics, the values are consistent with the system.cnch_parts writePartMetrics(wb); + + writeTTLCacheMetrics(wb); } } diff --git a/src/Server/ServerPrometheusMetricsWriter.h b/src/Server/ServerPrometheusMetricsWriter.h index ead674ee323..3cfe2c4dbfe 100644 --- a/src/Server/ServerPrometheusMetricsWriter.h +++ b/src/Server/ServerPrometheusMetricsWriter.h @@ -507,50 +507,40 @@ namespace ProfileEvents // extern const Event DropAccessPolicyFailed; extern const Event IsHostServerSuccess; extern const Event IsHostServerFailed; - // extern const Event S3GETMicroseconds; - // extern const Event S3GETBytes; - // extern const Event S3GETRequestsCount; - // extern const Event S3GETRequestsErrors; - // extern const Event S3GETRequestsThrottling; - // extern 
const Event S3GETRequestsRedirects; - // extern const Event S3HEADMicroseconds; - // extern const Event S3HEADBytes; - // extern const Event S3HEADRequestsCount; - // extern const Event S3HEADRequestsErrors; - // extern const Event S3HEADRequestsThrottling; - // extern const Event S3HEADRequestsRedirects; - // extern const Event S3POSTMicroseconds; - // extern const Event S3POSTBytes; - // extern const Event S3POSTRequestsCount; - // extern const Event S3POSTRequestsErrors; - // extern const Event S3POSTRequestsThrottling; - // extern const Event S3POSTRequestsRedirects; - // extern const Event S3DELETEMicroseconds; - // extern const Event S3DELETEBytes; - // extern const Event S3DELETERequestsCount; - // extern const Event S3DELETERequestsErrors; - // extern const Event S3DELETERequestsThrottling; - // extern const Event S3DELETERequestsRedirects; - // extern const Event S3PATCHMicroseconds; - // extern const Event S3PATCHBytes; - // extern const Event S3PATCHRequestsCount; - // extern const Event S3PATCHRequestsErrors; - // extern const Event S3PATCHRequestsThrottling; - // extern const Event S3PATCHRequestsRedirects; - // extern const Event S3PUTMicroseconds; - // extern const Event S3PUTBytes; - // extern const Event S3PUTRequestsCount; - // extern const Event S3PUTRequestsErrors; - // extern const Event S3PUTRequestsThrottling; - // extern const Event S3PUTRequestsRedirects; - // extern const Event WriteBufferFromS3WriteMicroseconds; - // extern const Event WriteBufferFromS3WriteBytes; - // extern const Event WriteBufferFromS3WriteErrors; - // extern const Event ReadFromS3BufferCount; - // extern const Event ReadBufferFromS3ReadFailed; - // extern const Event ReadBufferFromS3ReadBytes; - // extern const Event ReadBufferFromS3ReadMicroseconds; - // extern const Event S3ReadAheadReaderRead; + extern const Event UncompressedCacheHits; + extern const Event UncompressedCacheMisses; + extern const Event MarkCacheHits; + extern const Event MarkCacheMisses; + extern 
const Event DiskCacheDataHits; + extern const Event DiskCacheDataMisses; + extern const Event DiskCacheIdxHits; + extern const Event DiskCacheIdxMisses; + extern const Event DiskCacheUncompressedHit; + extern const Event DiskCacheUncompressedMiss; + extern const Event DiskCacheDecompressMicroseconds; + extern const Event DiskCacheDiskReadMicroseconds; + extern const Event IndexGranuleSeekTime; + extern const Event IndexGranuleReadTime; + extern const Event IndexGranuleCalcTime; + extern const Event NetworkReceiveElapsedMicroseconds; + extern const Event NetworkReceiveBytes; + extern const Event NetworkSendBytes; + extern const Event S3ReadMicroseconds; + extern const Event S3ReadRequestsThrottling; + extern const Event ReadBufferFromS3ReadCount; + extern const Event PrewhereSelectedMarks; + extern const Event PocoHTTPS3GetCount; + extern const Event S3ReadRequestsCount; + extern const Event S3ReadRequestsErrors; + extern const Event ReadBufferFromS3ReadBytes; + extern const Event ReadBufferFromS3ReadMicroseconds; + extern const Event PFRAWSReadBufferReadCount; + extern const Event PFRAWSReadBufferPrefetchCount; + extern const Event PFRAWSReadBufferPrefetchUtilCount; + extern const Event PFRAWSReadBufferPrefetchWaitMicro; + extern const Event PFRAWSReadBufferRemoteReadCount; + extern const Event PFRAWSReadBufferRemoteReadBytes; + extern const Event PFRAWSReadBufferReadMicro; extern const Event QueryMemoryLimitExceeded; extern const Event InsertQuery; extern const Event Merge; @@ -716,6 +706,7 @@ class ServerPrometheusMetricsWriter : public IPrometheusMetricsWriter void writeHistogramMetrics(WriteBuffer & wb); void writeInternalMetrics(WriteBuffer & wb); void writePartMetrics(WriteBuffer & wb); + void writeTTLCacheMetrics(WriteBuffer & wb); static constexpr auto MAX_CONCURRENT_DEFAULT_QUERIES_KEY = "max_concurrent_default_queries"; static constexpr auto MAX_CONCURRENT_INSERT_QUERIES_KEY = "max_concurrent_insert_queries"; @@ -1209,51 +1200,46 @@ class 
ServerPrometheusMetricsWriter : public IPrometheusMetricsWriter ProfileEvents::UniqueKeyIndexMetaCacheMiss, ProfileEvents::UniqueKeyIndexBlockCacheHit, ProfileEvents::UniqueKeyIndexBlockCacheMiss, + /// About uncompressed/mark cache + ProfileEvents::UncompressedCacheHits, + ProfileEvents::UncompressedCacheMisses, + ProfileEvents::MarkCacheHits, + ProfileEvents::MarkCacheMisses, + /// About TTL disk cache hit/miss + ProfileEvents::DiskCacheDataHits, + ProfileEvents::DiskCacheDataMisses, + ProfileEvents::DiskCacheIdxHits, + ProfileEvents::DiskCacheIdxMisses, + ProfileEvents::DiskCacheUncompressedHit, + ProfileEvents::DiskCacheUncompressedMiss, + ProfileEvents::DiskCacheDecompressMicroseconds, + ProfileEvents::DiskCacheDiskReadMicroseconds, + /// About index granule + ProfileEvents::IndexGranuleSeekTime, + ProfileEvents::IndexGranuleReadTime, + ProfileEvents::IndexGranuleCalcTime, + /// About network + ProfileEvents::NetworkReceiveElapsedMicroseconds, + ProfileEvents::NetworkReceiveBytes, + ProfileEvents::NetworkSendBytes, /// About s3 - // ProfileEvents::S3GETMicroseconds, - // ProfileEvents::S3GETBytes, - // ProfileEvents::S3GETRequestsCount, - // ProfileEvents::S3GETRequestsErrors, - // ProfileEvents::S3GETRequestsThrottling, - // ProfileEvents::S3GETRequestsRedirects, - // ProfileEvents::S3HEADMicroseconds, - // ProfileEvents::S3HEADBytes, - // ProfileEvents::S3HEADRequestsCount, - // ProfileEvents::S3HEADRequestsErrors, - // ProfileEvents::S3HEADRequestsThrottling, - // ProfileEvents::S3HEADRequestsRedirects, - // ProfileEvents::S3POSTMicroseconds, - // ProfileEvents::S3POSTBytes, - // ProfileEvents::S3POSTRequestsCount, - // ProfileEvents::S3POSTRequestsErrors, - // ProfileEvents::S3POSTRequestsThrottling, - // ProfileEvents::S3POSTRequestsRedirects, - // ProfileEvents::S3DELETEMicroseconds, - // ProfileEvents::S3DELETEBytes, - // ProfileEvents::S3DELETERequestsCount, - // ProfileEvents::S3DELETERequestsErrors, - // ProfileEvents::S3DELETERequestsThrottling, - 
// ProfileEvents::S3DELETERequestsRedirects, - // ProfileEvents::S3PATCHMicroseconds, - // ProfileEvents::S3PATCHBytes, - // ProfileEvents::S3PATCHRequestsCount, - // ProfileEvents::S3PATCHRequestsErrors, - // ProfileEvents::S3PATCHRequestsThrottling, - // ProfileEvents::S3PATCHRequestsRedirects, - // ProfileEvents::S3PUTMicroseconds, - // ProfileEvents::S3PUTBytes, - // ProfileEvents::S3PUTRequestsCount, - // ProfileEvents::S3PUTRequestsErrors, - // ProfileEvents::S3PUTRequestsThrottling, - // ProfileEvents::S3PUTRequestsRedirects, - // ProfileEvents::WriteBufferFromS3WriteMicroseconds, - // ProfileEvents::WriteBufferFromS3WriteBytes, - // ProfileEvents::WriteBufferFromS3WriteErrors, - // ProfileEvents::ReadBufferFromS3Read, - // ProfileEvents::ReadBufferFromS3ReadFailed, - // ProfileEvents::ReadBufferFromS3ReadBytes, - // ProfileEvents::ReadBufferFromS3ReadMicroseconds, - // ProfileEvents::S3ReadAheadReaderRead, + ProfileEvents::S3ReadMicroseconds, + ProfileEvents::S3ReadRequestsCount, + ProfileEvents::S3ReadRequestsErrors, + ProfileEvents::S3ReadRequestsThrottling, + ProfileEvents::ReadBufferFromS3ReadBytes, + ProfileEvents::ReadBufferFromS3ReadMicroseconds, + ProfileEvents::ReadBufferFromS3ReadCount, + ProfileEvents::PrewhereSelectedMarks, + ProfileEvents::PocoHTTPS3GetCount, + /// About PFRA (active path when enable_io_pfra=true) + ProfileEvents::PFRAWSReadBufferReadCount, + ProfileEvents::PFRAWSReadBufferPrefetchCount, + ProfileEvents::PFRAWSReadBufferPrefetchUtilCount, + ProfileEvents::PFRAWSReadBufferPrefetchWaitMicro, + ProfileEvents::PFRAWSReadBufferRemoteReadCount, + ProfileEvents::PFRAWSReadBufferRemoteReadBytes, + ProfileEvents::PFRAWSReadBufferReadMicro, ProfileEvents::QueryMemoryLimitExceeded, ProfileEvents::InsertQuery, ProfileEvents::Merge, diff --git a/src/Storages/DiskCache/DiskCacheFactory.cpp b/src/Storages/DiskCache/DiskCacheFactory.cpp index 071e2f55ca8..25ad6dd8ea8 100644 --- a/src/Storages/DiskCache/DiskCacheFactory.cpp +++ 
b/src/Storages/DiskCache/DiskCacheFactory.cpp @@ -17,14 +17,21 @@ #include #include +#include +#include +#include #include #include #include +#include #include #include +#include #include #include #include +#include +#include namespace DB { @@ -108,6 +115,127 @@ void DiskCacheFactory::shutdown() IDiskCache::close(); } +size_t DiskCacheFactory::getGlobalTTLLimit() const +{ + auto it = caches.find(DiskCacheType::MergeTree); + if (it != caches.end() && it->second) + return it->second->getSettings().ttl_cache_max_size; + return 0; +} + +IDiskCachePtr DiskCacheFactory::createDiskCacheFromTableSettings( + const String & table_name, + const UUID & table_uuid, + Context & context, + const ThrottlerPtr & throttler, + UInt64 ttl_minutes, + size_t max_size_bytes) +{ + Poco::Logger * log = &Poco::Logger::get("DiskCacheFactory"); + DiskCacheSettings cache_settings; + { + auto it = caches.find(DiskCacheType::MergeTree); + if (it != caches.end() && it->second) + cache_settings = it->second->getSettings(); + } + + // Resolve effective limit before any comparison: 0 means "use global limit". + // Multiple tables should each have an explicit per-table limit; the global limit + // is the single-table default. + size_t effective_max_size = max_size_bytes > 0 ? max_size_bytes : cache_settings.ttl_cache_max_size; + + // Check registry first (for worker reuse). + // Compare against effective_max_size so callers passing 0 (no per-table override) + // don't spuriously trigger recreation of a cache that was already created with the global limit. 
+ { + std::lock_guard lock(ttl_cache_registry_mutex); + auto reg_it = per_table_ttl_caches.find(table_uuid); + if (reg_it != per_table_ttl_caches.end()) + { + auto existing = static_pointer_cast(reg_it->second); + if (existing->getTTLMinutes() == ttl_minutes && existing->getMaxSizeBytes() == effective_max_size) + { + LOG_TRACE(log, "Reusing existing TTL cache for {} (UUID: {})", table_name, UUIDHelpers::UUIDToString(table_uuid)); + return reg_it->second; + } + LOG_INFO(log, "TTL cache settings changed for {} (UUID: {}), updating in place (ttl: {}->{}min, max_size: {}->{}bytes)", + table_name, UUIDHelpers::UUIDToString(table_uuid), + existing->getTTLMinutes(), ttl_minutes, + existing->getMaxSizeBytes(), effective_max_size); + existing->updateSettings(ttl_minutes, effective_max_size); + return reg_it->second; + } + } + + // Get volume from ttl_disk_policy + // defaults to disk_policy if not set + VolumePtr volume = context.getStoragePolicy(cache_settings.ttl_disk_policy)->getVolumeByName("local", true); + + // Per-table cache is always TTL-based + auto strategy = std::make_shared(cache_settings); + auto cache = std::make_shared( + table_name, UUIDHelpers::UUIDToString(table_uuid), volume, throttler, cache_settings, strategy, ttl_minutes, effective_max_size); + + LOG_INFO(log, "Created per-table TTL cache for {} (UUID: {}, TTL: {} minutes, max_size: {}GB, policy: {})", + table_name, UUIDHelpers::UUIDToString(table_uuid), ttl_minutes, effective_max_size / (1024*1024*1024), cache_settings.ttl_disk_policy); + + if (auto catalog = context.getCnchCatalog()) + { + try + { + auto metastore = catalog->getMetastore(); + String ns = context.getCnchConfigRef().getString("catalog.name_space", "default"); + String worker_id = getWorkerID(context.shared_from_this()); + String uuid_str = UUIDHelpers::UUIDToString(table_uuid); + // Pass worker_id (not IP) as own identity — stable across pod restarts. 
+ // The DCIREV_ reverse index stores worker_id values; findPeerOwner resolves + // them to host:port at runtime via DiskCacheFactory::resolveWorkerEndpoint. + auto fdb_idx = std::make_shared(metastore, ns, worker_id, uuid_str, worker_id); + static_pointer_cast(cache)->setFDBIndex(std::move(fdb_idx)); + + // Set up worker endpoint resolver on first use (captures rm_client shared_ptr). + if (!worker_endpoint_resolver) + { + auto rm = context.getResourceManagerClient(); + worker_endpoint_resolver = [rm]() -> std::unordered_map { + std::unordered_map result; + if (!rm) + return result; + std::vector workers; + try { rm->getAllWorkers(workers); } + catch (...) { return result; } + for (const auto & w : workers) + if (!w.id.empty()) + result[w.id] = w.host_ports.getRPCAddress(); + return result; + }; + } + } + catch (...) + { + tryLogCurrentException(log, "Failed to create TTLCacheFDBIndex, cache will use disk scan on restart"); + } + } + + // Insert into registry with re-check: if another thread won the race, discard ours. + // load() is called only on the winner so only one disk scan runs per table UUID. + { + std::lock_guard lock(ttl_cache_registry_mutex); + auto [it, inserted] = per_table_ttl_caches.emplace(table_uuid, cache); + if (!inserted) + { + LOG_TRACE(log, "Reusing TTL cache created concurrently for {} (UUID: {})", table_name, UUIDHelpers::UUIDToString(table_uuid)); + return it->second; + } + } + + // Schedule disk scan only for the winning cache object. 
+ auto & thread_pool = IDiskCache::getThreadPool(); + thread_pool.scheduleOrThrowOnError([cache] { cache->load(); }); + + return cache; +} + void DiskCacheFactory::addNewCache(Context & context, const std::string & cache_name, bool create_default) { Poco::Logger * log{&Poco::Logger::get("DiskCacheFactory")}; @@ -144,6 +272,18 @@ void DiskCacheFactory::addNewCache(Context & context, const std::string & cache_ cache_settings.lru_max_nums)); } + // Resolve global TTL cache limit — use TTL disk space when a separate ttl_disk_policy is configured + auto ttl_total_space_unlimited = !cache_settings.ttl_disk_policy.empty() + ? context.getStoragePolicy(cache_settings.ttl_disk_policy)->getVolumeByName("local", true)->getTotalSpace(true) + : total_space_unlimited; + cache_settings.ttl_cache_max_size = (cache_settings.ttl_cache_max_size > 0) + ? cache_settings.ttl_cache_max_size + : static_cast(ttl_total_space_unlimited.bytes * (cache_settings.ttl_cache_max_percent / 100.0)); + + LOG_INFO(log, "{} cache: TTL global limit {}GB", + cache_name, cache_settings.ttl_cache_max_size / (1024*1024*1024)); + + // Global cache always uses LRU (TTL cache is per-table only) if (!cache_settings.meta_cache_size_ratio) { auto disk_cache = std::make_shared( @@ -167,4 +307,99 @@ void DiskCacheFactory::addNewCache(Context & context, const std::string & cache_ } } +void DiskCacheFactory::mergeQueryCacheStats(const String & query_id, const QueryCacheStatsSnapshot & local) +{ + if (local.empty()) + return; + + std::shared_ptr entry; + { + std::shared_lock rl(query_cache_stats_mutex); + auto it = query_cache_stats_map.find(query_id); + if (it != query_cache_stats_map.end()) + entry = it->second; + } + if (!entry) + { + std::unique_lock wl(query_cache_stats_mutex); + auto [it, inserted] = query_cache_stats_map.emplace(query_id, std::make_shared()); + entry = it->second; + } + // Lock-free updates after entry is visible + entry->cache_hit_segs.fetch_add(local.cache_hit_segs, std::memory_order_relaxed); 
+ entry->cache_miss_segs.fetch_add(local.cache_miss_segs, std::memory_order_relaxed); + entry->steal_segs.fetch_add(local.steal_segs, std::memory_order_relaxed); + entry->s3_fallback_segs.fetch_add(local.s3_fallback_segs, std::memory_order_relaxed); + entry->cache_bytes.fetch_add(local.cache_bytes, std::memory_order_relaxed); + entry->s3_bytes.fetch_add(local.s3_bytes, std::memory_order_relaxed); + entry->cache_read_ms.fetch_add(local.cache_read_ms, std::memory_order_relaxed); + entry->s3_read_ms.fetch_add(local.s3_read_ms, std::memory_order_relaxed); + entry->reader_count.fetch_add(1, std::memory_order_relaxed); + for (auto cur = entry->cache_read_ms_max.load(std::memory_order_relaxed); + local.cache_read_ms > cur && !entry->cache_read_ms_max.compare_exchange_weak(cur, local.cache_read_ms, std::memory_order_relaxed);) + ; + for (auto cur = entry->cache_read_ms_min.load(std::memory_order_relaxed); + local.cache_read_ms < cur && !entry->cache_read_ms_min.compare_exchange_weak(cur, local.cache_read_ms, std::memory_order_relaxed);) + ; + entry->idx_hit_segs.fetch_add(local.idx_hit_segs, std::memory_order_relaxed); + entry->idx_miss_segs.fetch_add(local.idx_miss_segs, std::memory_order_relaxed); + entry->idx_cache_bytes.fetch_add(local.idx_cache_bytes, std::memory_order_relaxed); + entry->idx_s3_bytes.fetch_add(local.idx_s3_bytes, std::memory_order_relaxed); + entry->idx_cache_read_ms.fetch_add(local.idx_cache_read_ms, std::memory_order_relaxed); + entry->idx_s3_read_ms.fetch_add(local.idx_s3_read_ms, std::memory_order_relaxed); + if (local.idx_hit_segs > 0 || local.idx_miss_segs > 0 || local.idx_cache_bytes > 0 || local.idx_s3_bytes > 0) + entry->idx_reader_count.fetch_add(1, std::memory_order_relaxed); +} + +std::optional DiskCacheFactory::consumeQueryCacheStats(const String & query_id) +{ + std::unique_lock wl(query_cache_stats_mutex); + auto it = query_cache_stats_map.find(query_id); + if (it == query_cache_stats_map.end()) + return std::nullopt; + + const auto & e 
= *it->second; + QueryCacheStatsSnapshot snap; + snap.cache_hit_segs = e.cache_hit_segs.load(std::memory_order_relaxed); + snap.cache_miss_segs = e.cache_miss_segs.load(std::memory_order_relaxed); + snap.steal_segs = e.steal_segs.load(std::memory_order_relaxed); + snap.s3_fallback_segs = e.s3_fallback_segs.load(std::memory_order_relaxed); + snap.cache_bytes = e.cache_bytes.load(std::memory_order_relaxed); + snap.s3_bytes = e.s3_bytes.load(std::memory_order_relaxed); + snap.cache_read_ms = e.cache_read_ms.load(std::memory_order_relaxed); + snap.cache_read_ms_max = e.cache_read_ms_max.load(std::memory_order_relaxed); + auto raw_min = e.cache_read_ms_min.load(std::memory_order_relaxed); + snap.cache_read_ms_min = (raw_min == UINT64_MAX) ? 0 : raw_min; + snap.s3_read_ms = e.s3_read_ms.load(std::memory_order_relaxed); + snap.reader_count = e.reader_count.load(std::memory_order_relaxed); + snap.idx_hit_segs = e.idx_hit_segs.load(std::memory_order_relaxed); + snap.idx_miss_segs = e.idx_miss_segs.load(std::memory_order_relaxed); + snap.idx_cache_bytes = e.idx_cache_bytes.load(std::memory_order_relaxed); + snap.idx_s3_bytes = e.idx_s3_bytes.load(std::memory_order_relaxed); + snap.idx_cache_read_ms = e.idx_cache_read_ms.load(std::memory_order_relaxed); + snap.idx_s3_read_ms = e.idx_s3_read_ms.load(std::memory_order_relaxed); + snap.idx_reader_count = e.idx_reader_count.load(std::memory_order_relaxed); + query_cache_stats_map.erase(it); + return snap; +} + + +std::optional DiskCacheFactory::resolveWorkerEndpoint(const String & worker_id) +{ + if (!worker_endpoint_resolver) + return std::nullopt; + + std::lock_guard lk(worker_endpoint_cache_mutex); + time_t now = time(nullptr); + if (now - worker_endpoint_cache_refresh_time >= WORKER_ENDPOINT_CACHE_TTL_SEC) + { + worker_endpoint_cache = worker_endpoint_resolver(); + worker_endpoint_cache_refresh_time = now; + } + auto it = worker_endpoint_cache.find(worker_id); + if (it != worker_endpoint_cache.end()) + return it->second; + 
return std::nullopt; +} + } diff --git a/src/Storages/DiskCache/DiskCacheFactory.h b/src/Storages/DiskCache/DiskCacheFactory.h index ecd95f8763e..331585eeb29 100644 --- a/src/Storages/DiskCache/DiskCacheFactory.h +++ b/src/Storages/DiskCache/DiskCacheFactory.h @@ -15,9 +15,16 @@ #pragma once +#include #include #include +#include +#include +#include +#include +#include #include +#include #include namespace DB::ErrorCodes @@ -29,6 +36,61 @@ extern const int LOGICAL_ERROR; namespace DB { class Context; +class IVolume; +class Throttler; +using VolumePtr = std::shared_ptr; +using ThrottlerPtr = std::shared_ptr; + +/// Per-query cache stats accumulated on workers and surfaced via segment profiles. +struct QueryCacheStats +{ + std::atomic cache_hit_segs{0}; // data segments served from local TTL cache + std::atomic cache_miss_segs{0}; // data segments not found in local cache + std::atomic steal_segs{0}; // segments fetched from peer via steal RPC + std::atomic s3_fallback_segs{0}; // data segments read directly from S3 + std::atomic cache_bytes{0}; // bytes through cache_buffer for data (local + steal) + std::atomic s3_bytes{0}; // bytes through source_buffer for data (S3) + std::atomic cache_read_ms{0}; + std::atomic cache_read_ms_max{0}; + std::atomic cache_read_ms_min{UINT64_MAX}; + std::atomic s3_read_ms{0}; + std::atomic reader_count{0}; + // Skip-index segment counters (extension .idx) + std::atomic idx_hit_segs{0}; + std::atomic idx_miss_segs{0}; + std::atomic idx_cache_bytes{0}; + std::atomic idx_s3_bytes{0}; + std::atomic idx_cache_read_ms{0}; + std::atomic idx_s3_read_ms{0}; + std::atomic idx_reader_count{0}; +}; + +/// Plain snapshot, used for local accumulation and return values. 
+struct QueryCacheStatsSnapshot +{ + size_t cache_hit_segs{0}; + size_t cache_miss_segs{0}; + size_t steal_segs{0}; + size_t s3_fallback_segs{0}; + size_t cache_bytes{0}; + size_t s3_bytes{0}; + uint64_t cache_read_ms{0}; + uint64_t cache_read_ms_max{0}; + uint64_t cache_read_ms_min{0}; + uint64_t s3_read_ms{0}; + size_t reader_count{0}; + // Skip-index segment counters (extension .idx) + size_t idx_hit_segs{0}; + size_t idx_miss_segs{0}; + size_t idx_cache_bytes{0}; + size_t idx_s3_bytes{0}; + uint64_t idx_cache_read_ms{0}; + uint64_t idx_s3_read_ms{0}; + size_t idx_reader_count{0}; + + bool empty() const { return cache_hit_segs == 0 && cache_miss_segs == 0 && steal_segs == 0 && s3_fallback_segs == 0 + && idx_hit_segs == 0 && idx_miss_segs == 0; } +}; enum class DiskCacheType { File, // for generic file disk cache @@ -66,8 +128,66 @@ class DiskCacheFactory : public ext::singleton return it->second; } + /// Create per-table TTL cache instance from table settings + IDiskCachePtr createDiskCacheFromTableSettings( + const String & table_name, + const UUID & table_uuid, + Context & context, + const ThrottlerPtr & throttler, + UInt64 ttl_minutes, + size_t max_size_bytes = 0); + + /// Return a snapshot of all registered per-table TTL caches (UUID → cache ptr). + std::unordered_map getAllTableTTLCaches() const + { + std::lock_guard lock(ttl_cache_registry_mutex); + return per_table_ttl_caches; + } + + /// Remove a per-table TTL cache entry from the registry. + /// Called when disk_cache_ttl_hours is set to 0 so re-enabling creates a fresh object. 
+ void removeTableTTLCache(const UUID & table_uuid) + { + std::lock_guard lock(ttl_cache_registry_mutex); + per_table_ttl_caches.erase(table_uuid); + } + + /// Global TTL cache usage tracking + /// shared across all per-table TTL caches + void addGlobalTTLUsage(size_t bytes) { global_ttl_cache_usage.fetch_add(bytes); } + void releaseGlobalTTL(size_t bytes) { global_ttl_cache_usage.fetch_sub(bytes); } + size_t getGlobalTTLUsage() const { return global_ttl_cache_usage.load(); } + size_t getGlobalTTLLimit() const; + + /// Per-query cache stats registry. + /// unique_lock only for first insertion, then atomic fetch_add on the fields. + void mergeQueryCacheStats(const String & query_id, const QueryCacheStatsSnapshot & local); + std::optional consumeQueryCacheStats(const String & query_id); + + /// Resolve a stable worker_id (e.g. byconity-vw-vw-default-0) to its current RPC + /// host:port by querying the Resource Manager. Result cached for 30 seconds. + std::optional resolveWorkerEndpoint(const String & worker_id); + private: void addNewCache(Context & context, const std::string & cache_name, bool create_default); std::unordered_map caches; + + /// Per-table TTL cache registry (for workers) + std::unordered_map per_table_ttl_caches; + mutable std::mutex ttl_cache_registry_mutex; + + /// Global TTL cache usage tracking + std::atomic global_ttl_cache_usage{0}; + + /// Per-query cache stats (query_id → shared stats object) + std::unordered_map> query_cache_stats_map; + mutable std::shared_mutex query_cache_stats_mutex; + + /// Worker endpoint resolution: worker_id → host:port, refreshed every 30s from RM. 
+ std::function()> worker_endpoint_resolver; + mutable std::mutex worker_endpoint_cache_mutex; + std::unordered_map worker_endpoint_cache; + time_t worker_endpoint_cache_refresh_time{0}; + static constexpr int WORKER_ENDPOINT_CACHE_TTL_SEC = 30; }; } diff --git a/src/Storages/DiskCache/DiskCacheLRU.cpp b/src/Storages/DiskCache/DiskCacheLRU.cpp index 7d4830fc744..aad87ea97f6 100644 --- a/src/Storages/DiskCache/DiskCacheLRU.cpp +++ b/src/Storages/DiskCache/DiskCacheLRU.cpp @@ -207,7 +207,7 @@ static fs::path getRelativePathForPart(const String & part_name, const String & return fs::path(prefix) / hex_key.substr(0, 3) / hex_key / ""; } -void DiskCacheLRU::set(const String& seg_name, ReadBuffer& value, size_t weight_hint, bool is_preload) +void DiskCacheLRU::set(const String& seg_name, ReadBuffer& value, size_t weight_hint, bool is_preload, time_t min_time, time_t max_time) { if (is_droping) { diff --git a/src/Storages/DiskCache/DiskCacheLRU.h b/src/Storages/DiskCache/DiskCacheLRU.h index eb617aab200..b2e00361184 100644 --- a/src/Storages/DiskCache/DiskCacheLRU.h +++ b/src/Storages/DiskCache/DiskCacheLRU.h @@ -75,7 +75,7 @@ class DiskCacheLRU: public IDiskCache const IDiskCacheStrategyPtr & strategy_, IDiskCache::DataType type_ = IDiskCache::DataType::ALL); - void set(const String& seg_name, ReadBuffer& value, size_t weight_hint, bool is_preload) override; + void set(const String& seg_name, ReadBuffer& value, size_t weight_hint, bool is_preload, time_t min_time = 0, time_t max_time = 0) override; std::pair get(const String& seg_name) override; void load() override; size_t drop(const String & part_name) override; diff --git a/src/Storages/DiskCache/DiskCacheSettings.cpp b/src/Storages/DiskCache/DiskCacheSettings.cpp index ff69f2cd6ee..585533585cf 100644 --- a/src/Storages/DiskCache/DiskCacheSettings.cpp +++ b/src/Storages/DiskCache/DiskCacheSettings.cpp @@ -24,6 +24,7 @@ void DiskCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & { std::string 
config_prefix = fmt::format("{}.{}", root, disk_cache_name); // {root}.MergeTree disk_policy = config.getString(config_prefix + ".disk_policy", "default"); + ttl_disk_policy = config.getString(config_prefix + ".ttl_disk_policy", disk_policy); // Fallback to disk_policy lru_max_nums = config.getUInt64(config_prefix + ".lru_max_object_num", std::numeric_limits::max()); // Todo: process the case which disk not have 2 TB free space lru_max_size = config.getUInt64(config_prefix + ".lru_max_size", static_cast(2) * 1024 * 1024 * 1024 * 1024); @@ -50,16 +51,23 @@ void DiskCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & stealing_max_retry_times = config.getUInt(config_prefix + ".stealing_max_retry_times", 3); stealing_retry_sleep_ms = config.getUInt(config_prefix + ".stealing_retry_sleep_ms", 100); stealing_max_queue_count = config.getUInt(config_prefix + ".stealing_max_queue_count", 10000); + + // TTL cache settings + ttl_cache_max_size = config.getUInt64(config_prefix + ".ttl_cache_max_size", 0); + ttl_cache_max_percent = config.getDouble(config_prefix + ".ttl_cache_max_percent", 80.0); } std::string DiskCacheSettings::toString() const - { +{ return fmt::format( R"({{ - "disk_policy": {}, + "disk_policy": "{}", + "ttl_disk_policy": "{}", "lru_max_percent": {}, "lru_max_size": {}, "lru_max_nums": {}, + "ttl_cache_max_size": {}, + "ttl_cache_max_percent": {}, "random_drop_threshold": {}, "mapping_bucket_size": {}, "lru_update_interval": {}, @@ -74,13 +82,22 @@ std::string DiskCacheSettings::toString() const "stats_bucket_size": {}, "previous_disk_cache_dir": "{}", "latest_disk_cache_dir": "{}", - "meta_cache_size_ratio": "{}", - "meta_cache_nums_ratio": "{}" + "meta_cache_size_ratio": {}, + "meta_cache_nums_ratio": {}, + "stealing_max_request_rate": {}, + "stealing_connection_timeout_ms": {}, + "stealing_read_timeout_ms": {}, + "stealing_max_retry_times": {}, + "stealing_retry_sleep_ms": {}, + "stealing_max_queue_count": {} }})", disk_policy, + 
ttl_disk_policy, lru_max_percent, lru_max_size, lru_max_nums, + ttl_cache_max_size, + ttl_cache_max_percent, random_drop_threshold, mapping_bucket_size, lru_update_interval, @@ -96,6 +113,12 @@ std::string DiskCacheSettings::toString() const previous_disk_cache_dir, latest_disk_cache_dir, meta_cache_size_ratio, - meta_cache_nums_ratio); - } + meta_cache_nums_ratio, + stealing_max_request_rate, + stealing_connection_timeout_ms, + stealing_read_timeout_ms, + stealing_max_retry_times, + stealing_retry_sleep_ms, + stealing_max_queue_count); +} } diff --git a/src/Storages/DiskCache/DiskCacheSettings.h b/src/Storages/DiskCache/DiskCacheSettings.h index 13eebd0406b..4b37ed8117e 100644 --- a/src/Storages/DiskCache/DiskCacheSettings.h +++ b/src/Storages/DiskCache/DiskCacheSettings.h @@ -29,10 +29,15 @@ struct DiskCacheSettings void loadFromConfig(const Poco::Util::AbstractConfiguration & conf, const std::string & disk_cache_name); String disk_policy {"default"}; + String ttl_disk_policy {""}; // Storage policy for TTL cache, empty = use disk_policy size_t lru_max_size {std::numeric_limits::max()}; size_t lru_max_nums {std::numeric_limits::max()}; // max percent of disk total capacity size_t lru_max_percent {80}; + // TTL cache max size (bytes). 0 = use ttl_cache_max_percent instead + size_t ttl_cache_max_size {0}; + // TTL cache max percent of disk capacity (used if ttl_cache_max_size == 0) + double ttl_cache_max_percent {80.0}; // When queue size exceed random drop ratio, start drop disk cache task, range from 0 - 100 size_t random_drop_threshold {50}; // Cache mapping bucket size diff --git a/src/Storages/DiskCache/DiskCacheTTL.cpp b/src/Storages/DiskCache/DiskCacheTTL.cpp new file mode 100644 index 00000000000..bb93863a067 --- /dev/null +++ b/src/Storages/DiskCache/DiskCacheTTL.cpp @@ -0,0 +1,1257 @@ +/* + * Copyright (2022) Bytedance Ltd. 
and/or its affiliates + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Common/Exception.h" +#include "Common/hex.h" +#include "common/logger_useful.h" +#include +#include +#include +#include "Interpreters/Context.h" +#include "Storages/DiskCache/DiskCache_fwd.h" +#include "Storages/DiskCache/IDiskCache.h" +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace CurrentMetrics +{ + extern const Metric DiskCacheEvictQueueLength; +} + +namespace ProfileEvents +{ + extern const Event DiskCacheGetMetaMicroSeconds; + extern const Event DiskCacheGetTotalOps; + extern const Event DiskCacheSetTotalOps; + extern const Event DiskCacheSetTotalBytes; + extern const Event DiskCacheDataHits; + extern const Event DiskCacheDataMisses; + extern const Event DiskCacheIdxHits; + extern const Event DiskCacheIdxMisses; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYSTEM_ERROR; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +static constexpr auto DISK_CACHE_TEMP_FILE_SUFFIX = ".temp"; +static constexpr auto META_DISK_CACHE_DIR_PREFIX = "meta"; +static constexpr auto DATA_DISK_CACHE_DIR_PREFIX = "data"; + +namespace +{ + constexpr size_t HEX_KEY_LEN = sizeof(DiskCacheTTL::KeyType) * 2; + + // Extract UUID from segment/part name (format: 
uuid/part_name/...) + String extractUUID(const String & seg_name) + { + size_t first_slash = seg_name.find('/'); + if (first_slash == std::string::npos) + return seg_name; + + return seg_name.substr(0, first_slash); + } + + // Extract part_name from segment name (format: uuid/part_name/column_segment.ext) + String extractPartName(const String & seg_name) + { + size_t first_slash = seg_name.find('/'); + if (first_slash == std::string::npos) + return seg_name; + + size_t second_slash = seg_name.find('/', first_slash + 1); + if (second_slash == std::string::npos) + return seg_name.substr(first_slash + 1); // Return everything after uuid/ + + return seg_name.substr(first_slash + 1, second_slash - first_slash - 1); + } + + // Extract partition_id from part_name (format: 20240315_1_100_2 → 20240315) + String extractPartitionId(const String & part_name) + { + size_t underscore_pos = part_name.find('_'); + if (underscore_pos == std::string::npos) + return part_name; + + return part_name.substr(0, underscore_pos); + } + + // Get relative path for part with new structure + // Structure: prefix/uuid/partition/3char/hash_part/ + // Example: data/a1b2c3.../20240315/abc/abc123def456/ + fs::path getRelativePathForPart(const String & uuid, const String & part_name, const String & prefix) + { + String partition_id = extractPartitionId(part_name); + auto hash_part = sipHash64(part_name.data(), part_name.size()); + String hex_hash(HEX_KEY_LEN / 2, '\0'); + writeHexUIntLowercase(hash_part, hex_hash.data()); + + return fs::path(prefix) / uuid / partition_id / hex_hash.substr(0, 3) / hex_hash / ""; + } + + String formatPartitionId(time_t ts) + { + struct tm t; + gmtime_r(&ts, &t); + return fmt::format("{:04d}{:02d}{:02d}", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday); + } + + bool isHexKey(const String & hex_key) + { + if (hex_key.size() != HEX_KEY_LEN) + return false; + + for (char c : hex_key) + { + if (!(isNumericASCII(c) || (c >= 'a' && c <= 'f'))) + return false; + } + + return true; 
+ } + + bool isHexHalf(const String & s) + { + if (s.size() != HEX_KEY_LEN / 2) + return false; + for (char c : s) + if (!(isNumericASCII(c) || (c >= 'a' && c <= 'f'))) + return false; + return true; + } +} + +DiskCacheTTL::DiskCacheTTL( + const String & name_, + const String & table_uuid_, + const VolumePtr & volume_, + const ThrottlerPtr & throttler_, + const DiskCacheSettings & settings_, + const IDiskCacheStrategyPtr & strategy_, + UInt64 ttl_minutes_, + size_t max_size_bytes_, + IDiskCache::DataType type_) + : IDiskCache(name_, volume_, throttler_, settings_, strategy_, false, type_) + , set_rate_throttler(settings_.cache_set_rate_limit == 0 ? nullptr : std::make_shared(settings_.cache_set_rate_limit)) + , set_throughput_throttler(settings_.cache_set_throughput_limit == 0 ? nullptr : std::make_shared(settings_.cache_set_throughput_limit)) + , table_uuid(table_uuid_) + , ttl_minutes(ttl_minutes_) + , max_size_bytes(max_size_bytes_) // Already calculated by factory +{ + cache_stats.table_uuid = table_uuid_; + LOG_INFO(log, "Initialized TTL cache for table {} with ttl_minutes={}, max_size_bytes={} ({}GB)", + table_uuid_, ttl_minutes_, max_size_bytes, max_size_bytes / (1024*1024*1024)); + if (settings.cache_load_dispatcher_drill_down_level < -1) + { + throw Exception(fmt::format("Load dispatcher's drill down level {} invalid, " + "must be positive or -1", settings.cache_load_dispatcher_drill_down_level), + ErrorCodes::BAD_ARGUMENTS); + } + // load() is called by the factory after this object wins the registry race, + // so only one disk scan runs per table UUID. 
+} + +DiskCacheTTL::KeyType DiskCacheTTL::hash(const String & seg_key) +{ + // seg_key format: "uuid/part_name/column.bin/offset_0" + // hash_high = hash(part_name only) for grouping all segments of a part + // hash_low = hash(column + segment) for unique segment identification + + size_t first_slash = seg_key.find('/'); + if (first_slash == std::string::npos) + throw Exception("Invalid seg key: " + seg_key, ErrorCodes::LOGICAL_ERROR); + + size_t second_slash = seg_key.find('/', first_slash + 1); + if (second_slash == std::string::npos) + throw Exception("Invalid seg key: " + seg_key, ErrorCodes::LOGICAL_ERROR); + + // hash_high = hash(part_name) - all segments in same part share this + auto high = sipHash64(seg_key.data() + first_slash + 1, second_slash - first_slash - 1); + + // hash_low = hash(column/segment) - unique per segment + auto low = sipHash64(seg_key.data() + second_slash + 1, seg_key.size() - second_slash - 1); + + return {high, low}; +} + +String DiskCacheTTL::hexKey(const KeyType & key) +{ + std::string res(HEX_KEY_LEN, '\0'); + writeHexUIntLowercase(key, res.data()); + return res; +} + +std::optional DiskCacheTTL::unhexKey(const String & hex_key) +{ + if (!isHexKey(hex_key)) + return {}; + + auto low = unhexUInt(hex_key.data()); + auto high = unhexUInt(hex_key.data() + HEX_KEY_LEN / 2); + + return UInt128{high, low}; +} + +fs::path DiskCacheTTL::getPath(const DiskCacheTTL::KeyType & hash_key, const String & path, const String & seg_name, const String & prefix) const +{ + // New structure: uuid/partition/3char/hash_part/hash_low + // Example: a1b2c3d4.../20240315/abc/abc123def456/567890abcd + + String hex_key = hexKey(hash_key); + std::string_view view(hex_key); + std::string_view hex_key_low = view.substr(0, HEX_KEY_LEN / 2); + std::string_view hex_key_high = view.substr(HEX_KEY_LEN / 2, HEX_KEY_LEN); + + String part_name = extractPartName(seg_name); + String partition_id = extractPartitionId(part_name); + String data_prefix = endsWith(seg_name, 
DATA_FILE_EXTENSION) ? DATA_DISK_CACHE_DIR_PREFIX : META_DISK_CACHE_DIR_PREFIX; + + // Structure: prefix/uuid/partition/3char/hash_high/hash_low + return fs::path(path) / (prefix.empty() ? data_prefix : prefix) + / table_uuid / partition_id + / hex_key_high.substr(0, 3) / hex_key_high / hex_key_low; +} + +time_t DiskCacheTTL::parsePartitionTimestamp(const String & part_name) +{ + try + { + // Extract part name from segment path (format: uuid/part_name/segment_name) + size_t first_slash = part_name.find('/'); + if (first_slash == std::string::npos) + return 0; + + size_t second_slash = part_name.find('/', first_slash + 1); + String actual_part_name; + if (second_slash != std::string::npos) + actual_part_name = part_name.substr(first_slash + 1, second_slash - first_slash - 1); + else + actual_part_name = part_name.substr(first_slash + 1); + + // Parse partition_id from part name + MergeTreePartInfo info; + if (!MergeTreePartInfo::tryParsePartName(actual_part_name, &info, MergeTreeDataFormatVersion(1))) + return 0; + + const String & partition_id = info.partition_id; + if (partition_id.empty()) + return 0; + + // Try to parse as date/datetime + // Common formats: YYYYMMDD, YYYYMMDDHH, YYYYMM + if (partition_id.size() >= 8 && std::all_of(partition_id.begin(), partition_id.end(), ::isdigit)) + { + // Parse as YYYYMMDD + int year = std::stoi(partition_id.substr(0, 4)); + int month = std::stoi(partition_id.substr(4, 2)); + int day = partition_id.size() >= 8 ? std::stoi(partition_id.substr(6, 2)) : 1; + + struct tm tm_info = {}; + tm_info.tm_year = year - 1900; + tm_info.tm_mon = month - 1; + tm_info.tm_mday = day; + tm_info.tm_hour = 0; + tm_info.tm_min = 0; + tm_info.tm_sec = 0; + tm_info.tm_isdst = -1; + + return mktime(&tm_info); + } + + return 0; + } + catch (...) 
+ { + return 0; + } +} + +bool DiskCacheTTL::shouldCache(time_t part_ts) const +{ + // TTL cache only for time-based partitions + if (part_ts == 0) + return false; // Non-time partitions are not cached + + // TTL disabled, defensively not cache + if (ttl_minutes == 0) + return false; + + time_t now = time(nullptr); + time_t age_seconds = now - part_ts; + time_t ttl_seconds = ttl_minutes * 60; + + return age_seconds <= ttl_seconds; +} + +void DiskCacheTTL::cacheInsertLocked(Shard & shard, KeyType key, std::shared_ptr meta, const String & precomputed_partition_id) +{ + shard.cache_map[key] = meta; + UInt64 hash_high = key.items[0]; + auto & entry = shard.part_index[hash_high]; + if (entry.partition_id.empty()) + { + entry.partition_id = precomputed_partition_id.empty() + ? formatPartitionId(meta->max_timestamp) + : precomputed_partition_id; + entry.partition_ts = meta->max_timestamp; + } + entry.keys.insert(key); + entry.total_bytes += meta->size; +} + +DiskCacheTTL::CacheEraseResult DiskCacheTTL::cacheEraseLocked(Shard & shard, KeyType key) +{ + CacheEraseResult result; + auto it = shard.cache_map.find(key); + if (it == shard.cache_map.end()) + return result; + + size_t bytes = it->second->size; + shard.cache_map.erase(it); + + UInt64 hash_high = key.items[0]; + auto pit = shard.part_index.find(hash_high); + if (pit != shard.part_index.end()) + { + result.partition_id = pit->second.partition_id; + result.partition_ts = pit->second.partition_ts; + result.count = 1; + result.bytes = bytes; + + pit->second.total_bytes -= bytes; + pit->second.keys.erase(key); + if (pit->second.keys.empty()) + shard.part_index.erase(pit); + } + return result; +} + +DiskCacheTTL::CacheEraseResult DiskCacheTTL::cacheErasePartLocked(Shard & shard, UInt64 hash_high) +{ + CacheEraseResult result; + auto pit = shard.part_index.find(hash_high); + if (pit == shard.part_index.end()) + return result; + + result.partition_id = pit->second.partition_id; + result.partition_ts = 
pit->second.partition_ts; + result.hash_high = hash_high; + result.count = pit->second.keys.size(); + result.bytes = pit->second.total_bytes; + + for (const auto & key : pit->second.keys) + { + auto it = shard.cache_map.find(key); + if (it != shard.cache_map.end()) + { + if (it->second->disk && !it->second->rel_path.empty()) + result.files.emplace_back(it->second->disk, it->second->rel_path); + shard.cache_map.erase(it); + } + } + + shard.part_index.erase(pit); + return result; +} + +void DiskCacheTTL::addToPartitionStats(const String & partition_id, time_t partition_ts, size_t bytes, size_t count) +{ + { + std::shared_lock lk(cache_stats.partition_stats_mutex); + auto it = cache_stats.partition_stats.find(partition_id); + if (it != cache_stats.partition_stats.end()) + { + it->second.entry_count += count; + it->second.total_bytes += bytes; + return; + } + } + std::unique_lock lk(cache_stats.partition_stats_mutex); + auto [it, inserted] = cache_stats.partition_stats.try_emplace(partition_id); + if (inserted) + { + it->second.partition_id = partition_id; + it->second.partition_timestamp = partition_ts; + } + it->second.entry_count += count; + it->second.total_bytes += bytes; +} + +void DiskCacheTTL::applyEraseResults(std::vector & results, size_t & total_evicted, const char * log_tag) +{ + for (auto & result : results) + { + for (const auto & [disk, rel_path] : result.files) + { + try { disk->removeFileIfExists(rel_path); } + catch (...) { tryLogCurrentException(log, log_tag); } + } + if (fdb_index) + fdb_index->evictPart(result.partition_id, result.hash_high); + subtractFromPartitionStats(result); + total_evicted += result.count; + } +} + +void DiskCacheTTL::subtractFromPartitionStats(const CacheEraseResult & result) +{ + if (result.count == 0) + return; + // shared_lock suffices: we only decrement existing atomics, no map insert/rehash. 
+ std::shared_lock lk(cache_stats.partition_stats_mutex); + auto it = cache_stats.partition_stats.find(result.partition_id); + if (it == cache_stats.partition_stats.end()) + return; + auto & ps = it->second; + ps.entry_count -= result.count; + ps.total_bytes -= result.bytes; +} + +void DiskCacheTTL::set(const String& seg_name, ReadBuffer& value, size_t weight_hint, bool is_preload, time_t /*min_time*/, time_t max_time) +{ + if (is_droping) + { + LOG_WARNING(log, fmt::format("skip write disk cache for droping disk cache is running")); + return; + } + + if (weight_hint == 0) + return; + + // Use provided max_time if available, else parse from partition_id + time_t part_ts = (max_time > 0) ? max_time : parsePartitionTimestamp(seg_name); + if (!shouldCache(part_ts)) + { + if (part_ts == 0) + cache_stats.rejected_non_time_partition++; + else + cache_stats.rejected_too_old++; + LOG_TRACE(log, "Skipping cache for expired partition: {}", seg_name); + return; + } + + if (set_rate_throttler) + { + set_rate_throttler->add(1); + } + + ProfileEvents::increment(ProfileEvents::DiskCacheSetTotalOps, 1, Metrics::MetricType::Rate, {{"type", (is_preload ? 
"preload": "query")}}); + + auto key = hash(seg_name); + String part_name = extractPartName(seg_name); + String partition_id = extractPartitionId(part_name); + bool is_idx_seg = endsWith(seg_name, INDEX_FILE_EXTENSION); + time_t cached_at = time(nullptr); + + auto & shard = getShard(key.items[0]); + + // First lock: check if already exists, reserve slot + { + Stopwatch wait_sw; + std::unique_lock lock(shard.mutex); + if (wait_sw.elapsedMicroseconds() > 1000) + LOG_WARNING(log, "[ttl-perf] set() first lock waited {} us", wait_sw.elapsedMicroseconds()); + + if (shard.cache_map.find(key) != shard.cache_map.end()) + return; + + shard.cache_map[key] = std::make_shared( + DiskCacheTTLMeta::State::Caching, nullptr, 0, cached_at, part_ts + ); + } + + ReservationPtr reserved_space = nullptr; + try + { + reserved_space = volume->reserve(weight_hint); + if (reserved_space == nullptr) + { + throw Exception("Failed to reserve space", ErrorCodes::BAD_ARGUMENTS); + } + + String cache_rel_path = getRelativePath(key, seg_name).string(); + size_t weight = writeSegment(value, reserved_space, cache_rel_path); + ProfileEvents::increment(ProfileEvents::DiskCacheSetTotalBytes, weight, Metrics::MetricType::Rate, {{"type", (is_preload ? 
"preload": "query")}}); + + { + Stopwatch wait_sw; + std::unique_lock lock(shard.mutex); + if (wait_sw.elapsedMicroseconds() > 1000) + LOG_WARNING(log, "[ttl-perf] set() second lock waited {} us", wait_sw.elapsedMicroseconds()); + + auto meta = std::make_shared( + DiskCacheTTLMeta::State::Cached, reserved_space->getDisk(), weight, cached_at, part_ts, cache_rel_path + ); + cacheInsertLocked(shard, key, meta, partition_id); + total_entries++; + total_size += weight; + + // Track write source (preload vs query), split by segment type + if (is_preload) + { + if (is_idx_seg) { cache_stats.cached_idx_from_preload++; cache_stats.cached_idx_bytes_preload += weight; } + else { cache_stats.cached_from_preload++; cache_stats.cached_bytes_preload += weight; } + } + else + { + if (is_idx_seg) { cache_stats.cached_idx_from_query++; cache_stats.cached_idx_bytes_query += weight; } + else { cache_stats.cached_from_query++; cache_stats.cached_bytes_query += weight; } + } + + // Update global TTL usage + DiskCacheFactory::instance().addGlobalTTLUsage(weight); + } + + // Update partition stats outside shard mutex to avoid lock ordering with partition_stats_mutex + addToPartitionStats(partition_id, part_ts, weight); + + if (fdb_index) + fdb_index->onSet(key, seg_name, weight, part_ts); + + // Async size-based eviction once the hard cap is exceeded. + // max_size_bytes is always set (factory falls back to global limit when no per-table limit + // is configured), so one check suffices. Done after addToPartitionStats so the + // just-added partition is visible to evictOldestPartitionsUntilSpace.
+ if (max_size_bytes > 0 && total_size.load() > max_size_bytes) + { + time_t now = time(nullptr); + time_t last_trigger = last_size_eviction_trigger.load(); + + // Rate limit: at most once per 10 seconds + if (now - last_trigger > 10) + { + if (last_size_eviction_trigger.compare_exchange_strong(last_trigger, now)) + { + size_t excess = total_size.load() - max_size_bytes; + size_t target_free = excess + max_size_bytes * 0.10; + cache_stats.async_eviction_triggered++; + LOG_DEBUG(log, "Table cache {}% full, scheduling async eviction to free {} bytes", + (total_size.load() * 100 / max_size_bytes), target_free); + + auto & thread_pool = IDiskCache::getEvictPool(); + thread_pool.scheduleOrThrow([this, target_free] { + Stopwatch watch; + evictOldestPartitionsUntilSpace(target_free); + LOG_INFO(log, "Async size-based eviction freed space in {} ms", + watch.elapsedMilliseconds()); + }); + } + } + else + { + cache_stats.async_eviction_skipped_rate_limit++; + } + } + } + catch(const Exception & e) + { + String local_disk_path = reserved_space == nullptr ? 
"" : reserved_space->getDisk()->getPath(); + tryLogCurrentException(log, fmt::format("Failed to write key {} " + "to local, disk path: {}, weight: {}, fail: {}", seg_name, local_disk_path, weight_hint, e.message())); + + std::unique_lock lock(shard.mutex); + cacheEraseLocked(shard, key); // also cleans up part_index reservation slot + } +} + +std::pair DiskCacheTTL::get(const String & seg_name) +{ + ProfileEvents::increment(ProfileEvents::DiskCacheGetTotalOps); + Stopwatch watch; + SCOPE_EXIT({ProfileEvents::increment(ProfileEvents::DiskCacheGetMetaMicroSeconds, + watch.elapsedMicroseconds());}); + + // Periodic eviction check (every 5 minutes) + time_t now = time(nullptr); + time_t last_check = last_eviction_check.load(); + if (now - last_check > 300) + { + if (last_eviction_check.compare_exchange_strong(last_check, now)) + { + // Trigger eviction asynchronously + auto & thread_pool = IDiskCache::getEvictPool(); + thread_pool.scheduleOrThrow([this] { evictExpired(); }); + } + } + + auto key = hash(seg_name); + bool is_idx_seg = endsWith(seg_name, INDEX_FILE_EXTENSION); + + DiskPtr disk; + String rel_path; + CacheEraseResult erase_result; + + auto & shard = getShard(key.items[0]); + { + Stopwatch wait_sw; + std::shared_lock lock(shard.mutex); + if (wait_sw.elapsedMicroseconds() > 1000) + LOG_WARNING(log, "[ttl-perf] get() lock waited {} us, shard_size={}", wait_sw.elapsedMicroseconds(), shard.cache_map.size()); + + auto it = shard.cache_map.find(key); + if (it == shard.cache_map.end() || it->second->state != DiskCacheTTLMeta::State::Cached) + { + if (is_idx_seg) { cache_stats.idx_misses++; ProfileEvents::increment(ProfileEvents::DiskCacheIdxMisses); } + else { cache_stats.data_misses++; ProfileEvents::increment(ProfileEvents::DiskCacheDataMisses); } + } + else if (unlikely(it->second->disk == nullptr)) + { + // Corrupted entry: upgrade to exclusive lock to erase + lock.unlock(); + std::unique_lock ulock(shard.mutex); + auto it2 = shard.cache_map.find(key); + if 
(it2 != shard.cache_map.end() && it2->second->disk == nullptr) + { + LOG_ERROR(log, "Cached entry {} has null disk — corrupted meta, evicting", seg_name); + erase_result = cacheEraseLocked(shard, key); + if (erase_result.count > 0) + { + total_entries--; + total_size -= erase_result.bytes; + DiskCacheFactory::instance().releaseGlobalTTL(erase_result.bytes); + } + } + if (is_idx_seg) { cache_stats.idx_misses++; ProfileEvents::increment(ProfileEvents::DiskCacheIdxMisses); } + else { cache_stats.data_misses++; ProfileEvents::increment(ProfileEvents::DiskCacheDataMisses); } + } + else + { + if (is_idx_seg) { cache_stats.idx_hits++; ProfileEvents::increment(ProfileEvents::DiskCacheIdxHits); } + else { cache_stats.data_hits++; ProfileEvents::increment(ProfileEvents::DiskCacheDataHits); } + disk = it->second->disk; + rel_path = it->second->rel_path; + } + } + + if (erase_result.count > 0) + subtractFromPartitionStats(erase_result); + + return {disk, rel_path}; +} + +size_t DiskCacheTTL::writeSegment(ReadBuffer& buffer, ReservationPtr& reservation, const String& cache_rel_path) +{ + DiskPtr disk = reservation->getDisk(); + String temp_cache_rel_path = cache_rel_path + ".temp"; + + try + { + disk->createDirectories(fs::path(cache_rel_path).parent_path()); + + size_t written_size = 0; + { + WriteBufferFromFile to( + fs::path(disk->getPath()) / temp_cache_rel_path, DBMS_DEFAULT_BUFFER_SIZE, -1, 0666, nullptr, 0, set_throughput_throttler); + copyData(buffer, to, reservation.get()); + to.finalize(); + written_size = to.count(); + } + + disk->replaceFile(temp_cache_rel_path, cache_rel_path); + + if (disk->getFileSize(cache_rel_path) != written_size) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "cached {} file size {} doesn't match written size {}", + cache_rel_path, + disk->getFileSize(cache_rel_path), + written_size); + + return written_size; + } + catch (...) 
+ { + disk->removeFileIfExists(temp_cache_rel_path); + disk->removeFileIfExists(cache_rel_path); + throw; + } +} + +void DiskCacheTTL::evictExpired() +{ + // Single lock: scan + erase in one critical section — no disk I/O happens inside. + // Collect expired hash_highs first, then erase in a second pass + // to avoid iterator invalidation from cacheErasePartLocked. + std::vector erase_results; + size_t evicted_bytes = 0; + + for (auto & shard : shards) + { + std::unique_lock lock(shard.mutex); + + std::vector expired_hash_highs; + for (const auto & [hash_high, entry] : shard.part_index) + { + if (entry.keys.empty()) continue; + auto sample = shard.cache_map.find(*entry.keys.begin()); + if (sample != shard.cache_map.end() && !shouldCache(sample->second->max_timestamp)) + expired_hash_highs.push_back(hash_high); + } + + for (UInt64 hash_high : expired_hash_highs) + { + auto result = cacheErasePartLocked(shard, hash_high); + if (result.count > 0) + { + total_entries -= result.count; + total_size -= result.bytes; + evicted_bytes += result.bytes; + erase_results.push_back(std::move(result)); + } + } + } + + if (erase_results.empty()) + { + cache_stats.last_eviction_run = time(nullptr); + return; + } + + size_t total_evicted = 0; + applyEraseResults(erase_results, total_evicted, "Failed to evict expired segment"); + cache_stats.evicted_expired += total_evicted; + DiskCacheFactory::instance().releaseGlobalTTL(evicted_bytes); + + LOG_INFO(log, "Evicted {} expired segments, freed {} bytes", total_evicted, evicted_bytes); + cache_stats.last_eviction_run = time(nullptr); +} + +void DiskCacheTTL::evictOldestPartitionsUntilSpace(size_t needed_bytes) +{ + size_t cur = total_size.load(); + size_t target_size = cur > needed_bytes ? 
cur - needed_bytes : 0; + + LOG_DEBUG(log, "Size eviction: current={}, needed={}, target={}", cur, needed_bytes, target_size); + + size_t total_evicted = 0; + size_t evicted_bytes = 0; + + for (auto & shard : shards) + { + if (total_size.load() <= target_size) + break; + + // Snapshot part timestamps under a short lock — no allocations, just push_backs. + std::vector> by_ts; // (partition_ts, hash_high) + { + std::unique_lock lock(shard.mutex); + by_ts.reserve(shard.part_index.size()); + for (const auto & [hash_high, entry] : shard.part_index) + by_ts.emplace_back(entry.partition_ts, hash_high); + } + + // Sort oldest-first outside the lock. + std::sort(by_ts.begin(), by_ts.end()); + + // Evict under a second lock. Parts may have been removed between the two locks; + // cacheErasePartLocked returns count=0 for missing entries and is skipped. + std::vector erase_results; + { + std::unique_lock lock(shard.mutex); + size_t current_size = total_size.load(); + for (auto & [ts, hash_high] : by_ts) + { + if (current_size <= target_size) + break; + + auto result = cacheErasePartLocked(shard, hash_high); + if (result.count > 0) + { + total_entries -= result.count; + total_size -= result.bytes; + current_size -= result.bytes; + evicted_bytes += result.bytes; + erase_results.push_back(std::move(result)); + } + } + } + + applyEraseResults(erase_results, total_evicted, "Failed to evict segment for size limit"); + } + + if (total_evicted > 0) + { + cache_stats.evicted_size_limit += total_evicted; + DiskCacheFactory::instance().releaseGlobalTTL(evicted_bytes); + LOG_INFO(log, "Evicted {} segments from oldest parts for size limit, freed {} bytes", + total_evicted, evicted_bytes); + } +} + +void DiskCacheTTL::load() +{ + if (fdb_index) + { + auto result = fdb_index->reconcile( + volume, + [this](UInt128 key, const String & seg_name) { return getRelativePath(key, seg_name); }, + [this](time_t ts) { return shouldCache(ts); }, + [this](TTLCacheFDBIndex::ReconcileBatch & batch) { + // 
Group by shard: one lock per shard instead of one per entry. + std::array>>, NUM_SHARDS> by_shard; + for (auto & [key, meta] : batch) + by_shard[key.items[0] & (NUM_SHARDS - 1)].emplace_back(key, meta); + for (size_t i = 0; i < NUM_SHARDS; ++i) + { + if (by_shard[i].empty()) + continue; + std::unique_lock lock(shards[i].mutex); + for (auto & [key, meta] : by_shard[i]) + cacheInsertLocked(shards[i], key, meta); + } + + // Batch stats update: one lock per unique partition instead of one per entry. + std::unordered_map> stats_acc; // pid -> (ts, bytes, count) + for (auto & [key, meta] : batch) + { + auto pid = formatPartitionId(meta->max_timestamp); + auto & [ts, bytes, count] = stats_acc[pid]; + ts = meta->max_timestamp; + bytes += meta->size; + count++; + } + for (auto & [pid, tbc] : stats_acc) + { + auto & [ts, bytes, count] = tbc; + addToPartitionStats(pid, ts, bytes, count); + cache_stats.cached_from_restored += count; + cache_stats.cached_bytes_restored += bytes; + } + }); + + if (result) + { + auto [entries, bytes] = *result; + // fetch_add: concurrent set() calls may have already bumped these counters between cache registration and now + total_entries.fetch_add(entries, std::memory_order_relaxed); + total_size.fetch_add(bytes, std::memory_order_relaxed); + + LOG_INFO(log, "TTL cache for {} recovered from FDB index: {} entries, {} bytes", + table_uuid, entries, bytes); + return; + } + // reconcile() already logged the per-entry summary (restored/stale counts) + LOG_WARNING(log, "TTL cache for {}: FDB index had no restorable entries, falling back to disk scan", table_uuid); + } + else + { + LOG_WARNING(log, "TTL cache for {}: no FDB index available, loading from disk scan", table_uuid); + } + + LOG_INFO(log, "Loading TTL disk cache from disk scan for {}...", table_uuid); + + for (const auto & disk : volume->getDisks()) + { + DiskCacheLoader loader(*this, disk, settings.cache_loader_per_disk, + settings.cache_load_dispatcher_drill_down_level, + 
settings.cache_load_dispatcher_drill_down_level); + + for (const auto & dir_path : previous_disk_cache_dirs) + { + if (disk->exists(dir_path)) + loader.exec(dir_path); + } + + if (disk->exists(latest_disk_cache_dir)) + loader.exec(latest_disk_cache_dir); + + LOG_INFO(log, "Loaded {} segments from disk {}", loader.total_loaded, disk->getName()); + } + + LOG_INFO(log, "TTL disk cache load complete. Total: {} segments, {} bytes", total_entries.load(), total_size.load()); + + // Post-scan eviction: trigger synchronously now that partition_stats are fully populated. + // This handles the deadlock where a disk that was overfull before restart has all subsequent + // set() calls fail at volume->reserve() before reaching the eviction check in the write path, + // leaving the cache stuck full with no way to self-recover via normal writes. + // max_size_bytes is always set (factory falls back to global limit), so one check suffices. + // Use hard cap (not 90%) — max_size_bytes already encodes the configured percent of disk. 
+ if (max_size_bytes > 0 && total_size.load() > max_size_bytes) + { + size_t excess = total_size.load() - max_size_bytes; + size_t target_free = excess + max_size_bytes * 0.10; + LOG_INFO(log, "Post-scan eviction triggered: total_size={}, max={}, freeing {} bytes", + total_size.load(), max_size_bytes, target_free); + evictOldestPartitionsUntilSpace(target_free); + } +} + +size_t DiskCacheTTL::drop(const String & part_base_path) +{ + // New structure: uuid/partition/3char/hash_part/ + // part_base_path format: "uuid/part_name" + fs::path meta_path, data_path; + + if (part_base_path.empty()) + { + // Drop entire cache for this table + if (type == DataType::ALL || type == DataType::META) + meta_path = fs::path(latest_disk_cache_dir) / META_DISK_CACHE_DIR_PREFIX / table_uuid; + if (type == DataType::ALL || type == DataType::DATA) + data_path = fs::path(latest_disk_cache_dir) / DATA_DISK_CACHE_DIR_PREFIX / table_uuid; + } + else + { + // Drop specific part: extract uuid and part_name from part_base_path + String uuid = extractUUID(part_base_path); + String part_name = extractPartName(part_base_path); + + if (type == DataType::ALL || type == DataType::META) + meta_path = fs::path(latest_disk_cache_dir) / getRelativePathForPart(uuid, part_name, META_DISK_CACHE_DIR_PREFIX); + if (type == DataType::ALL || type == DataType::DATA) + data_path = fs::path(latest_disk_cache_dir) / getRelativePathForPart(uuid, part_name, DATA_DISK_CACHE_DIR_PREFIX); + } + + LOG_TRACE(log, "Dropping cache for part {} (meta: {}, data: {})", part_base_path, meta_path.string(), data_path.string()); + + const Disks & disks = volume->getDisks(); + size_t delete_file_size = 0; + + for (const auto & disk : disks) + { + if (!meta_path.empty() && disk->exists(meta_path)) + { + DiskCacheDeleter deleter(*this, disk, 1, -1, -1); + deleter.exec(meta_path); + delete_file_size += deleter.delete_file_size; + } + + if (!data_path.empty() && disk->exists(data_path)) + { + DiskCacheDeleter deleter(*this, disk, 1, 
-1, -1); + deleter.exec(data_path); + delete_file_size += deleter.delete_file_size; + } + } + + if (part_base_path.empty()) + { + size_t dropped_bytes = total_size.load(); + for (auto & shard : shards) + { + std::unique_lock lock(shard.mutex); + shard.cache_map.clear(); + shard.part_index.clear(); + } + total_entries.store(0); + total_size.store(0); + DiskCacheFactory::instance().releaseGlobalTTL(dropped_bytes); + + std::unique_lock lk(cache_stats.partition_stats_mutex); + cache_stats.partition_stats.clear(); + } + else + { + String part_name_only = extractPartName(part_base_path); + UInt64 hash_high = sipHash64(part_name_only.data(), part_name_only.size()); + + auto & shard = getShard(hash_high); + CacheEraseResult result; + { + std::unique_lock lock(shard.mutex); + result = cacheErasePartLocked(shard, hash_high); + if (result.count > 0) + { + total_entries -= result.count; + total_size -= result.bytes; + DiskCacheFactory::instance().releaseGlobalTTL(result.bytes); + } + } + if (result.count > 0) + { + subtractFromPartitionStats(result); + if (fdb_index) + fdb_index->evictPart(result.partition_id, result.hash_high); + } + } + + LOG_TRACE(log, "Dropped {} bytes of cache for part {}", delete_file_size, part_base_path); + return delete_file_size; +} + +// DiskIterator implementations +DiskCacheTTL::DiskIterator::DiskIterator( + const String & name_, DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk_, int min_depth_parallel_, int max_depth_parallel_) + : name(name_), disk_cache(cache_), disk(disk_), worker_per_disk(worker_per_disk_), + min_depth_parallel(min_depth_parallel_), max_depth_parallel(max_depth_parallel_) +{ + log = &Poco::Logger::get(name); + + if (worker_per_disk > 1) + pool = std::make_unique(worker_per_disk); +} + +void DiskCacheTTL::DiskIterator::exec(std::filesystem::path entry_path) +{ + iterateDirectory(entry_path, 0); + + if (pool) + pool->wait(); +} + +void DiskCacheTTL::DiskIterator::iterateDirectory(std::filesystem::path rel_path, 
size_t depth) +{ + if (!disk->exists(rel_path)) + return; + + for (auto it = disk->iterateDirectory(rel_path); it->isValid(); it->next()) + { + auto entry_path = rel_path / it->name(); + + if (disk->isDirectory(entry_path)) + { + iterateDirectory(entry_path, depth + 1); + } + else if (disk->isFile(entry_path)) + { + iterateFile(entry_path, disk->getFileSize(entry_path)); + } + } +} + +// DiskCacheLoader +DiskCacheTTL::DiskCacheLoader::DiskCacheLoader( + DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk_, int min_depth_parallel_, int max_depth_parallel_) + : DiskIterator("DiskCacheTTLLoader", cache_, disk_, worker_per_disk_, min_depth_parallel_, max_depth_parallel_) +{ +} + +DiskCacheTTL::DiskCacheLoader::~DiskCacheLoader() +{ +} + +void DiskCacheTTL::DiskCacheLoader::iterateFile(std::filesystem::path file_path, size_t file_size) +{ + String filename = file_path.filename(); + + // Skip temp files + if (endsWith(filename, DISK_CACHE_TEMP_FILE_SUFFIX)) + { + disk->removeFileIfExists(file_path); + return; + } + + // Skip and clean up 0-byte files — they indicate an interrupted or empty write + // and would cause false cache HITs returning empty content. + if (file_size == 0) + { + disk->removeFileIfExists(file_path); + return; + } + + // Path structure: {cache_dir}/{data|meta}/{uuid}/{partition}/{3char}/{hash_high}/{hash_low} + // The filename is hash_low (low 64 bits of the key) and the parent dir is hash_high. + // Parse each half as a hex UInt64 to reconstruct the full UInt128 key. 
+ if (!isHexHalf(filename)) + { + LOG_WARNING(log, "Invalid cache file (hash_low): {}", file_path.string()); + return; + } + UInt64 low = unhexUInt(filename.data()); + + // New structure: data/uuid/partition/3char/hash_high/hash_low + // Extract partition from path hierarchy + auto hash_high_dir = file_path.parent_path().filename().string(); // hash_high + auto partition_dir = file_path.parent_path().parent_path().parent_path().filename().string(); // partition_id + + if (!isHexHalf(hash_high_dir)) + { + LOG_WARNING(log, "Invalid cache directory (hash_high): {}", file_path.string()); + return; + } + UInt64 high = unhexUInt(hash_high_dir.data()); + + // Build full key matching UInt128{high, low} as returned by unhexKey + UInt128 key = {high, low}; + + // Parse timestamp from partition_id (e.g., "20240315") + time_t part_ts = 0; + if (partition_dir.size() >= 8 && std::all_of(partition_dir.begin(), partition_dir.end(), ::isdigit)) + { + try + { + int year = std::stoi(partition_dir.substr(0, 4)); + int month = std::stoi(partition_dir.substr(4, 2)); + int day = std::stoi(partition_dir.substr(6, 2)); + + struct tm tm_info = {}; + tm_info.tm_year = year - 1900; + tm_info.tm_mon = month - 1; + tm_info.tm_mday = day; + tm_info.tm_isdst = -1; + + part_ts = mktime(&tm_info); + } + catch (...) + { + LOG_WARNING(log, "Failed to parse partition timestamp from: {}", partition_dir); + } + } + + // Skip expired or non-time-based segments; delete the stale file so it + // doesn't accumulate on disk across restarts. 
+ if (!disk_cache.shouldCache(part_ts)) + { + disk->removeFileIfExists(file_path); + return; + } + + String file_path_str = file_path.string(); + { + auto & shard = disk_cache.getShard(high); + std::unique_lock lock(shard.mutex); + auto meta = std::make_shared( + DiskCacheTTLMeta::State::Cached, disk, file_size, time(nullptr), part_ts, std::move(file_path_str) + ); + disk_cache.cacheInsertLocked(shard, key, meta, partition_dir); + disk_cache.total_entries++; + disk_cache.total_size += file_size; + DiskCacheFactory::instance().addGlobalTTLUsage(file_size); + } + + // Update partition stats outside shard mutex to avoid lock ordering with partition_stats_mutex + disk_cache.addToPartitionStats(partition_dir, part_ts, file_size); + disk_cache.cache_stats.cached_from_restored++; + disk_cache.cache_stats.cached_bytes_restored += file_size; + + total_loaded++; +} + +// DiskCacheMigrator (stub) +DiskCacheTTL::DiskCacheMigrator::DiskCacheMigrator( + DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk_, int min_depth_parallel_, int max_depth_parallel_) + : DiskIterator("DiskCacheTTLMigrator", cache_, disk_, worker_per_disk_, min_depth_parallel_, max_depth_parallel_) +{ +} + +DiskCacheTTL::DiskCacheMigrator::~DiskCacheMigrator() +{ +} + +void DiskCacheTTL::DiskCacheMigrator::iterateFile(std::filesystem::path, size_t) +{ +} + +// DiskCacheDeleter (stub) +DiskCacheTTL::DiskCacheDeleter::DiskCacheDeleter( + DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk_, int min_depth_parallel_, int max_depth_parallel_) + : DiskIterator("DiskCacheTTLDeleter", cache_, disk_, worker_per_disk_, min_depth_parallel_, max_depth_parallel_) +{ +} + +DiskCacheTTL::DiskCacheDeleter::~DiskCacheDeleter() +{ +} + +void DiskCacheTTL::DiskCacheDeleter::exec(std::filesystem::path entry_path) +{ + disk->removeRecursive(entry_path); +} + +void DiskCacheTTL::DiskCacheDeleter::iterateFile(std::filesystem::path, size_t) +{ +} + +DiskCacheTTL::TTLCacheStats DiskCacheTTL::getStats() const +{ + 
TTLCacheStats stats; + stats.table_uuid = cache_stats.table_uuid; + stats.total_entries = total_entries.load(); + stats.total_bytes = total_size.load(); + stats.evicted_expired = cache_stats.evicted_expired.load(); + stats.evicted_size_limit = cache_stats.evicted_size_limit.load(); + stats.rejected_non_time_partition = cache_stats.rejected_non_time_partition.load(); + stats.rejected_too_old = cache_stats.rejected_too_old.load(); + stats.last_eviction_run = cache_stats.last_eviction_run.load(); + stats.async_eviction_triggered = cache_stats.async_eviction_triggered.load(); + stats.async_eviction_skipped_rate_limit = cache_stats.async_eviction_skipped_rate_limit.load(); + stats.cached_from_preload = cache_stats.cached_from_preload.load(); + stats.cached_from_query = cache_stats.cached_from_query.load(); + stats.cached_bytes_preload = cache_stats.cached_bytes_preload.load(); + stats.cached_bytes_query = cache_stats.cached_bytes_query.load(); + stats.cached_from_restored = cache_stats.cached_from_restored.load(); + stats.cached_bytes_restored = cache_stats.cached_bytes_restored.load(); + stats.cached_idx_from_preload = cache_stats.cached_idx_from_preload.load(); + stats.cached_idx_bytes_preload = cache_stats.cached_idx_bytes_preload.load(); + stats.cached_idx_from_query = cache_stats.cached_idx_from_query.load(); + stats.cached_idx_bytes_query = cache_stats.cached_idx_bytes_query.load(); + stats.data_hits = cache_stats.data_hits.load(); + stats.data_misses = cache_stats.data_misses.load(); + stats.idx_hits = cache_stats.idx_hits.load(); + stats.idx_misses = cache_stats.idx_misses.load(); + stats.total_hits = stats.data_hits + stats.idx_hits; + stats.total_misses = stats.data_misses + stats.idx_misses; + return stats; +} + +std::vector DiskCacheTTL::getPartitionStats() const +{ + std::vector result; + std::shared_lock lock(cache_stats.partition_stats_mutex); + result.reserve(cache_stats.partition_stats.size()); + for (const auto & [partition_id, internal_stats] : 
cache_stats.partition_stats) + { + PartitionStats snapshot; + snapshot.partition_id = internal_stats.partition_id; + snapshot.entry_count = internal_stats.entry_count.load(); + snapshot.total_bytes = internal_stats.total_bytes.load(); + snapshot.partition_timestamp = internal_stats.partition_timestamp; + result.push_back(snapshot); + } + return result; +} + +std::optional DiskCacheTTL::findPeerOwner(const String & seg_name) +{ + if (!fdb_index) + return std::nullopt; + + auto key = hash(seg_name); + String part_name = extractPartName(seg_name); + String partition_id = extractPartitionId(part_name); + + auto maybe_worker_id = fdb_index->findPeerOwner(key, partition_id); + if (!maybe_worker_id) + return std::nullopt; + + return DiskCacheFactory::instance().resolveWorkerEndpoint(*maybe_worker_id); +} + +} + diff --git a/src/Storages/DiskCache/DiskCacheTTL.h b/src/Storages/DiskCache/DiskCacheTTL.h new file mode 100644 index 00000000000..7d35c791a27 --- /dev/null +++ b/src/Storages/DiskCache/DiskCacheTTL.h @@ -0,0 +1,349 @@ +/* + * Copyright (2022) Bytedance Ltd. and/or its affiliates + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class TTLCacheFDBIndex; + +class DiskCacheTTLMeta +{ +public: + enum class State + { + Caching, + Cached, + Deleting, + }; + + DiskCacheTTLMeta(State state_, const DiskPtr & disk_, size_t size_, time_t cached_at_, time_t max_ts_, String rel_path_ = {}) + : state(state_), disk(disk_), size(size_), cached_at(cached_at_), max_timestamp(max_ts_), rel_path(std::move(rel_path_)) + {} + + State state; + DiskPtr disk; + size_t size; + time_t cached_at; + time_t max_timestamp; + String rel_path; // exact on-disk relative path; avoids reconstructing prefix (data/ vs meta/) at eviction time +}; + +struct DiskCacheTTLWeightFunction +{ + size_t operator()(const DiskCacheTTLMeta& meta) const + { + if (meta.state == DiskCacheTTLMeta::State::Cached) + return meta.size; + return 0; + } +}; + +/// TTL-based disk cache +/// Evicts parts based on partition timestamp and retention window +/// Parallel to DiskCacheLRU +class DiskCacheTTL: public IDiskCache +{ +public: + using KeyType = UInt128; + + DiskCacheTTL( + const String & name_, + const String & table_uuid_, + const VolumePtr & volume, + const ThrottlerPtr & throttler, + const DiskCacheSettings & settings, + const IDiskCacheStrategyPtr & strategy_, + UInt64 ttl_minutes_, + size_t max_size_bytes_ = 0, // 0 = use settings.ttl_cache_max_size + IDiskCache::DataType type_ = IDiskCache::DataType::ALL); + + void set(const String& seg_name, ReadBuffer& value, size_t weight_hint, bool is_preload, time_t min_time = 0, time_t max_time = 0) override; + std::pair get(const String& seg_name) override; + void load() override; + size_t drop(const String & part_name) override; + + size_t getKeyCount() const override { return total_entries.load(); } + size_t getCachedSize() const override { return total_size.load(); } + std::filesystem::path getRelativePath(const KeyType & key, const 
String & seg_name, const String & prefix = {}) { return getPath(key, latest_disk_cache_dir, seg_name, prefix);} + + std::filesystem::path getPath(const KeyType & key, const String & path, const String & seg_name, const String & prefix) const; + + static KeyType hash(const String & seg_name); + static String hexKey(const KeyType & key); + + void evictExpired(); + void evictOldestPartitionsUntilSpace(size_t needed_bytes); + static std::optional unhexKey(const String & hex); + + /// Parse partition timestamp from part name + /// Returns 0 if partition is not time-based + static time_t parsePartitionTimestamp(const String & part_name); + + // Stats structures for observability + + // Internal stats with atomics (not copyable) + struct PartitionStatsInternal + { + String partition_id; + std::atomic entry_count{0}; + std::atomic total_bytes{0}; + time_t partition_timestamp{0}; + }; + + // Snapshot for return (plain types, copyable) + struct PartitionStats + { + String partition_id; + size_t entry_count{0}; + size_t total_bytes{0}; + time_t partition_timestamp{0}; + }; + + // Snapshot for return (plain types, copyable) + struct TTLCacheStats + { + String table_uuid; + size_t total_entries{0}; + size_t total_bytes{0}; + + // TTL-specific counters + size_t evicted_expired{0}; + size_t evicted_size_limit{0}; + size_t rejected_non_time_partition{0}; + size_t rejected_too_old{0}; + time_t last_eviction_run{0}; + + // Async size-based eviction stats + size_t async_eviction_triggered{0}; + size_t async_eviction_skipped_rate_limit{0}; + + // Write source breakdown (preload vs query-triggered vs restored from FDB on startup) + size_t cached_from_preload{0}; + size_t cached_from_query{0}; + size_t cached_bytes_preload{0}; + size_t cached_bytes_query{0}; + size_t cached_from_restored{0}; + size_t cached_bytes_restored{0}; + // Skip-index write breakdown (same events, idx extension only) + size_t cached_idx_from_preload{0}; + size_t cached_idx_bytes_preload{0}; + size_t 
cached_idx_from_query{0}; + size_t cached_idx_bytes_query{0}; + + // Aggregated hit/miss counts across all partitions, by segment type + size_t total_hits{0}; + size_t total_misses{0}; + size_t data_hits{0}; + size_t data_misses{0}; + size_t idx_hits{0}; + size_t idx_misses{0}; + }; + + // Internal stats with atomics + struct TTLCacheStatsInternal + { + String table_uuid; + + // TTL-specific counters + std::atomic evicted_expired{0}; + std::atomic evicted_size_limit{0}; + std::atomic rejected_non_time_partition{0}; + std::atomic rejected_too_old{0}; + std::atomic last_eviction_run{0}; + + // Async size-based eviction stats + std::atomic async_eviction_triggered{0}; + std::atomic async_eviction_skipped_rate_limit{0}; + + // Write source breakdown (preload vs query-triggered vs restored from FDB on startup) + std::atomic cached_from_preload{0}; + std::atomic cached_from_query{0}; + std::atomic cached_bytes_preload{0}; + std::atomic cached_bytes_query{0}; + std::atomic cached_from_restored{0}; + std::atomic cached_bytes_restored{0}; + // Skip-index write breakdown + std::atomic cached_idx_from_preload{0}; + std::atomic cached_idx_bytes_preload{0}; + std::atomic cached_idx_from_query{0}; + std::atomic cached_idx_bytes_query{0}; + + // Aggregated hit/miss by segment type + std::atomic data_hits{0}; + std::atomic data_misses{0}; + std::atomic idx_hits{0}; + std::atomic idx_misses{0}; + + // Per-partition breakdown + mutable std::shared_mutex partition_stats_mutex; + std::unordered_map partition_stats; + }; + + TTLCacheStats getStats() const; + std::vector getPartitionStats() const; + + UInt64 getTTLMinutes() const { return ttl_minutes.load(std::memory_order_relaxed); } + size_t getMaxSizeBytes() const { return max_size_bytes.load(std::memory_order_relaxed); } + void setFDBIndex(std::shared_ptr idx) { fdb_index = std::move(idx); } + + void updateSettings(UInt64 new_ttl_minutes, size_t new_max_size_bytes) + { + ttl_minutes.store(new_ttl_minutes, std::memory_order_relaxed); 
+ max_size_bytes.store(new_max_size_bytes, std::memory_order_relaxed); + } + + /// Look up whether a peer worker has this segment cached via the FDB reverse index. + /// Returns peer RPC endpoint if found, nullopt if not found or FDB unavailable. + /// Gated on fdb_index being set; caller is responsible for checking stealing mode. + std::optional findPeerOwner(const String & seg_name); + +private: + struct CacheEraseResult { + String partition_id; + time_t partition_ts{0}; + UInt64 hash_high{0}; + size_t count{0}; + size_t bytes{0}; + std::vector> files; + }; + + struct PartIndexEntry { + String partition_id; + time_t partition_ts{0}; + std::unordered_set keys; + size_t total_bytes{0}; + }; + + size_t writeSegment(ReadBuffer& buffer, ReservationPtr& reservation, const String& cache_rel_path); + bool shouldCache(time_t part_ts) const; + + static constexpr size_t NUM_SHARDS = 64; + + struct Shard { + mutable std::shared_mutex mutex; + std::unordered_map, UInt128Hash> cache_map; + std::unordered_map part_index; + }; + + Shard & getShard(UInt64 hash_high) { return shards[hash_high & (NUM_SHARDS - 1)]; } + + /// Structural helpers — caller must hold shard.mutex + void cacheInsertLocked(Shard & shard, KeyType key, std::shared_ptr meta, const String & precomputed_partition_id = {}); + CacheEraseResult cacheEraseLocked(Shard & shard, KeyType key); + CacheEraseResult cacheErasePartLocked(Shard & shard, UInt64 hash_high); + + /// Stats helpers — caller must NOT hold any shard mutex + void addToPartitionStats(const String & partition_id, time_t partition_ts, size_t bytes, size_t count = 1); + void subtractFromPartitionStats(const CacheEraseResult & result); + + /// Apply a batch of erase results: delete files, notify FDB, update partition stats. + /// Caller must NOT hold any shard mutex. Increments total_evicted by result.count for each entry. 
+ void applyEraseResults(std::vector & results, size_t & total_evicted, const char * log_tag); + + + struct DiskIterator : private boost::noncopyable + { + explicit DiskIterator( + const String & name_, DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk_, int min_depth_parallel_, int max_depth_parallel_); + virtual ~DiskIterator() = default; + + virtual void exec(std::filesystem::path entry_path); + virtual void iterateDirectory(std::filesystem::path rel_path, size_t depth); + virtual void iterateFile(std::filesystem::path file_path, size_t file_size) = 0; + + String name; + DiskCacheTTL & disk_cache; + DiskPtr disk; + size_t worker_per_disk{1}; + int min_depth_parallel{-1}; + int max_depth_parallel{-1}; + std::unique_ptr pool; + ExceptionHandler handler; + Poco::Logger * log; + }; + + struct DiskCacheLoader : DiskIterator + { + explicit DiskCacheLoader( + DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk, int min_depth_parallel, int max_depth_parallel); + ~DiskCacheLoader() override; + void iterateFile(std::filesystem::path file_path, size_t file_size) override; + + std::atomic_size_t total_loaded = 0; + }; + + struct DiskCacheMigrator : DiskIterator + { + explicit DiskCacheMigrator( + DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk, int min_depth_parallel, int max_depth_parallel); + ~DiskCacheMigrator() override; + void iterateFile(std::filesystem::path file_path, size_t file_size) override; + + std::atomic_size_t total_migrated = 0; + }; + + struct DiskCacheDeleter : DiskIterator + { + explicit DiskCacheDeleter( + DiskCacheTTL & cache_, DiskPtr disk_, size_t worker_per_disk, int min_depth_parallel, int max_depth_parallel); + ~DiskCacheDeleter() override; + void exec(std::filesystem::path entry_path) override; + void iterateFile(std::filesystem::path file_path, size_t file_size) override; + + size_t delete_file_size {0}; + }; + + /// FDB-backed index for fast startup recovery + /// optional — null if catalog unavailable + 
std::shared_ptr fdb_index; + + ThrottlerPtr set_rate_throttler; + ThrottlerPtr set_throughput_throttler; + std::atomic is_droping{false}; + + const String table_uuid; + std::atomic ttl_minutes; + std::atomic max_size_bytes; // 0 = unlimited + + std::array shards; + std::atomic total_entries{0}; + std::atomic total_size{0}; + + /// Last eviction check time + std::atomic last_eviction_check{0}; + + /// Last async size-based eviction trigger time + std::atomic last_size_eviction_trigger{0}; + + /// Cache statistics + TTLCacheStatsInternal cache_stats; +}; + +} diff --git a/src/Storages/DiskCache/IDiskCache.cpp b/src/Storages/DiskCache/IDiskCache.cpp index 91e20554830..1e22915ecb2 100644 --- a/src/Storages/DiskCache/IDiskCache.cpp +++ b/src/Storages/DiskCache/IDiskCache.cpp @@ -69,9 +69,9 @@ void IDiskCache::init(const Context & global_context) false /*throw_on_exception*/); local_disk_cache_preload_thread_pool = std::make_unique( - settings.cnch_parallel_preloading, - settings.cnch_parallel_preloading, - settings.cnch_parallel_preloading * 100, + settings.local_disk_cache_preload_thread_pool_size, + settings.local_disk_cache_preload_thread_pool_size, + settings.local_disk_cache_preload_thread_pool_size * 100, false /*throw_on_exception*/); } diff --git a/src/Storages/DiskCache/IDiskCache.h b/src/Storages/DiskCache/IDiskCache.h index 3756560dc84..b73a450a1f2 100644 --- a/src/Storages/DiskCache/IDiskCache.h +++ b/src/Storages/DiskCache/IDiskCache.h @@ -82,7 +82,8 @@ class IDiskCache : public std::enable_shared_from_this virtual void shutdown(); /// set segment name in cache and write value to disk cache - virtual void set(const String & key, ReadBuffer & value, size_t weight_hint, bool is_preload) = 0; + /// min_time/max_time: optional timestamps from part data (0 = not provided, will parse from partition_id for TTL cache) + virtual void set(const String & key, ReadBuffer & value, size_t weight_hint, bool is_preload, time_t min_time = 0, time_t max_time = 0) = 0; /// 
get segment from cache and return local path if exists. virtual std::pair get(const String & key) = 0; @@ -200,7 +201,7 @@ class MultiDiskCache : public IDiskCache return dropped_size; } - virtual void set(const String &, ReadBuffer &, size_t, bool ) override { throw Exception("MultiDiskCache `set` is not supported now", ErrorCodes::LOGICAL_ERROR);} + virtual void set(const String &, ReadBuffer &, size_t, bool, time_t = 0, time_t = 0) override { throw Exception("MultiDiskCache `set` is not supported now", ErrorCodes::LOGICAL_ERROR);} virtual std::pair get(const String &) override { throw Exception("MultiDiskCache `get` is not supported now", ErrorCodes::LOGICAL_ERROR);} virtual void load() override { throw Exception("MultiDiskCache `load` is not supported now", ErrorCodes::LOGICAL_ERROR);} virtual size_t getKeyCount() const override {throw Exception("MultiDiskCache `getKeyCount` is not supported now", ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp b/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp index b2c895ffecb..ccef92ab2dc 100644 --- a/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp +++ b/src/Storages/DiskCache/PartFileDiskCacheSegment.cpp @@ -89,7 +89,10 @@ PartFileDiskCacheSegment::PartFileDiskCacheSegment( mrk_file_pos.file_size, merge_tree_reader_settings, 1, - mark_disk_cache_) + mark_disk_cache_, + {}, + data_part_->storage.getStorageUUID(), + data_part_->getUniquePartName()) { } @@ -180,11 +183,14 @@ void PartFileDiskCacheSegment::cacheToDisk(IDiskCache & disk_cache, bool throw_e != DiskCacheMode:: FORCE_STEAL_DISK_CACHE) // FORCE_STEAL_DISK_CACHE is used for testing, which only allow remote cache request so will skip local cache write { + // Get min/max time from part for TTL cache granularity + auto [min_time, max_time] = data_part->getMinMaxTime(); + if (!preload_level || (preload_level & PreloadLevelSettings::DataPreload) == PreloadLevelSettings::DataPreload) { 
data_file->seek(stream_file_pos.file_offset + cache_data_left_offset); LimitReadBuffer segment_value(*data_file, cache_data_bytes, false); - disk_cache.getDataCache()->set(getSegmentName(), segment_value, cache_data_bytes, preload_level > 0); + disk_cache.getDataCache()->set(getSegmentName(), segment_value, cache_data_bytes, preload_level > 0, min_time, max_time); LOG_TRACE(disk_cache.getLogger(), "Cached part{} data file: {}, preload_level: {}", extension, getSegmentName(), preload_level); } @@ -194,7 +200,7 @@ void PartFileDiskCacheSegment::cacheToDisk(IDiskCache & disk_cache, bool throw_e data_file->seek(mrk_file_pos.file_offset); LimitReadBuffer marks_value(*data_file, mrk_file_pos.file_size, false); String marks_key = getMarkName(); - disk_cache.getMetaCache()->set(marks_key, marks_value, mrk_file_pos.file_size, preload_level > 0); + disk_cache.getMetaCache()->set(marks_key, marks_value, mrk_file_pos.file_size, preload_level > 0, min_time, max_time); LOG_TRACE(disk_cache.getLogger(), "Cached part{} mark file: {}, preload_level: {}", extension, marks_key, preload_level); } diff --git a/src/Storages/DiskCache/PreloadRegistry.cpp b/src/Storages/DiskCache/PreloadRegistry.cpp new file mode 100644 index 00000000000..a7ff14a27af --- /dev/null +++ b/src/Storages/DiskCache/PreloadRegistry.cpp @@ -0,0 +1,71 @@ +#include "PreloadRegistry.h" + +namespace DB +{ + +PreloadRegistry & PreloadRegistry::instance() +{ + static PreloadRegistry inst; + return inst; +} + +void PreloadRegistry::registerParts( + const String & table_name, + const String & table_uuid, + const String & partition_id, + size_t parts_count, + UInt64 preload_level) +{ + if (parts_count == 0) + return; + + Key key{table_uuid, partition_id}; + std::lock_guard lock(mu); + auto it = entries.find(key); + if (it == entries.end()) + { + auto entry = std::make_shared(table_name, table_uuid, partition_id, parts_count, preload_level); + entry->parts_in_flight.store(parts_count, std::memory_order_relaxed); + 
entries.emplace(key, std::move(entry)); + } + else + { + it->second->parts_submitted += parts_count; + it->second->parts_in_flight.fetch_add(parts_count, std::memory_order_relaxed); + } +} + +void PreloadRegistry::partFinished(const String & table_uuid, const String & partition_id) +{ + Key key{table_uuid, partition_id}; + std::lock_guard lock(mu); + auto it = entries.find(key); + if (it == entries.end()) + return; + if (it->second->parts_in_flight.fetch_sub(1, std::memory_order_acq_rel) == 1) + entries.erase(it); +} + +std::vector PreloadRegistry::getSnapshot() const +{ + std::lock_guard lock(mu); + std::vector result; + result.reserve(entries.size()); + auto now = std::chrono::steady_clock::now(); + for (const auto & [_, e] : entries) + { + auto elapsed = std::chrono::duration_cast(now - e->start_time).count(); + result.push_back({ + e->table_name, + e->table_uuid, + e->partition_id, + e->parts_in_flight.load(std::memory_order_relaxed), + e->parts_submitted, + static_cast(elapsed), + e->preload_level, + }); + } + return result; +} + +} diff --git a/src/Storages/DiskCache/PreloadRegistry.h b/src/Storages/DiskCache/PreloadRegistry.h new file mode 100644 index 00000000000..b46616d5ae3 --- /dev/null +++ b/src/Storages/DiskCache/PreloadRegistry.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +struct PreloadEntry +{ + String table_name; + String table_uuid; + String partition_id; + std::atomic parts_in_flight{0}; + size_t parts_submitted{0}; + std::chrono::steady_clock::time_point start_time; + UInt64 preload_level{0}; + + PreloadEntry(String tn, String uuid, String pid, size_t submitted, UInt64 level) + : table_name(std::move(tn)) + , table_uuid(std::move(uuid)) + , partition_id(std::move(pid)) + , parts_submitted(submitted) + , start_time(std::chrono::steady_clock::now()) + , preload_level(level) + { + } + + // non-copyable due to atomics + PreloadEntry(const PreloadEntry &) = delete; + 
PreloadEntry & operator=(const PreloadEntry &) = delete;
+};
+
+struct PreloadPartitionSnapshot
+{
+    String table_name;
+    String table_uuid;
+    String partition_id;
+    size_t parts_in_flight;
+    size_t parts_submitted;
+    UInt64 elapsed_ms;
+    UInt64 preload_level;
+};
+
+/// Global registry tracking in-flight async preload tasks, grouped by (table_uuid, partition_id).
+class PreloadRegistry
+{
+public:
+    static PreloadRegistry & instance();
+
+    /// Register parts_count tasks for a partition. Call partFinished() once per part
+    /// from within the task lambda to decrement the in-flight counter.
+    /// The entry is removed automatically when parts_in_flight drops to zero.
+    void registerParts(const String & table_name, const String & table_uuid,
+                       const String & partition_id, size_t parts_count, UInt64 preload_level);
+
+    /// Decrement in-flight count for a partition. Removes entry when it reaches zero.
+    void partFinished(const String & table_uuid, const String & partition_id);
+
+    std::vector getSnapshot() const;
+
+private:
+    using Key = std::pair; // (table_uuid, partition_id)
+    struct PairHash
+    {
+        size_t operator()(const Key & k) const
+        {
+            size_t h = std::hash{}(k.first);
+            h ^= std::hash{}(k.second) + 0x9e3779b9 + (h << 6) + (h >> 2);
+            return h;
+        }
+    };
+
+    mutable std::mutex mu;
+    std::unordered_map, PairHash> entries;
+};
+
+}
diff --git a/src/Storages/DiskCache/TTLCacheFDBIndex.cpp b/src/Storages/DiskCache/TTLCacheFDBIndex.cpp
new file mode 100644
index 00000000000..5b86a05b8a2
--- /dev/null
+++ b/src/Storages/DiskCache/TTLCacheFDBIndex.cpp
@@ -0,0 +1,327 @@
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+
+TTLCacheFDBIndex::TTLCacheFDBIndex(
+    std::shared_ptr metastore_,
+    const String & name_space,
+    const String & worker_id,
+    const String & table_uuid,
+    const String & own_endpoint_)
+    : metastore(std::move(metastore_))
+    , key_prefix(Catalog::escapeString(name_space) + "_DCI_" + 
Catalog::escapeString(worker_id) + "_" + table_uuid) + , rev_key_prefix(Catalog::escapeString(name_space) + "_DCIREV_" + table_uuid) + , own_worker_id(own_endpoint_) + , log(&Poco::Logger::get("TTLCacheFDBIndex")) +{ + bg = std::thread([this] { bgLoop(); }); +} + +TTLCacheFDBIndex::~TTLCacheFDBIndex() +{ + { + std::lock_guard lk(mu); + stopped = true; + } + cv.notify_all(); + if (bg.joinable()) + bg.join(); +} + +String TTLCacheFDBIndex::makeSegKey(UInt128 key, const String & partition_id) const +{ + return key_prefix + "_" + partition_id + "_" + getHexUIntLowercase(key.items[0]) + "_" + getHexUIntLowercase(key.items[1]); +} + +String TTLCacheFDBIndex::makePartPrefix(const String & partition_id, UInt64 hash_high) const +{ + return key_prefix + "_" + partition_id + "_" + getHexUIntLowercase(hash_high) + "_"; +} + +String TTLCacheFDBIndex::makeRevKey(UInt128 key, const String & partition_id) const +{ + return rev_key_prefix + "_" + partition_id + "_" + getHexUIntLowercase(key.items[0]) + "_" + getHexUIntLowercase(key.items[1]); +} + +String TTLCacheFDBIndex::makeRevPartPrefix(const String & partition_id, UInt64 hash_high) const +{ + return rev_key_prefix + "_" + partition_id + "_" + getHexUIntLowercase(hash_high) + "_"; +} + +String TTLCacheFDBIndex::encodeValue(const String & seg_name, size_t size, time_t part_ts) +{ + // Format: "part_ts:size:seg_name" + // seg_name uses '/' as separator internally, no ':' — safe delimiter + return fmt::format("{}:{}:{}", static_cast(part_ts), size, seg_name); +} + +bool TTLCacheFDBIndex::decodeValue(const String & raw, String & seg_name, size_t & size, time_t & part_ts) +{ + auto p1 = raw.find(':'); + if (p1 == String::npos) + return false; + auto p2 = raw.find(':', p1 + 1); + if (p2 == String::npos) + return false; + + try + { + part_ts = static_cast(std::stoll(raw.substr(0, p1))); + size = static_cast(std::stoull(raw.substr(p1 + 1, p2 - p1 - 1))); + seg_name = raw.substr(p2 + 1); + return !seg_name.empty(); + } + catch (...) 
{ return false; }
+}
+
/// Enqueue forward (DCI) + reverse (DCIREV) index writes for a segment that was
/// just cached. The partition_id is the YYYYMMDD day derived from part_ts in UTC
/// (gmtime_r), matching the on-disk path layout.
+void TTLCacheFDBIndex::onSet(UInt128 key, const String & seg_name, size_t size, time_t part_ts)
+{
+    // partition_id is the YYYYMMDD component of the file path, derived from part_ts
+    struct tm t{};
+    gmtime_r(&part_ts, &t);
+    String partition_id = fmt::format("{:04d}{:02d}{:02d}", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday);
+
+    PendingOp fwd;
+    fwd.type = PendingOp::Type::Set;
+    fwd.key = makeSegKey(key, partition_id);
+    fwd.value = encodeValue(seg_name, size, part_ts);
+
+    PendingOp rev;
+    rev.type = PendingOp::Type::Set;
+    rev.key = makeRevKey(key, partition_id);
+    rev.value = own_worker_id;
+
+    {
+        std::lock_guard lk(mu);
+        queue.push_back(std::move(fwd));
+        queue.push_back(std::move(rev));
+    }
+    cv.notify_one();
+}
+
/// Enqueue prefix-deletes covering every segment of one part, in both indexes.
+void TTLCacheFDBIndex::evictPart(const String & partition_id, UInt64 hash_high)
+{
+    PendingOp fwd;
+    fwd.type = PendingOp::Type::Evict;
+    fwd.key = makePartPrefix(partition_id, hash_high);
+
+    PendingOp rev;
+    rev.type = PendingOp::Type::Evict;
+    rev.key = makeRevPartPrefix(partition_id, hash_high);
+
+    {
+        std::lock_guard lk(mu);
+        queue.push_back(std::move(fwd));
+        queue.push_back(std::move(rev));
+    }
+    cv.notify_one();
+}
+
/// Background flusher: wakes when BATCH_SIZE ops are queued, on shutdown, or
/// after MAX_WAIT_MS at the latest. Drains at most BATCH_SIZE ops per iteration,
/// preserving queue order, and keeps draining after stop until the queue is
/// empty — no queued ops are lost on shutdown.
+void TTLCacheFDBIndex::bgLoop()
+{
+    while (true)
+    {
+        std::vector batch;
+        {
+            std::unique_lock lk(mu);
+            cv.wait_for(lk, std::chrono::milliseconds(MAX_WAIT_MS),
+                [this] { return stopped || queue.size() >= BATCH_SIZE; });
+
+            if (stopped && queue.empty())
+                return;
+
+            size_t n = std::min(queue.size(), BATCH_SIZE);
+            batch.reserve(n);
+            for (size_t i = 0; i < n; ++i)
+            {
+                batch.push_back(std::move(queue.front()));
+                queue.pop_front();
+            }
+        }
+
+        if (!batch.empty())
+            flush(batch);
+    }
+}
+
/// Apply one drained batch to the metastore.
///
/// FIX(review): the previous implementation committed ALL Set ops in one
/// batchWrite first and ran ALL Evict (prefix clean()) ops afterwards. That
/// reorders operations within a batch: a Set enqueued AFTER an Evict of an
/// overlapping prefix (re-caching a part right after it was dropped) was
/// written first and then deleted by the older Evict, silently losing the
/// index entry until the next reconcile. Ops are now applied in enqueue order:
/// consecutive Sets are accumulated into one batchWrite, committed before each
/// Evict runs. All failures remain best-effort (logged, not retried), as before.
+void TTLCacheFDBIndex::flush(std::vector & ops)
+{
+    size_t idx = 0;
+    const size_t count = ops.size();
+    while (idx < count)
+    {
+        if (ops[idx].type == PendingOp::Type::Set)
+        {
+            // Gather the maximal run of consecutive Sets into a single batch.
+            Catalog::BatchCommitRequest batch;
+            while (idx < count && ops[idx].type == PendingOp::Type::Set)
+            {
+                batch.AddPut(Catalog::SinglePutRequest(ops[idx].key, ops[idx].value));
+                ++idx;
+            }
+            try
+            {
+                Catalog::BatchCommitResponse resp;
+                metastore->batchWrite(batch, resp);
+            }
+            catch (...)
+            {
+                tryLogCurrentException(log, "TTLCacheFDBIndex: batch write failed");
+            }
+        }
+        else
+        {
+            // Evict: everything enqueued before it has been committed above.
+            try { metastore->clean(ops[idx].key); }
+            catch (...) { tryLogCurrentException(log, "TTLCacheFDBIndex: clean failed for " + ops[idx].key); }
+            ++idx;
+        }
+    }
+}
+
/// Look up the reverse index for a peer worker that has this segment cached.
/// Returns the value this class stores in DCIREV_ entries (own_worker_id), or
/// nullopt when the key is absent, the metastore read fails, the value is
/// empty, or it names ourselves.
+std::optional TTLCacheFDBIndex::findPeerOwner(UInt128 key, const String & partition_id)
+{
+    String rev_key = makeRevKey(key, partition_id);
+    String endpoint;
+    try
+    {
+        if (metastore->get(rev_key, endpoint) == 0)
+            return std::nullopt; // key not found
+    }
+    catch (...)
+    {
+        tryLogCurrentException(log, "TTLCacheFDBIndex: findPeerOwner FDB get failed");
+        return std::nullopt;
+    }
+
+    // endpoint now holds the peer's worker_id; skip if it's ourselves
+    if (endpoint.empty() || endpoint == own_worker_id)
+        return std::nullopt;
+
+    return endpoint; // caller resolves worker_id → host:port via DiskCacheFactory
+}
+
/// NOTE(review): std::function/std::optional template arguments below were
/// stripped by the extraction tooling; restore from the repository.
+std::optional> TTLCacheFDBIndex::reconcile(
+    const VolumePtr & volume,
+    std::function get_rel_path,
+    std::function should_cache,
+    std::function on_reconcile_batch,
+    std::function on_stats_update)
+{
+    // Page through FDB in chunks to avoid hitting the 5-second transaction timeout
+    // that occurs when scanning millions of entries in a single transaction.
+    static constexpr size_t PAGE_SIZE = 100'000;
+
+    const auto & disks = volume->getDisks();
+    if (disks.empty())
+        return std::nullopt;
+
+    size_t total_restored = 0;
+    size_t total_restored_bytes = 0;
+    size_t total_stale = 0;
+    String scan_start_key; // empty = start from key_prefix
+
+    while (true)
+    {
+        ReconcileBatch page;
/// NOTE(review): reserves a full 100'000-entry page up front on every
/// iteration — negligible next to the scan, but worth confirming for tiny tables.
+        page.reserve(PAGE_SIZE);
+        std::vector stale_fwd_keys;
+        size_t page_bytes = 0;
+        size_t page_count = 0;
+        String last_key;
+
+        Catalog::IMetaStore::IteratorPtr it;
+        try { it = metastore->getByPrefix(key_prefix, PAGE_SIZE, DEFAULT_SCAN_BATCH_COUNT, scan_start_key); }
+        catch (...) { tryLogCurrentException(log, "TTLCacheFDBIndex: getByPrefix failed"); return std::nullopt; }
+
+        while (it->next())
+        {
+            last_key = it->key();
+            page_count++;
+
+            String seg_name;
+            size_t size{0};
+            time_t part_ts{0};
+
/// Undecodable and TTL-expired entries are collected and deleted in bulk below.
+            if (!decodeValue(it->value(), seg_name, size, part_ts))
+            {
+                LOG_WARNING(log, "TTLCacheFDBIndex reconcile: decode failed for key={} value={}", it->key(), it->value());
+                stale_fwd_keys.push_back(last_key);
+                continue;
+            }
+
+            if (!should_cache(part_ts))
+            {
+                LOG_DEBUG(log, "TTLCacheFDBIndex reconcile: TTL expired for seg={} part_ts={}", seg_name, part_ts);
+                stale_fwd_keys.push_back(last_key);
+                continue;
+            }
+
+            auto key = DiskCacheTTL::hash(seg_name);
+            auto rel_path = get_rel_path(key, seg_name);
+
+            // TODO: multi-disk JBOD support — store disk name in FDB value so reconcile can
+            // assign the correct disk without a per-file exists() scan across all disks.
+            // For now assume single-disk volume (one PVC per pod) and trust FDB as authoritative,
+            // skipping the per-file exists() syscall (too costly at millions of entries).
+            page.emplace_back(key, std::make_shared(
+                DiskCacheTTLMeta::State::Cached, disks[0], size, time(nullptr), part_ts, rel_path.string()));
+            page_bytes += size;
+        }
+
+        if (!page.empty())
+        {
+            on_reconcile_batch(page);
+            DiskCacheFactory::instance().addGlobalTTLUsage(page_bytes);
+            if (on_stats_update)
+            {
+                for (const auto & [key, meta] : page)
+                    on_stats_update(meta->max_timestamp, meta->size);
+            }
+        }
+
+        if (!stale_fwd_keys.empty())
+        {
+            try
+            {
+                Catalog::BatchCommitRequest batch;
+                for (const auto & fwd : stale_fwd_keys)
+                {
+                    batch.AddDelete(Catalog::SingleDeleteRequest(fwd));
/// The reverse key shares the forward key's suffix (partition + hashes), so it
/// is derived by swapping key_prefix for rev_key_prefix.
+                    String rev = rev_key_prefix + fwd.substr(key_prefix.size());
+                    batch.AddDelete(Catalog::SingleDeleteRequest(rev));
+                }
+                Catalog::BatchCommitResponse resp;
+                metastore->batchWrite(batch, resp);
+                LOG_DEBUG(log, "TTLCacheFDBIndex reconcile: removed {} stale fwd+rev pairs", stale_fwd_keys.size());
+            }
+            catch (...) { tryLogCurrentException(log, "TTLCacheFDBIndex: stale cleanup failed"); }
+        }
+
+        total_restored += page.size();
+        total_restored_bytes += page_bytes;
+        total_stale += stale_fwd_keys.size();
+
/// A short page means the prefix range is exhausted (page_count counts every
/// scanned key, including stale ones, so an all-stale full page still advances).
+        if (page_count < PAGE_SIZE)
+            break;
+
+        // Advance past the last key seen ('\x00' suffix = next key in FDB ordering).
+        scan_start_key = last_key + '\x00';
+    }
+
+    LOG_INFO(log, "TTLCacheFDBIndex reconcile complete: {} entries restored, {} stale removed", total_restored, total_stale);
+
+    if (total_restored == 0)
+        return std::nullopt;
+    return std::make_pair(total_restored, total_restored_bytes);
+}
+
+}
diff --git a/src/Storages/DiskCache/TTLCacheFDBIndex.h b/src/Storages/DiskCache/TTLCacheFDBIndex.h
new file mode 100644
index 00000000000..e55a1b2fb47
--- /dev/null
+++ b/src/Storages/DiskCache/TTLCacheFDBIndex.h
@@ -0,0 +1,105 @@
+#pragma once
+
/// NOTE(review): include targets and template arguments in this header were
/// stripped by the extraction tooling; restore from the repository before applying.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+class DiskCacheTTL;
+class DiskCacheTTLMeta;
+
+/// FDB-backed index for DiskCacheTTL.
+/// On set(): async-writes an entry so the in-memory cache_map can be restored from
+/// FDB on the next startup instead of doing a slow disk scan.
+/// On evictPart(): issues a single FDB clean() covering all segments of a part.
+/// reconcile(): called from load() — scans FDB, verifies files on disk, populates cache_map.
+class TTLCacheFDBIndex
+{
+public:
+    TTLCacheFDBIndex(
+        std::shared_ptr metastore_,
+        const String & name_space,
+        const String & worker_id,
+        const String & table_uuid,
+        const String & own_endpoint_);
+
/// Stops and joins the background flush thread after draining the queue.
+    ~TTLCacheFDBIndex();
+
+    /// Enqueue async FDB write after a segment is successfully cached.
+    void onSet(UInt128 key, const String & seg_name, size_t size, time_t part_ts);
+
+    /// Issue FDB clean() for all segments of one part (hash_high).
+    /// partition_id: YYYYMMDD string derived from max_timestamp (same as path structure).
+    void evictPart(const String & partition_id, UInt64 hash_high);
+
+    /// Look up whether any peer worker has this segment cached.
+    /// Returns peer RPC endpoint (host:port) if found, nullopt otherwise.
+    std::optional findPeerOwner(UInt128 key, const String & partition_id);
+
+    /// Scan FDB index and restore cache_map.
+    /// Calls on_stats_update for each successfully restored entry so the
+    /// caller can update partition_stats without re-scanning cache_map
+    /// Returns {entries, bytes} restored, or nullopt if index is empty/unavailable.
+    using ReconcileBatch = std::vector>>;
+
+    std::optional> reconcile(
+        const VolumePtr & volume,
+        std::function get_rel_path,
+        std::function should_cache,
+        std::function on_reconcile_batch,
+        std::function on_stats_update = nullptr);
+
+private:
/// One queued FDB mutation, applied asynchronously by bgLoop()/flush().
+    struct PendingOp
+    {
+        enum class Type { Set, Evict } type;
+        String key; // full FDB key (Set) or prefix to clean (Evict)
+        String value; // serialized entry (Set only)
+    };
+
+    void bgLoop();
+    void flush(std::vector & ops);
+
+    String makeSegKey(UInt128 key, const String & partition_id) const;
+    String makePartPrefix(const String & partition_id, UInt64 hash_high) const;
+
+    static String encodeValue(const String & seg_name, size_t size, time_t part_ts);
+    static bool decodeValue(const String & raw, String & seg_name, size_t & size, time_t & part_ts);
+
+    String makeRevKey(UInt128 key, const String & partition_id) const;
+    String makeRevPartPrefix(const String & partition_id, UInt64 hash_high) const;
+
+    std::shared_ptr metastore;
+    String key_prefix; // escapeString(ns) + "_DCI_" + escapeString(worker_id) + "_" + table_uuid
+    String rev_key_prefix; // escapeString(ns) + "_DCIREV_" + table_uuid
+    String own_worker_id; // stable worker identity (WORKER_ID env), stored in DCIREV_ values and used to skip self
+
/// mu/cv guard `queue` and the `stopped` flag; bg is the single flusher thread.
+    std::mutex mu;
+    std::deque queue;
+    std::condition_variable cv;
+    std::thread bg;
+    std::atomic stopped{false};
+
+    static constexpr size_t BATCH_SIZE = 100;
+    static constexpr size_t MAX_WAIT_MS = 5000;
+
+    Poco::Logger * log;
+};
+
+}
diff --git a/src/Storages/DiskCache/tests/gtest_disk_cache_key_test.cpp b/src/Storages/DiskCache/tests/gtest_disk_cache_key_test.cpp
index 67c17a49f87..4091376e300 100644
--- a/src/Storages/DiskCache/tests/gtest_disk_cache_key_test.cpp
+++ 
b/src/Storages/DiskCache/tests/gtest_disk_cache_key_test.cpp
@@ -15,6 +15,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -45,4 +46,46 @@ TEST(DiskCache, DiskCachePathTest)
     EXPECT_NE(path1.filename(), path2.filename());
 }
+
+// TTL cache key tests
/// Round-trips a hashed key through hexKey()/unhexKey() and checks malformed
/// hex strings (bad chars, wrong length) are rejected.
+TEST(DiskCacheTTL, UnhexKeyTest)
+{
+    String table_uuid = UUIDHelpers::UUIDToString(UUIDHelpers::generateV4());
+    String seg_key = IDiskCacheSegment::formatSegmentName(table_uuid, "20240315_1_100_2", "col", 0, ".bin");
+
+    DiskCacheTTL::KeyType key = DiskCacheTTL::hash(seg_key);
+    String hex_key = DiskCacheTTL::hexKey(key);
+    auto unhex = DiskCacheTTL::unhexKey(hex_key);
+    EXPECT_TRUE(unhex.has_value());
+    EXPECT_EQ(unhex.value(), key);
+
+    // Invalid hex keys
+    EXPECT_FALSE(DiskCacheTTL::unhexKey("invalid").has_value());
+    EXPECT_FALSE(DiskCacheTTL::unhexKey("12345").has_value());
+    EXPECT_FALSE(DiskCacheTTL::unhexKey("gggggggggggggggggggggggggggggggg").has_value());
+}
+
/// Segments of the same part must share the part-level hash component while
/// differing in the segment-level component; different partitions differ in both.
+TEST(DiskCacheTTL, PartitionHierarchyPathTest)
+{
+    String table_uuid = UUIDHelpers::UUIDToString(UUIDHelpers::generateV4());
+    String seg_key1 = IDiskCacheSegment::formatSegmentName(table_uuid, "20240315_1_100_2", "col", 0, ".bin");
+    String seg_key2 = IDiskCacheSegment::formatSegmentName(table_uuid, "20240315_1_100_2", "col", 0, ".mrk");
+    String seg_key3 = IDiskCacheSegment::formatSegmentName(table_uuid, "20240316_1_100_2", "col", 0, ".bin");
+
+    auto key1 = DiskCacheTTL::hash(seg_key1);
+    auto key2 = DiskCacheTTL::hash(seg_key2);
+    auto key3 = DiskCacheTTL::hash(seg_key3);
+
+    // Path structure doesn't need cache instance, just use hexKey to verify structure
+    String hex1 = DiskCacheTTL::hexKey(key1);
+    String hex2 = DiskCacheTTL::hexKey(key2);
+    String hex3 = DiskCacheTTL::hexKey(key3);
+
+    // Verify same part -> same hash_high (second half of the 32-char hex string)
+    EXPECT_EQ(hex1.substr(16, 16), hex2.substr(16, 16)); // hash_high for same part
+    EXPECT_NE(hex1.substr(0, 16), hex2.substr(0, 16)); // hash_low differs (different segments)
+
+    // Different partitions -> different hash_high
+    EXPECT_NE(hex1.substr(16, 16), hex3.substr(16, 16));
+}
+
 }
diff --git a/src/Storages/DiskCache/tests/gtest_disk_cache_ttl_test.cpp b/src/Storages/DiskCache/tests/gtest_disk_cache_ttl_test.cpp
new file mode 100644
index 00000000000..d062e12a54a
--- /dev/null
+++ b/src/Storages/DiskCache/tests/gtest_disk_cache_ttl_test.cpp
@@ -0,0 +1,1696 @@
+/*
+ * Copyright (2022) Bytedance Ltd. and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
/// NOTE(review): include targets and template arguments in this file were
/// stripped by the extraction tooling; restore from the repository before applying.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace fs = std::filesystem;
+
+namespace DB
+{
+
/// Fixture: fresh tmp/ working dir per test, shared Context per suite,
/// IDiskCache init/close around every test.
+class DiskCacheTTLTest : public ::testing::Test
+{
+public:
+    static void SetUpTestCase()
+    {
+        Poco::AutoPtr formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t"));
+        Poco::AutoPtr console_chanel(new Poco::ConsoleChannel);
+        Poco::AutoPtr channel(new Poco::FormattingChannel(formatter, console_chanel));
+        Poco::Logger::root().setLevel("trace");
+        Poco::Logger::root().setChannel(channel);
+
+        ctx = getContext().context;
+    }
+
+    static void TearDownTestCase()
+    {
+        ctx->shutdown();
+    }
+
+    void SetUp() override
+    {
+        fs::remove_all("tmp/");
+        fs::create_directories("tmp/");
+        fs::create_directory("tmp/ttl_cache/");
+        UnitTest::initLogger();
+        DB::IDiskCache::init(*getContext().context);
+    }
+
+    void TearDown() override
+    {
+        fs::remove_all("tmp/");
+        DB::IDiskCache::close();
+    }
+
/// Single local disk under tmp/ttl_disk/.
+    VolumePtr createTestVolume()
+    {
+        fs::create_directory("tmp/ttl_disk/");
+        auto disk = std::make_shared("ttl_disk", "tmp/ttl_disk/", DiskStats{});
+        return std::make_shared("ttl_volume", std::move(disk), 0);
+    }
+
/// Two local disks, for the JBOD distribution test.
+    VolumePtr createDualDiskVolume()
+    {
+        fs::create_directory("tmp/ttl_disk1/");
+        fs::create_directory("tmp/ttl_disk2/");
+        Disks disks;
+        disks.emplace_back(std::make_shared("ttl_disk1", "tmp/ttl_disk1/", DiskStats{}));
+        disks.emplace_back(std::make_shared("ttl_disk2", "tmp/ttl_disk2/", DiskStats{}));
+        return std::make_shared("ttl_dual_volume", disks, disks.front()->getName(), 0, false);
+    }
+
+    static std::shared_ptr ctx;
+};
+
+std::shared_ptr DiskCacheTTLTest::ctx = nullptr;
+
+// Test parsing partition timestamps from part names
+TEST_F(DiskCacheTTLTest, ParsePartitionTimestamp)
+{
+    // YYYYMMDD format (20240315)
+    {
+        String part_name = "20240315_1_100_2";
+        time_t ts = DiskCacheTTL::parsePartitionTimestamp(part_name);
+        ASSERT_GT(ts, 0);
+
+        struct tm tm_time;
+        gmtime_r(&ts, &tm_time);
+        ASSERT_EQ(tm_time.tm_year + 1900, 2024);
+        ASSERT_EQ(tm_time.tm_mon + 1, 3);
+        ASSERT_EQ(tm_time.tm_mday, 15);
+    }
+
+    // YYYYMMDDHH format (2024031523)
+    {
+        String part_name = "2024031523_1_100_2";
+        time_t ts = DiskCacheTTL::parsePartitionTimestamp(part_name);
+        ASSERT_GT(ts, 0);
+
+        struct tm tm_time;
+        gmtime_r(&ts, &tm_time);
+        ASSERT_EQ(tm_time.tm_year + 1900, 2024);
+        ASSERT_EQ(tm_time.tm_mon + 1, 3);
+        ASSERT_EQ(tm_time.tm_mday, 15);
+        ASSERT_EQ(tm_time.tm_hour, 23);
+    }
+
+    // YYYYMM format (202403)
+    {
+        String part_name = "202403_1_100_2";
+        time_t ts = DiskCacheTTL::parsePartitionTimestamp(part_name);
+        ASSERT_GT(ts, 0);
+
+        struct tm tm_time;
+        gmtime_r(&ts, &tm_time);
+        ASSERT_EQ(tm_time.tm_year + 1900, 2024);
+        ASSERT_EQ(tm_time.tm_mon + 1, 3);
+        ASSERT_EQ(tm_time.tm_mday, 1);
+    }
+
+    // Non-time partition (string partition)
+    {
+        String part_name = "some_partition_1_100_2";
+        time_t ts = DiskCacheTTL::parsePartitionTimestamp(part_name);
+        ASSERT_EQ(ts, 0);
+    }
+
+    // Invalid format
+    {
+        String part_name = "999_1_100_2";
+        time_t ts = DiskCacheTTL::parsePartitionTimestamp(part_name);
+        ASSERT_EQ(ts, 0);
+    }
+
+    // Empty partition
+    {
+        String part_name = "_1_100_2";
+        time_t ts = DiskCacheTTL::parsePartitionTimestamp(part_name);
+        ASSERT_EQ(ts, 0);
+    }
+}
+
+// Test TTL behavior through set/get operations (tests shouldCache indirectly)
+TEST_F(DiskCacheTTLTest, TTLBehaviorThroughOperations)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60; // 1 hour TTL
+    DiskCacheTTL cache("test_ttl", "test-uuid-0000-0000-0000-000000000001", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+
+    // Recent partition (30 minutes old) - should cache
+    {
+        struct tm tm_time;
+        time_t recent_time = now - (30 * 60);
+        gmtime_r(&recent_time, &tm_time);
+        String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+            tm_time.tm_year + 1900, tm_time.tm_mon + 1, tm_time.tm_mday);
+        String seg = fmt::format("test-uuid-0000-0000-0000-000000000001/{}/col.bin/offset_0", part);
+
+        String data = "test";
+        ReadBufferFromString buf(data);
+        cache.set(seg, buf, data.size(), false);
+
+        auto [disk, path] = cache.get(seg);
+        ASSERT_FALSE(path.empty()); // Should be cached
+    }
+
+    // Old partition (2 hours old) - should not cache
+    {
+        struct tm tm_time;
+        time_t old_time = now - (2 * 60 * 60);
+        gmtime_r(&old_time, &tm_time);
+        String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+            tm_time.tm_year + 1900, tm_time.tm_mon + 1, tm_time.tm_mday);
+        String seg = fmt::format("test-uuid-0000-0000-0000-000000000001/{}/col.bin/offset_1", part);
+
+        String data = "test";
+        ReadBufferFromString buf(data);
+        cache.set(seg, buf, data.size(), false);
+
+        auto [disk, path] = cache.get(seg);
+        ASSERT_TRUE(path.empty()); // Should NOT be cached
+    }
+}
+
+// ttl_minutes=0 means "cache nothing" — all writes are rejected
+TEST_F(DiskCacheTTLTest, TTLZeroRejectsAll)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, 0, 0);
+
+    time_t now = time(nullptr);
+    struct tm tm_time;
+    gmtime_r(&now, &tm_time);
+    String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_time.tm_year + 1900, tm_time.tm_mon + 1, tm_time.tm_mday);
+    String seg = fmt::format("test-uuid-0000-0000-0000-000000000002/{}/col.bin/offset_0", part);
+
+    String data = "test";
+    ReadBufferFromString buf(data);
+    cache.set(seg, buf, data.size(), false);
+
+    auto [disk, path] = cache.get(seg);
+    ASSERT_TRUE(path.empty()); // ttl_minutes=0 rejects all writes
+    ASSERT_EQ(cache.getStats().rejected_too_old, 1u);
+}
+
+// Test non-time partitions are rejected
+TEST_F(DiskCacheTTLTest, RejectNonTimePartitions)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    // String partition (non-time)
+    String nontime_part = "string_partition_1_100_2";
+    String nontime_seg = fmt::format("test_uuid/{}/column.bin/offset_0", nontime_part);
+
+    // Numeric but invalid date partition
+    String invalid_part = "999_1_100_2";
+    String invalid_seg = fmt::format("test_uuid/{}/column.bin/offset_1", invalid_part);
+
+    // Try to cache non-time partitions - should be rejected
+    {
+        String data = "test data";
+        ReadBufferFromString buf1(data);
+        ReadBufferFromString buf2(data);
+        cache.set(nontime_seg, buf1, data.size(), false);
+        cache.set(invalid_seg, buf2, data.size(), false);
+
+        // Should not be cached
+        auto [disk1, path1] = cache.get(nontime_seg);
+        auto [disk2, path2] = cache.get(invalid_seg);
+        ASSERT_TRUE(path1.empty());
+        ASSERT_TRUE(path2.empty());
+    }
+
+    ASSERT_EQ(cache.getKeyCount(), 0);
+}
+
+// Test basic set/get operations with TTL filtering
+TEST_F(DiskCacheTTLTest, BasicOperations)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60; // 1 hour TTL
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+
+    // Create recent segment name (should be cached)
+    struct tm tm_recent;
+    gmtime_r(&now, &tm_recent);
+    String recent_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_recent.tm_year + 1900, tm_recent.tm_mon + 1, tm_recent.tm_mday);
+    String recent_seg = fmt::format("test_uuid/{}/column.bin/offset_123", recent_part);
+
+    // Create old segment name (should not be cached)
+    time_t old_time = now - (2 * 60 * 60); // 2 hours ago
+    struct tm tm_old;
+    gmtime_r(&old_time, &tm_old);
+    String old_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_old.tm_year + 1900, tm_old.tm_mon + 1, tm_old.tm_mday);
+    String old_seg = fmt::format("test_uuid/{}/column.bin/offset_456", old_part);
+
+    // Try to set recent segment - should succeed
+    {
+        String test_data = "test data content";
+        ReadBufferFromString buffer(test_data);
+        cache.set(recent_seg, buffer, test_data.size(), false);
+
+        auto [disk, path] = cache.get(recent_seg);
+        ASSERT_FALSE(path.empty());
+        ASSERT_TRUE(disk != nullptr);
+    }
+
+    // Try to set old segment - should be rejected (not cached due to TTL)
+    {
+        String test_data = "old data content";
+        ReadBufferFromString buffer(test_data);
+        cache.set(old_seg, buffer, test_data.size(), false);
+
+        auto [disk, path] = cache.get(old_seg);
+        ASSERT_TRUE(path.empty()); // Should not be cached
+    }
+}
+
+// Test eviction of expired entries
/// NOTE(review): asserts eviction has happened within a 100 ms sleep — relies on
/// eviction being triggered promptly by set()/get(); potential flakiness on
/// loaded CI hosts, confirm the trigger is synchronous.
+TEST_F(DiskCacheTTLTest, EvictExpired)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60; // 1 hour TTL
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+
+    // Create recent partition (30 minutes old - should survive)
+    struct tm tm_recent;
+    time_t recent_time = now - (30 * 60);
+    gmtime_r(&recent_time, &tm_recent);
+    String recent_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_recent.tm_year + 1900, tm_recent.tm_mon + 1, tm_recent.tm_mday);
+    String recent_seg = fmt::format("test-uuid-0000-0000-0000-000000000005/{}/column.bin/offset_0", recent_part);
+
+    // Create old partition (2 hours old - should be evicted)
+    struct tm tm_old;
+    time_t old_time = now - (2 * 60 * 60);
+    gmtime_r(&old_time, &tm_old);
+    String old_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_old.tm_year + 1900, tm_old.tm_mon + 1, tm_old.tm_mday);
+    String old_seg = fmt::format("test-uuid-0000-0000-0000-000000000005/{}/column.bin/offset_1", old_part);
+
+    // Add both segments
+    String data = "test data";
+    ReadBufferFromString buf1(data);
+    ReadBufferFromString buf2(data);
+    cache.set(recent_seg, buf1, data.size(), false);
+    cache.set(old_seg, buf2, data.size(), false);
+
+    // Verify both exist initially
+    size_t initial_count = cache.getKeyCount();
+    ASSERT_EQ(initial_count, 2);
+
+    // Wait a moment for potential background eviction
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+    // Old partition should be evicted, recent should remain
+    ASSERT_EQ(cache.getKeyCount(), 1);
+
+    auto [disk1, path1] = cache.get(recent_seg);
+    auto [disk2, path2] = cache.get(old_seg);
+
+    ASSERT_FALSE(path1.empty()); // Recent still cached
+    ASSERT_TRUE(path2.empty()); // Old evicted
+}
+
+// Periodic eviction is tested indirectly through EvictExpired test
+// (eviction happens automatically every hour during get() operations)
+
+// Test concurrent set/get operations
+TEST_F(DiskCacheTTLTest, ConcurrentAccess)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 10 * 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+    struct tm tm_now;
+    gmtime_r(&now, &tm_now);
+    String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday);
+
+    std::vector threads;
+    std::atomic success_count{0};
+
+    // Multiple threads writing different segments
+    for (int i = 0; i < 10; i++)
+    {
+        threads.emplace_back([&, i]() {
+            String seg_name = fmt::format("test_uuid/{}/col.bin/offset_{}", part, i);
+            String data = fmt::format("data_{}", i);
+            ReadBufferFromString buffer(data);
+            cache.set(seg_name, buffer, data.size(), false);
+
+            std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+            auto [disk, path] = cache.get(seg_name);
+            if (!path.empty())
+                success_count++;
+        });
+    }
+
+    for (auto& t : threads)
+        t.join();
+
+    ASSERT_EQ(success_count, 10);
+    ASSERT_EQ(cache.getKeyCount(), 10);
+}
+
+// Test drop() method removes part segments
+TEST_F(DiskCacheTTLTest, DropPart)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+    struct tm tm_now;
+    gmtime_r(&now, &tm_now);
+    String part1 = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday);
+    String part2 = fmt::format("{:04d}{:02d}{:02d}_2_200_2",
+        tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday);
+
+    // Add segments for two parts
+    for (int i = 0; i < 3; i++)
+    {
+        String seg1 = fmt::format("test_uuid/{}/col.bin/offset_{}", part1, i);
+        String seg2 = fmt::format("test_uuid/{}/col.bin/offset_{}", part2, i);
+
+        String data = "test data";
+        ReadBufferFromString buf1(data);
+        ReadBufferFromString buf2(data);
+        cache.set(seg1, buf1, data.size(), false);
+        cache.set(seg2, buf2, data.size(), false);
+    }
+
+    size_t initial_count = cache.getKeyCount();
+    ASSERT_EQ(initial_count, 6);
+
+    // Drop part1 — path must include the UUID prefix used in segment names
+    cache.drop("test_uuid/" + part1);
+    ASSERT_EQ(cache.getKeyCount(), 3);
+
+    // Verify part1 gone, part2 remains
+    String seg1_check = fmt::format("test_uuid/{}/col.bin/offset_0", part1);
+    String seg2_check = fmt::format("test_uuid/{}/col.bin/offset_0", part2);
+
+    auto [disk1, path1] = cache.get(seg1_check);
+    auto [disk2, path2] = cache.get(seg2_check);
+
+    ASSERT_TRUE(path1.empty());
+    ASSERT_FALSE(path2.empty());
+}
+
+// Test cache stats
+TEST_F(DiskCacheTTLTest, CacheStats)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    ASSERT_EQ(cache.getKeyCount(), 0);
+    ASSERT_EQ(cache.getCachedSize(), 0);
+
+    time_t now = time(nullptr);
+    struct tm tm_now;
+    gmtime_r(&now, &tm_now);
+    String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday);
+
+    // Add entries
+    for (int i = 0; i < 5; i++)
+    {
+        String seg_name = fmt::format("test_uuid/{}/col.bin/offset_{}", part, i);
+        String data = String(100, 'a');
+        ReadBufferFromString buffer(data);
+        cache.set(seg_name, buffer, data.size(), false);
+    }
+
+    ASSERT_EQ(cache.getKeyCount(), 5);
+    ASSERT_GT(cache.getCachedSize(), 0);
+}
+
+// Test multi-disk volume
+TEST_F(DiskCacheTTLTest, MultiDiskVolume)
+{
+    auto volume = createDualDiskVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+    struct tm tm_now;
+    gmtime_r(&now, &tm_now);
+    String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday);
+
+    // Add multiple segments to trigger distribution across disks
+    for (int i = 0; i < 10; i++)
+    {
+        String seg_name = fmt::format("test_uuid/{}/col.bin/offset_{}", part, i);
+        String data = String(1000, 'a');
+        ReadBufferFromString buffer(data);
+        cache.set(seg_name, buffer, data.size(), false);
+    }
+
+    ASSERT_EQ(cache.getKeyCount(), 10);
+
+    // Verify all can be retrieved
+    for (int i = 0; i < 10; i++)
+    {
+        String seg_name = fmt::format("test_uuid/{}/col.bin/offset_{}", part, i);
+        auto [disk, path] = cache.get(seg_name);
+        ASSERT_FALSE(path.empty());
+        ASSERT_TRUE(disk != nullptr);
+    }
+}
+
+// Test detailed statistics collection
/// NOTE(review): the cache is constructed with table_uuid "test-uuid" but
/// stats.table_uuid is expected to equal the full UUID embedded in the segment
/// names — implies getStats() derives the uuid from segment keys; confirm.
+TEST_F(DiskCacheTTLTest, DetailedStats)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 10 * 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+
+    // Add recent entries (should be cached)
+    struct tm tm_recent;
+    time_t recent_time = now - (30 * 60);
+    gmtime_r(&recent_time, &tm_recent);
+    String recent_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_recent.tm_year + 1900, tm_recent.tm_mon + 1, tm_recent.tm_mday);
+
+    for (int i = 0; i < 5; i++)
+    {
+        String seg = fmt::format("test-uuid-0000-0000-0000-00000000000b/{}/col.bin/offset_{}", recent_part, i);
+        String data = String(100, 'a');
+        ReadBufferFromString buf(data);
+        cache.set(seg, buf, data.size(), false);
+    }
+
+    // Try to add old entries (should be rejected)
+    struct tm tm_old;
+    time_t old_time = now - (2 * 60 * 60);
+    gmtime_r(&old_time, &tm_old);
+    String old_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_old.tm_year + 1900, tm_old.tm_mon + 1, tm_old.tm_mday);
+
+    for (int i = 0; i < 3; i++)
+    {
+        String seg = fmt::format("test-uuid-0000-0000-0000-00000000000b/{}/col.bin/offset_{}", old_part, i);
+        String data = String(100, 'a');
+        ReadBufferFromString buf(data);
+        cache.set(seg, buf, data.size(), false);
+    }
+
+    // Try to add non-time partition (should be rejected)
+    String nontime_part = "string_partition_1_100_2";
+    for (int i = 0; i < 2; i++)
+    {
+        String seg = fmt::format("test-uuid-0000-0000-0000-00000000000b/{}/col.bin/offset_{}", nontime_part, i);
+        String data = String(100, 'a');
+        ReadBufferFromString buf(data);
+        cache.set(seg, buf, data.size(), false);
+    }
+
+    // Check global stats
+    auto stats = cache.getStats();
+    ASSERT_EQ(stats.table_uuid, "test-uuid-0000-0000-0000-00000000000b");
+    ASSERT_EQ(stats.total_entries, 5); // Only recent entries
+    ASSERT_GT(stats.total_bytes, 0);
+    ASSERT_EQ(stats.rejected_too_old, 3); // Old entries rejected
+    ASSERT_EQ(stats.rejected_non_time_partition, 2); // Non-time rejected
+
+    // Perform gets (hits)
+    for (int i = 0; i < 5; i++)
+    {
+        String seg = fmt::format("test-uuid-0000-0000-0000-00000000000b/{}/col.bin/offset_{}", recent_part, i);
+        auto [disk, path] = cache.get(seg);
+        ASSERT_FALSE(path.empty());
+    }
+
+    // Perform gets (misses)
+    for (int i = 0; i < 3; i++)
+    {
+        String seg = fmt::format("test-uuid-0000-0000-0000-00000000000b/{}/col.bin/offset_{}", old_part, i);
+        auto [disk, path] = cache.get(seg);
+        ASSERT_TRUE(path.empty());
+    }
+
+    // Check partition stats
+    auto partition_stats = cache.getPartitionStats();
+    ASSERT_GE(partition_stats.size(), 1);
+
+    // Find recent partition stats
+    String recent_partition_id = fmt::format("{:04d}{:02d}{:02d}",
+        tm_recent.tm_year + 1900, tm_recent.tm_mon + 1, tm_recent.tm_mday);
+
+    bool found_recent = false;
+    for (const auto & ps : partition_stats)
+    {
+        if (ps.partition_id == recent_partition_id)
+        {
+            found_recent = true;
+            ASSERT_EQ(ps.entry_count, 5);
+            ASSERT_GT(ps.total_bytes, 0);
+            break;
+        }
+    }
+    ASSERT_TRUE(found_recent);
+}
+
+// Test stats after eviction
/// NOTE(review): same 100 ms timing dependence as EvictExpired above.
+TEST_F(DiskCacheTTLTest, StatsAfterEviction)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+
+    // Add recent entries
+    struct tm tm_recent;
+    time_t recent_time = now - (30 * 60);
+    gmtime_r(&recent_time, &tm_recent);
+    String recent_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_recent.tm_year + 1900, tm_recent.tm_mon + 1, tm_recent.tm_mday);
+
+    for (int i = 0; i < 3; i++)
+    {
+        String seg = fmt::format("test-uuid-0000-0000-0000-00000000000c/{}/col.bin/offset_{}", recent_part, i);
+        String data = "test data";
+        ReadBufferFromString buf(data);
+        cache.set(seg, buf, data.size(), false);
+    }
+
+    // Add old entries (will be cached initially but evicted later)
+    struct tm tm_old;
+    time_t old_time = now - (2 * 60 * 60);
+    gmtime_r(&old_time, &tm_old);
+    String old_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_old.tm_year + 1900, tm_old.tm_mon + 1, tm_old.tm_mday);
+
+    for (int i = 0; i < 2; i++)
+    {
+        String seg = fmt::format("test-uuid-0000-0000-0000-00000000000c/{}/col.bin/offset_{}", old_part, i);
+        String data = "test data";
+        ReadBufferFromString buf(data);
+        cache.set(seg, buf, data.size(), false);
+    }
+
+    auto stats_before = cache.getStats();
+    size_t entries_before = stats_before.total_entries;
+
+    // Wait for eviction
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+    auto stats_after = cache.getStats();
+
+    // Old entries should be evicted
+    ASSERT_LT(stats_after.total_entries, entries_before);
+    ASSERT_GT(stats_after.evicted_expired, 0);
+    ASSERT_GT(stats_after.last_eviction_run, 0);
+}
+
+// Test per-partition hit rate calculation
+TEST_F(DiskCacheTTLTest, PartitionHitRate)
+{
+    auto volume = createTestVolume();
+    DiskCacheSettings settings;
+    settings.ttl_cache_max_size = 1024 * 1024;
+    auto strategy = std::make_shared(settings);
+
+    UInt64 ttl_minutes = 60;
+    DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, ttl_minutes, 0);
+
+    time_t now = time(nullptr);
+    struct tm tm_now;
+    gmtime_r(&now, &tm_now);
+    String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2",
+        tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday);
+    String partition_id = fmt::format("{:04d}{:02d}{:02d}",
+        tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday);
+
+    // Add 10 segments
+    for (int i
= 0; i < 10; i++) + { + String seg = fmt::format("test-uuid-0000-0000-0000-00000000000d/{}/col.bin/offset_{}", part, i); + String data = "test"; + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + // Hit 7 segments, miss 3 + for (int i = 0; i < 7; i++) + { + String seg = fmt::format("test-uuid-0000-0000-0000-00000000000d/{}/col.bin/offset_{}", part, i); + auto [disk, path] = cache.get(seg); + ASSERT_FALSE(path.empty()); + } + + for (int i = 10; i < 13; i++) // Non-existent segments + { + String seg = fmt::format("test-uuid-0000-0000-0000-00000000000d/{}/col.bin/offset_{}", part, i); + auto [disk, path] = cache.get(seg); + ASSERT_TRUE(path.empty()); + } + + auto partition_stats = cache.getPartitionStats(); + bool found = false; + for (const auto & ps : partition_stats) + { + if (ps.partition_id == partition_id) + { + found = true; + ASSERT_GT(ps.entry_count, 0); + ASSERT_GT(ps.total_bytes, 0); + break; + } + } + ASSERT_TRUE(found); +} + +// Test async size-based eviction +TEST_F(DiskCacheTTLTest, AsyncSizeBasedEviction) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; // 1MB limit + auto strategy = std::make_shared(settings); + + UInt64 ttl_minutes = 60; + DiskCacheTTL cache("test_async_eviction", "test-uuid-0000-0000-0000-00000000000e", volume, nullptr, settings, strategy, ttl_minutes, 1024 * 1024); + + time_t now = time(nullptr); + struct tm tm_now; + gmtime_r(&now, &tm_now); + + // Fill cache to ~95% (trigger async eviction threshold of 90%) + size_t segment_size = 100 * 1024; // 100KB per segment + int segments_to_add = 10; // 1MB total + + for (int i = 0; i < segments_to_add; i++) + { + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + String seg = fmt::format("test-uuid-0000-0000-0000-00000000000e/{}/col.bin/offset_{}", part, i); + + String data = String(segment_size, 'a'); + ReadBufferFromString 
buf(data); + cache.set(seg, buf, data.size(), false); + } + + auto stats_before = cache.getStats(); + ASSERT_GT(stats_before.total_bytes, settings.ttl_cache_max_size * 0.90); + ASSERT_EQ(stats_before.async_eviction_triggered, 0); + + // Add one more segment - should trigger async eviction + { + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + String seg = fmt::format("test-uuid-0000-0000-0000-00000000000e/{}/col.bin/offset_trigger", part); + + String data = String(segment_size, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + // Check that async eviction was triggered + auto stats_after = cache.getStats(); + ASSERT_EQ(stats_after.async_eviction_triggered, 1); + + // Wait for async eviction to complete + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + // Verify some space was freed + auto stats_final = cache.getStats(); + ASSERT_GT(stats_final.evicted_size_limit, 0); + + // Try adding another segment immediately - should be rate limited + { + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + String seg = fmt::format("test-uuid-0000-0000-0000-00000000000e/{}/col.bin/offset_rate_limit", part); + + String data = String(segment_size, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + // Should be rate limited (still 1 trigger, but skipped counter increased) + auto stats_rate_limit = cache.getStats(); + ASSERT_EQ(stats_rate_limit.async_eviction_triggered, 1); + ASSERT_GT(stats_rate_limit.async_eviction_skipped_rate_limit, 0); +} + +// Test explicit min/max time parameters override partition_id parsing +TEST_F(DiskCacheTTLTest, ExplicitTimestamps) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; + auto strategy = std::make_shared(settings); + + UInt64 ttl_minutes = 60; // 1 
hour TTL + DiskCacheTTL cache("test_ttl", "test-uuid-0000-0000-0000-000000000010", volume, nullptr, settings, strategy, ttl_minutes, 0); + + time_t now = time(nullptr); + struct tm tm_now; + gmtime_r(&now, &tm_now); + + // Use old partition_id (2 hours ago) that would be rejected by partition parsing + time_t old_time = now - (2 * 60 * 60); + struct tm tm_old; + gmtime_r(&old_time, &tm_old); + String old_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_old.tm_year + 1900, tm_old.tm_mon + 1, tm_old.tm_mday); + + // Test 1: Without explicit timestamps - should be rejected (partition is old) + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000010/{}/col.bin/offset_0", old_part); + String data = "test1"; + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + + auto [disk, path] = cache.get(seg); + ASSERT_TRUE(path.empty()); // Should NOT be cached (old partition) + } + + // Test 2: With explicit max_time (recent) - should be cached despite old partition_id + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000010/{}/col.bin/offset_1", old_part); + String data = "test2"; + ReadBufferFromString buf(data); + + time_t recent_max_time = now - (30 * 60); // 30 minutes ago (within TTL) + cache.set(seg, buf, data.size(), false, 0, recent_max_time); + + auto [disk, path] = cache.get(seg); + ASSERT_FALSE(path.empty()); // Should BE cached (explicit max_time is recent) + } + + // Test 3: With explicit max_time (old) - should be rejected + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000010/{}/col.bin/offset_2", old_part); + String data = "test3"; + ReadBufferFromString buf(data); + + time_t old_max_time = now - (90 * 60); // 90 minutes ago (outside TTL) + cache.set(seg, buf, data.size(), false, 0, old_max_time); + + auto [disk, path] = cache.get(seg); + ASSERT_TRUE(path.empty()); // Should NOT be cached (explicit max_time is old) + } +} + +// Test preload vs query stats tracking +TEST_F(DiskCacheTTLTest, 
PreloadQueryStats) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; + auto strategy = std::make_shared(settings); + + UInt64 ttl_minutes = 60; // 1 hour TTL + DiskCacheTTL cache("test_ttl", "test-uuid-0000-0000-0000-000000000011", volume, nullptr, settings, strategy, ttl_minutes, 0); + + time_t now = time(nullptr); + struct tm tm_now; + gmtime_r(&now, &tm_now); + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + + // Cache with preload=false (query-triggered) + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000011/{}/col.bin/offset_0", part); + String data = String(100, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); // is_preload=false + + auto stats = cache.getStats(); + ASSERT_EQ(stats.cached_from_query, 1); + ASSERT_EQ(stats.cached_bytes_query, 100); + ASSERT_EQ(stats.cached_from_preload, 0); + ASSERT_EQ(stats.cached_bytes_preload, 0); + } + + // Cache with preload=true (background preload) + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000011/{}/col.bin/offset_1", part); + String data = String(200, 'b'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), true); // is_preload=true + + auto stats = cache.getStats(); + ASSERT_EQ(stats.cached_from_query, 1); + ASSERT_EQ(stats.cached_bytes_query, 100); + ASSERT_EQ(stats.cached_from_preload, 1); + ASSERT_EQ(stats.cached_bytes_preload, 200); + } + + // Cache more query-triggered segments + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000011/{}/col.bin/offset_2", part); + String data = String(50, 'c'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); // is_preload=false + + auto stats = cache.getStats(); + ASSERT_EQ(stats.cached_from_query, 2); + ASSERT_EQ(stats.cached_bytes_query, 150); + ASSERT_EQ(stats.cached_from_preload, 1); + 
ASSERT_EQ(stats.cached_bytes_preload, 200); + } +} + +// Test unlimited per-table cache (constrained only by global limit) +TEST_F(DiskCacheTTLTest, UnlimitedPerTable) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 0; // No worker-level per-table default + auto strategy = std::make_shared(settings); + + UInt64 ttl_minutes = 60; + + // Pass max_size_bytes=0 → unlimited per-table (constrained by global) + DiskCacheTTL cache("test_unlimited", "test-uuid-0000-0000-0000-000000000012", volume, nullptr, settings, strategy, ttl_minutes, 0); + + time_t now = time(nullptr); + struct tm tm_now; + gmtime_r(&now, &tm_now); + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + + // Cache some data - no per-table limit check + for (int i = 0; i < 5; i++) + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000012/{}/col.bin/offset_{}", part, i); + String data = String(1024, 'x'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + // Verify cached (no per-table eviction triggered) + auto stats = cache.getStats(); + ASSERT_EQ(stats.async_eviction_triggered, 0); // No local eviction + ASSERT_EQ(stats.total_entries, 5); +} + +// Test 2-tier precedence: per-table max_size_bytes > worker ttl_cache_max_size > unlimited (0) +TEST_F(DiskCacheTTLTest, SizeLimitPrecedence) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 10 * 1024 * 1024; // 10MB worker-level limit + settings.ttl_cache_max_percent = 80; // Would be larger than 10MB + auto strategy = std::make_shared(settings); + + UInt64 ttl_minutes = 60; + + // Test 1: Per-table limit (1MB) overrides worker-level (10MB) + { + DiskCacheTTL cache("test_per_table", "test-uuid-0000-0000-0000-000000000013", + volume, nullptr, settings, strategy, ttl_minutes, 1024 * 1024); + + time_t now = time(nullptr); + struct tm tm_now; + 
gmtime_r(&now, &tm_now); + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + + // Fill to 95% of 1MB (should trigger at 90%) + size_t segment_size = 100 * 1024; // 100KB per segment + for (int i = 0; i < 10; i++) + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000013/{}/col.bin/offset_{}", part, i); + String data = String(segment_size, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + // Should trigger eviction at 1MB limit, not 10MB + auto stats = cache.getStats(); + ASSERT_GT(stats.async_eviction_triggered, 0); + } + + // Test 2: Worker-level limit (10MB) used when per-table = 0 + { + DiskCacheTTL cache("test_worker_level", "test-uuid-0000-0000-0000-000000000014", + volume, nullptr, settings, strategy, ttl_minutes, 0); + + time_t now = time(nullptr); + struct tm tm_now; + gmtime_r(&now, &tm_now); + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + + // Fill to 95% of 10MB + size_t segment_size = 1024 * 1024; // 1MB per segment + for (int i = 0; i < 10; i++) + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000014/{}/col.bin/offset_{}", part, i); + String data = String(segment_size, 'b'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + // Should trigger eviction at 10MB limit + auto stats = cache.getStats(); + ASSERT_GT(stats.async_eviction_triggered, 0); + } + + // Test 3: Unlimited when both per-table and worker-level = 0 + { + DiskCacheSettings settings_no_limit; + settings_no_limit.ttl_cache_max_size = 0; + auto strategy_no_limit = std::make_shared(settings_no_limit); + + DiskCacheTTL cache("test_unlimited", "test-uuid-0000-0000-0000-000000000015", + volume, nullptr, settings_no_limit, strategy_no_limit, ttl_minutes, 0); + + time_t now = time(nullptr); + struct tm tm_now; + gmtime_r(&now, &tm_now); + String part = 
fmt::format("{:04d}{:02d}{:02d}_1_100_2", + tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + + // Cache data - no per-table limit + for (int i = 0; i < 5; i++) + { + String seg = fmt::format("test-uuid-0000-0000-0000-000000000015/{}/col.bin/offset_{}", part, i); + String data = String(1024, 'c'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + // No per-table eviction (unlimited, only constrained by global) + auto stats = cache.getStats(); + ASSERT_EQ(stats.async_eviction_triggered, 0); + ASSERT_EQ(stats.total_entries, 5); + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +static String makeSegKey(const String & uuid, const String & part, const String & col, const String & ext) +{ + return fmt::format("{}/{}/{}#0{}", uuid, part, col, ext); +} + +static String todayPart() +{ + time_t now = time(nullptr); + struct tm t; + gmtime_r(&now, &t); + return fmt::format("{:04d}{:02d}{:02d}_1_100_2", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday); +} + +static String expiredPart() +{ + time_t ts = time(nullptr) - 2 * 24 * 3600; + struct tm t; + gmtime_r(&ts, &t); + return fmt::format("{:04d}{:02d}{:02d}_1_100_2", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday); +} + +// --------------------------------------------------------------------------- +// In-memory IMetaStore mock +// --------------------------------------------------------------------------- + +class MockMetaStore : public Catalog::IMetaStore +{ +public: + struct MockIterator : public Iterator + { + std::vector> entries; + int pos = -1; + bool next() override { return ++pos < static_cast(entries.size()); } + String key() override { return entries[pos].first; } + String value() override { return entries[pos].second; } + }; + + void put(const String & key, const String & value, bool = false) override { store[key] = value; } + std::pair 
putCAS(const String &, const String &, const String &, bool) override { return {false, {}}; } + uint64_t get(const String & key, String & value) override + { + auto it = store.find(key); + if (it == store.end()) return 0; + value = it->second; + return 1; + } + std::vector> multiGet(const std::vector &) override { return {}; } + bool batchWrite(const Catalog::BatchCommitRequest &, Catalog::BatchCommitResponse &) override { return true; } + void drop(const String & key, const UInt64 &) override { store.erase(key); } + void drop(const String & key, const String &) override { store.erase(key); } + IteratorPtr getAll() override { return getByPrefix(""); } + IteratorPtr getByPrefix(const String & prefix, const size_t & limit = 0, uint32_t = 0, const String & start_key = "") override + { + auto iter = std::make_shared(); + for (auto & [k, v] : store) + { + if (!k.starts_with(prefix)) + continue; + // start_key is inclusive (FIRST_GREATER_OR_EQUAL semantics for first batch) + if (!start_key.empty() && k < start_key) + continue; + iter->entries.emplace_back(k, v); + if (limit > 0 && iter->entries.size() >= limit) + break; + } + return iter; + } + IteratorPtr getByRange(const String &, const String &, bool, bool) override { return std::make_shared(); } + void clean(const String & prefix) override + { + for (auto it = store.begin(); it != store.end(); ) + it = it->first.starts_with(prefix) ? 
store.erase(it) : std::next(it); + } + void close() override {} + uint32_t getMaxBatchSize() override { return 1000; } + uint32_t getMaxKVSize() override { return 1024 * 1024; } + + std::map store; +}; + +// --------------------------------------------------------------------------- +// Parameterized: set / get / evict for .bin, .mrk, .idx +// --------------------------------------------------------------------------- + +struct SegCase { const char * ext; const char * expected_prefix; }; + +class SegmentPrefixTest : public DiskCacheTTLTest, + public ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P(AllTypes, SegmentPrefixTest, ::testing::Values( + SegCase{".bin", "data/"}, + SegCase{".mrk", "meta/"}, + SegCase{".idx", "meta/"} +)); + +TEST_P(SegmentPrefixTest, SetGoesToCorrectDir) +{ + auto p = GetParam(); + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 64 * 1024 * 1024; + auto strategy = std::make_shared(settings); + DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, 60, 0); + + String seg = makeSegKey("aaaa-bbbb", todayPart(), "col", p.ext); + String data = "payload"; + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + + auto [disk, path] = cache.get(seg); + ASSERT_FALSE(path.empty()) << "segment not found after set: " << seg; + EXPECT_NE(path.find(p.expected_prefix), String::npos) + << p.ext << " should be under " << p.expected_prefix << " but path=" << path; + // Also verify the file actually exists at the returned path + ASSERT_TRUE(disk); + EXPECT_TRUE(disk->exists(path)) << "file missing on disk at: " << path; +} + +TEST_P(SegmentPrefixTest, GetReturnsExistingFile) +{ + auto p = GetParam(); + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 64 * 1024 * 1024; + auto strategy = std::make_shared(settings); + DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, 60, 
0); + + String seg = makeSegKey("aaaa-bbbb", todayPart(), "col", p.ext); + String data = "payload"; + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + + // get() must return a path that actually contains the prefix and the file + auto [disk, path] = cache.get(seg); + ASSERT_TRUE(disk) << "no disk for " << seg; + EXPECT_TRUE(disk->exists(path)) << "file not on disk: " << path; + EXPECT_NE(path.find(p.expected_prefix), String::npos) + << p.ext << " get() returned wrong prefix: " << path; +} + +TEST_P(SegmentPrefixTest, EvictRemovesFromDisk) +{ + auto p = GetParam(); + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 64 * 1024 * 1024; + auto strategy = std::make_shared(settings); + // 1-minute TTL — 2-day-old part is expired + DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, 1, 0); + + time_t old_ts = time(nullptr) - 2 * 24 * 3600; + String seg = makeSegKey("aaaa-bbbb", expiredPart(), "col", p.ext); + String data = "payload"; + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false, 0, old_ts); + + ASSERT_EQ(cache.getKeyCount(), 1); + auto [disk, path] = cache.get(seg); + ASSERT_TRUE(disk && disk->exists(path)) << "file should exist before eviction: " << path; + + cache.evictExpired(); + + EXPECT_EQ(cache.getKeyCount(), 0); + EXPECT_FALSE(disk->exists(path)) + << p.ext << " file still on disk after eviction — rel_path prefix bug? 
path=" << path; +} + +// --------------------------------------------------------------------------- +// Reconcile: FDB entries for all three types restore with correct rel_path +// --------------------------------------------------------------------------- + +TEST_F(DiskCacheTTLTest, ReconcileRestoresAllTypesWithCorrectRelPath) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 64 * 1024 * 1024; + auto strategy = std::make_shared(settings); + DiskCacheTTL cache("test-cache", "test-uuid", volume, nullptr, settings, strategy, 60, 0); + + const String uuid = "aaaa-bbbb-cccc-dddd"; + const String part = todayPart(); + const time_t now = time(nullptr); + + struct SegInfo { String ext; String expected_prefix; }; + SegInfo cases[] = {{".bin", "data/"}, {".mrk", "meta/"}, {".idx", "meta/"}}; + + // Write all three types to disk so reconcile can verify file existence + std::map seg_to_path; + for (auto & c : cases) + { + String seg = makeSegKey(uuid, part, "col", c.ext); + String data = "payload"; + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false, 0, now); + auto [disk, path] = cache.get(seg); + ASSERT_FALSE(path.empty()) << "failed to cache: " << seg; + EXPECT_NE(path.find(c.expected_prefix), String::npos) + << "wrong write prefix for " << c.ext << ": " << path; + seg_to_path[seg] = path; + } + + // Build mock FDB store — key_prefix = "{ns}_DCI_{worker}_{uuid}" + // Seed one entry per segment using encodeValue; the key just needs the prefix. 
+ const String ns = "byconity"; + const String worker = "test-worker"; + const String key_prefix = fmt::format("{}_DCI_{}_{}", ns, worker, uuid); + auto mock_store = std::make_shared(); + int i = 0; + for (auto & [seg, path] : seg_to_path) + { + String fdb_key = fmt::format("{}_{:04d}", key_prefix, i++); + mock_store->store[fdb_key] = fmt::format("{}:{}:{}", static_cast(now), 7, seg); + } + + // Reconcile into a fresh cache_map + TTLCacheFDBIndex fdb_idx(mock_store, ns, worker, uuid, worker); + std::map> cache_map; + auto get_rel_path = [&cache](UInt128 key, const String & seg_name) -> std::filesystem::path + { + return cache.getRelativePath(key, seg_name); + }; + + fdb_idx.reconcile( + volume, + get_rel_path, + [](time_t) { return true; }, + [&cache_map](TTLCacheFDBIndex::ReconcileBatch & batch) { + for (auto & [key, meta] : batch) + cache_map[key] = meta; + } + ); + + ASSERT_EQ(cache_map.size(), 3u) << "expected 3 entries restored"; + + for (auto & [seg, expected_path] : seg_to_path) + { + auto key = DiskCacheTTL::hash(seg); + auto it = cache_map.find(key); + ASSERT_NE(it, cache_map.end()) << "segment not restored: " << seg; + EXPECT_EQ(it->second->rel_path, expected_path) + << "rel_path mismatch for " << seg + << "\n got: " << it->second->rel_path + << "\n want: " << expected_path; + } +} + +// Verify drop() decrements partition_stats correctly +TEST_F(DiskCacheTTLTest, DropUpdatesPartitionStats) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; + auto strategy = std::make_shared(settings); + + DiskCacheTTL cache("test_drop_pstats", "test-uuid-drop", volume, nullptr, settings, strategy, 60 * 24 * 365, 0); + + time_t now = time(nullptr); + struct tm tm; + gmtime_r(&now, &tm); + String part1 = fmt::format("{:04d}{:02d}{:02d}_1_100_2", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); + String part2 = fmt::format("{:04d}{:02d}{:02d}_2_200_2", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); + String 
partition_id = fmt::format("{:04d}{:02d}{:02d}", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); + const String uuid = "test-uuid-drop"; + + for (int i = 0; i < 3; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, part1, i); + String data(100, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + for (int i = 0; i < 2; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, part2, i); + String data(100, 'b'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + ASSERT_EQ(cache.getKeyCount(), 5); + + { + auto pstats = cache.getPartitionStats(); + bool found = false; + for (const auto & ps : pstats) + { + if (ps.partition_id == partition_id) + { + found = true; + ASSERT_EQ(ps.entry_count, 5u); + ASSERT_EQ(ps.total_bytes, 500u); + } + } + ASSERT_TRUE(found) << "partition not found before drop: " << partition_id; + } + + cache.drop(uuid + "/" + part1); + + ASSERT_EQ(cache.getKeyCount(), 2); + ASSERT_EQ(cache.getCachedSize(), 200u); + { + auto gstats = cache.getStats(); + ASSERT_EQ(gstats.total_entries, 2u); + ASSERT_EQ(gstats.total_bytes, 200u); + } + + { + auto pstats = cache.getPartitionStats(); + bool found = false; + for (const auto & ps : pstats) + { + if (ps.partition_id == partition_id) + { + found = true; + ASSERT_EQ(ps.entry_count, 2u); + ASSERT_EQ(ps.total_bytes, 200u); + } + } + ASSERT_TRUE(found) << "partition not found after drop: " << partition_id; + } +} + +// Verify evictExpired() is a no-op on fresh entries and leaves partition_stats intact +TEST_F(DiskCacheTTLTest, EvictExpiredNoOpKeepsPartitionStats) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; + auto strategy = std::make_shared(settings); + + UInt64 ttl_minutes = 60; + DiskCacheTTL cache("test_evict_noop", "test-uuid-evict", volume, nullptr, settings, strategy, ttl_minutes, 0); + + time_t now = time(nullptr); + struct tm tm; + 
gmtime_r(&now, &tm); + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); + String partition_id = fmt::format("{:04d}{:02d}{:02d}", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); + const String uuid = "test-uuid-evict"; + + for (int i = 0; i < 4; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, part, i); + String data(100, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + ASSERT_EQ(cache.getKeyCount(), 4); + + cache.evictExpired(); // nothing should be evicted — entries are within TTL + + ASSERT_EQ(cache.getKeyCount(), 4); + { + auto gstats = cache.getStats(); + ASSERT_EQ(gstats.total_entries, 4u); + ASSERT_EQ(gstats.total_bytes, 400u); + ASSERT_EQ(gstats.evicted_expired, 0u); + } + + auto pstats = cache.getPartitionStats(); + bool found = false; + for (const auto & ps : pstats) + { + if (ps.partition_id == partition_id) + { + found = true; + ASSERT_EQ(ps.entry_count, 4u); + ASSERT_EQ(ps.total_bytes, 400u); + } + } + ASSERT_TRUE(found) << "partition disappeared after no-op evictExpired: " << partition_id; +} + +// Verify evictOldestPartitionsUntilSpace() decrements partition_stats for evicted partition +TEST_F(DiskCacheTTLTest, SizeLimitEvictionUpdatesPartitionStats) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; + auto strategy = std::make_shared(settings); + + // ttl_minutes=0: no TTL rejection so we can use different-day partitions freely + DiskCacheTTL cache("test_size_pstats", "test-uuid-size", volume, nullptr, settings, strategy, 0, 100 * 1024 * 1024); + + time_t now = time(nullptr); + time_t yesterday = now - 25 * 3600; // definitely the previous calendar day + struct tm tm_now, tm_yest; + gmtime_r(&now, &tm_now); + gmtime_r(&yesterday, &tm_yest); + + String today_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + String 
yest_part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", tm_yest.tm_year + 1900, tm_yest.tm_mon + 1, tm_yest.tm_mday); + + String today_pid = fmt::format("{:04d}{:02d}{:02d}", tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + String yest_pid = fmt::format("{:04d}{:02d}{:02d}", tm_yest.tm_year + 1900, tm_yest.tm_mon + 1, tm_yest.tm_mday); + + if (today_pid == yest_pid) + GTEST_SKIP() << "test requires two distinct calendar days (running at midnight boundary)"; + + const String uuid = "test-uuid-size"; + const size_t seg_size = 1024; + + for (int i = 0; i < 4; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, yest_part, i); + String data(seg_size, 'y'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + for (int i = 0; i < 3; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, today_part, i); + String data(seg_size, 't'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + + ASSERT_EQ(cache.getKeyCount(), 7); + + { + auto pstats = cache.getPartitionStats(); + bool fy = false, ft = false; + for (const auto & ps : pstats) + { + if (ps.partition_id == yest_pid) { fy = true; ASSERT_EQ(ps.entry_count, 4u); } + if (ps.partition_id == today_pid) { ft = true; ASSERT_EQ(ps.entry_count, 3u); } + } + ASSERT_TRUE(fy) << "yesterday partition missing: " << yest_pid; + ASSERT_TRUE(ft) << "today partition missing: " << today_pid; + } + + // Free exactly 4 * seg_size bytes → should evict yesterday's 4 segments + cache.evictOldestPartitionsUntilSpace(4 * seg_size); + + ASSERT_EQ(cache.getKeyCount(), 3); + { + auto gstats = cache.getStats(); + ASSERT_EQ(gstats.total_entries, 3u); + ASSERT_EQ(gstats.total_bytes, 3 * seg_size); + ASSERT_EQ(gstats.evicted_size_limit, 4u); + ASSERT_EQ(gstats.evicted_expired, 0u); // TTL eviction was NOT used + } + + { + auto pstats = cache.getPartitionStats(); + for (const auto & ps : pstats) + { + if (ps.partition_id == yest_pid) + { + 
ASSERT_EQ(ps.entry_count, 0u) << "yesterday partition should be empty after eviction"; + ASSERT_EQ(ps.total_bytes, 0u); + } + if (ps.partition_id == today_pid) + { + ASSERT_EQ(ps.entry_count, 3u) << "today partition should be untouched"; + } + } + } +} + +// Verify part_index is correctly rebuilt after drop + re-add; also tests that +// cache_stats and partition_stats stay consistent across the full cycle. +TEST_F(DiskCacheTTLTest, PartIndexRebuildAfterDrop) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; + auto strategy = std::make_shared(settings); + + DiskCacheTTL cache("test_part_idx", "test-uuid-idx", volume, nullptr, settings, strategy, 60 * 24 * 365, 0); + + const String uuid = "test-uuid-idx"; + time_t now = time(nullptr); + struct tm tm; + gmtime_r(&now, &tm); + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); + String partition_id = fmt::format("{:04d}{:02d}{:02d}", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); + const size_t seg_bytes = 64; + + // Phase 1: add 3 segments for 'part' + for (int i = 0; i < 3; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, part, i); + String data(seg_bytes, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + ASSERT_EQ(cache.getKeyCount(), 3u); + { + auto gstats = cache.getStats(); + ASSERT_EQ(gstats.total_entries, 3u); + ASSERT_EQ(gstats.total_bytes, 3 * seg_bytes); + } + + // Phase 2: drop clears part_index entry for 'part' + cache.drop(uuid + "/" + part); + ASSERT_EQ(cache.getKeyCount(), 0u); + { + auto gstats = cache.getStats(); + ASSERT_EQ(gstats.total_entries, 0u); + ASSERT_EQ(gstats.total_bytes, 0u); + } + { + auto pstats = cache.getPartitionStats(); + for (const auto & ps : pstats) + if (ps.partition_id == partition_id) + { + ASSERT_EQ(ps.entry_count, 0u); + ASSERT_EQ(ps.total_bytes, 0u); + } + } + + // Phase 3: re-add 2 segments — 
part_index must be re-populated from scratch + for (int i = 0; i < 2; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, part, i); + String data(seg_bytes, 'b'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false); + } + ASSERT_EQ(cache.getKeyCount(), 2u); + { + auto gstats = cache.getStats(); + ASSERT_EQ(gstats.total_entries, 2u); + ASSERT_EQ(gstats.total_bytes, 2 * seg_bytes); + } + + // All 2 segments must be retrievable + for (int i = 0; i < 2; i++) + { + String seg = fmt::format("{}/{}/col.bin/offset_{}", uuid, part, i); + auto [disk, path] = cache.get(seg); + ASSERT_FALSE(path.empty()) << "segment " << i << " not found after re-add"; + } + + // Phase 4: second drop — part_index entry removed again, stats zeroed + cache.drop(uuid + "/" + part); + ASSERT_EQ(cache.getKeyCount(), 0u); + { + auto gstats = cache.getStats(); + ASSERT_EQ(gstats.total_entries, 0u); + ASSERT_EQ(gstats.total_bytes, 0u); + } +} + +// --------------------------------------------------------------------------- +// drop() must evict FDB forward + reverse entries for the dropped part +// --------------------------------------------------------------------------- + +TEST_F(DiskCacheTTLTest, DropEvictsFDBEntries) +{ + auto volume = createTestVolume(); + DiskCacheSettings settings; + settings.ttl_cache_max_size = 1024 * 1024; + auto strategy = std::make_shared(settings); + + // No underscores in these strings so Catalog::escapeString is a no-op + const String uuid = "test-uuid-fdb"; + const String ns = "byconity"; + const String worker = "test-worker"; + const String key_prefix = ns + "_DCI_" + worker + "_" + uuid; + const String rev_key_prefix = ns + "_DCIREV_" + uuid; + + auto mock_store = std::make_shared(); + auto fdb_idx = std::make_shared(mock_store, ns, worker, uuid, worker); + + DiskCacheTTL cache("test_fdb_drop", uuid, volume, nullptr, settings, strategy, 60 * 24 * 365, 0); + cache.setFDBIndex(fdb_idx); + + time_t now = time(nullptr); + struct 
tm tm_now; + gmtime_r(&now, &tm_now); + String part = fmt::format("{:04d}{:02d}{:02d}_1_100_2", tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + String partition_id = fmt::format("{:04d}{:02d}{:02d}", tm_now.tm_year + 1900, tm_now.tm_mon + 1, tm_now.tm_mday); + + const size_t seg_bytes = 64; + const int num_segs = 3; + + for (int i = 0; i < num_segs; i++) + { + String seg = makeSegKey(uuid, part, fmt::format("col{}", i), ".bin"); + String data(seg_bytes, 'a'); + ReadBufferFromString buf(data); + cache.set(seg, buf, data.size(), false, 0, now); + + // Seed FDB store manually (batchWrite in MockMetaStore is a no-op). + // hexKey layout: first 16 chars = hex(items[1]=low), last 16 = hex(items[0]=high). + auto key = DiskCacheTTL::hash(seg); + auto hex = DiskCacheTTL::hexKey(key); + String high_hex = hex.substr(16, 16); // items[0] = sipHash64(part_name) + String low_hex = hex.substr(0, 16); // items[1] = sipHash64(column) + mock_store->store[fmt::format("{}_{}_{}_{}", key_prefix, partition_id, high_hex, low_hex)] + = fmt::format("{}:{}:{}", static_cast(now), seg_bytes, seg); + mock_store->store[fmt::format("{}_{}_{}_{}", rev_key_prefix, partition_id, high_hex, low_hex)] + = worker; + } + + ASSERT_EQ(cache.getKeyCount(), static_cast(num_segs)); + ASSERT_EQ(mock_store->store.size(), static_cast(num_segs * 2)); // fwd + rev per segment + + cache.drop(uuid + "/" + part); + ASSERT_EQ(cache.getKeyCount(), 0u); + + // Flush pending evictPart ops: detach fdb_idx from cache then destroy it. + // The destructor sets stopped=true, drains the queue, and joins the bg thread. 
+ cache.setFDBIndex(nullptr); + fdb_idx.reset(); + + EXPECT_TRUE(mock_store->store.empty()) + << "FDB entries not cleaned after drop(); remaining=" << mock_store->store.size(); +} + +} // namespace DB diff --git a/src/Storages/DiskCache/tests/gtest_ttl_cache_fdb_index_test.cpp b/src/Storages/DiskCache/tests/gtest_ttl_cache_fdb_index_test.cpp new file mode 100644 index 00000000000..52e24afb131 --- /dev/null +++ b/src/Storages/DiskCache/tests/gtest_ttl_cache_fdb_index_test.cpp @@ -0,0 +1,346 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +static String fdbMakeSegKey(const String & uuid, const String & part, const String & col, const String & ext) +{ + return fmt::format("{}/{}/{}#0{}", uuid, part, col, ext); +} + +static String fdbTodayPart() +{ + time_t now = time(nullptr); + struct tm t; + gmtime_r(&now, &t); + return fmt::format("{:04d}{:02d}{:02d}_1_100_2", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday); +} + +// --------------------------------------------------------------------------- +// Mock metastore — respects limit and start_key for pagination testing +// --------------------------------------------------------------------------- + +class FDBMockMetaStore : public Catalog::IMetaStore +{ +public: + struct MockIterator : public Iterator + { + std::vector> entries; + int pos = -1; + bool next() override { return ++pos < static_cast(entries.size()); } + String key() override { return entries[pos].first; } + String value() override { return entries[pos].second; } + }; + + void put(const String & key, const String & value, bool = false) override { store[key] = value; } + std::pair putCAS(const String &, 
const String &, const String &, bool) override { return {false, {}}; } + uint64_t get(const String & key, String & value) override + { + auto it = store.find(key); + if (it == store.end()) return 0; + value = it->second; + return 1; + } + std::vector> multiGet(const std::vector &) override { return {}; } + bool batchWrite(const Catalog::BatchCommitRequest & req, Catalog::BatchCommitResponse &) override + { + for (auto & d : req.deletes) + store.erase(d.key); + return true; + } + void drop(const String & key, const UInt64 &) override { store.erase(key); } + void drop(const String & key, const String &) override { store.erase(key); } + IteratorPtr getAll() override { return getByPrefix(""); } + IteratorPtr getByPrefix(const String & prefix, const size_t & limit = 0, uint32_t = 0, const String & start_key = "") override + { + auto iter = std::make_shared(); + for (auto & [k, v] : store) + { + if (!k.starts_with(prefix)) + continue; + if (!start_key.empty() && k < start_key) + continue; + iter->entries.emplace_back(k, v); + if (limit > 0 && iter->entries.size() >= limit) + break; + } + return iter; + } + IteratorPtr getByRange(const String &, const String &, bool, bool) override { return std::make_shared(); } + void clean(const String & prefix) override + { + for (auto it = store.begin(); it != store.end(); ) + it = it->first.starts_with(prefix) ? 
store.erase(it) : std::next(it); + } + void close() override {} + uint32_t getMaxBatchSize() override { return 1000; } + uint32_t getMaxKVSize() override { return 1024 * 1024; } + + std::map store; +}; + +// --------------------------------------------------------------------------- +// Test fixture +// --------------------------------------------------------------------------- + +class TTLCacheFDBIndexTest : public ::testing::Test +{ +public: + static void SetUpTestCase() + { + Poco::AutoPtr formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t")); + Poco::AutoPtr console_channel(new Poco::ConsoleChannel); + Poco::AutoPtr channel(new Poco::FormattingChannel(formatter, console_channel)); + Poco::Logger::root().setLevel("warning"); + Poco::Logger::root().setChannel(channel); + ctx = getContext().context; + } + + static void TearDownTestCase() { ctx->shutdown(); } + + void SetUp() override + { + fs::remove_all("tmp_fdb/"); + fs::create_directories("tmp_fdb/ttl_disk/"); + UnitTest::initLogger(); + DB::IDiskCache::init(*getContext().context); + } + + void TearDown() override + { + fs::remove_all("tmp_fdb/"); + DB::IDiskCache::close(); + } + + VolumePtr createVolume() + { + auto disk = std::make_shared("fdb_ttl_disk", "tmp_fdb/ttl_disk/", DiskStats{}); + return std::make_shared("fdb_ttl_volume", std::move(disk), 0); + } + + DiskCacheSettings makeSettings(size_t max_bytes = 64 * 1024 * 1024) { + DiskCacheSettings s; + s.ttl_cache_max_size = max_bytes; + return s; + } + + static std::shared_ptr ctx; +}; + +std::shared_ptr TTLCacheFDBIndexTest::ctx = nullptr; + +// --------------------------------------------------------------------------- +// Helpers to seed the mock store with valid encoded FDB entries +// --------------------------------------------------------------------------- + +static void seedFDBEntry(FDBMockMetaStore & store, const String & key_prefix, + const String & fdb_key_suffix, const String & seg, size_t size, time_t ts) +{ + 
store.store[key_prefix + fdb_key_suffix] = fmt::format("{}:{}:{}", static_cast(ts), size, seg); +} + +// --------------------------------------------------------------------------- +// Test: all entries restored, on_reconcile_batch called +// --------------------------------------------------------------------------- + +TEST_F(TTLCacheFDBIndexTest, RestoresAllEntries) +{ + auto volume = createVolume(); + auto settings = makeSettings(); + auto strategy = std::make_shared(settings); + + const String uuid = "restore-uuid"; + const String ns = "ns", worker = "w1"; + const String kp = fmt::format("{}_DCI_{}_{}", ns, worker, uuid); + const time_t now = time(nullptr); + + auto mock = std::make_shared(); + const int N = 5; + std::vector segs; + for (int i = 0; i < N; ++i) + { + String seg = fdbMakeSegKey(uuid, fdbTodayPart(), fmt::format("col{}", i), ".bin"); + segs.push_back(seg); + seedFDBEntry(*mock, kp, fmt::format("_k{:04d}", i), seg, 64, now); + } + + TTLCacheFDBIndex idx(mock, ns, worker, uuid, worker); + DiskCacheTTL cache("rc", uuid, volume, nullptr, settings, strategy, 60 * 24, 0); + + size_t batch_calls = 0; + std::map> restored; + auto result = idx.reconcile( + volume, + [&](UInt128 key, const String & seg) { return cache.getRelativePath(key, seg); }, + [&](time_t ts) { return ts > now - 3600; }, + [&](TTLCacheFDBIndex::ReconcileBatch & batch) { + batch_calls++; + for (auto & [k, m] : batch) restored[k] = m; + } + ); + + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->first, static_cast(N)); + EXPECT_EQ(restored.size(), static_cast(N)); + EXPECT_GE(batch_calls, 1u); + + for (auto & seg : segs) + EXPECT_NE(restored.find(DiskCacheTTL::hash(seg)), restored.end()) << "missing: " << seg; +} + +// --------------------------------------------------------------------------- +// Test: expired entries skipped and cleaned from FDB per page +// --------------------------------------------------------------------------- + +TEST_F(TTLCacheFDBIndexTest, 
StaleEntriesCleanedFromFDB) +{ + auto volume = createVolume(); + auto settings = makeSettings(); + auto strategy = std::make_shared(settings); + + const String uuid = "stale-uuid"; + const String ns = "ns", worker = "w1"; + const String kp = fmt::format("{}_DCI_{}_{}", ns, worker, uuid); + const time_t now = time(nullptr); + const time_t old_ts = now - 7 * 24 * 3600; // 7 days ago + + auto mock = std::make_shared(); + + // 3 fresh entries + for (int i = 0; i < 3; ++i) + seedFDBEntry(*mock, kp, fmt::format("_fresh_{:04d}", i), + fdbMakeSegKey(uuid, fdbTodayPart(), fmt::format("c{}", i), ".bin"), 64, now); + + // 2 expired entries + for (int i = 0; i < 2; ++i) + seedFDBEntry(*mock, kp, fmt::format("_stale_{:04d}", i), + fdbMakeSegKey(uuid, fdbTodayPart(), fmt::format("s{}", i), ".bin"), 64, old_ts); + + ASSERT_EQ(mock->store.size(), 5u); + + TTLCacheFDBIndex idx(mock, ns, worker, uuid, worker); + DiskCacheTTL cache("stale", uuid, volume, nullptr, settings, strategy, 60 * 24, 0); + + std::map> restored; + auto result = idx.reconcile( + volume, + [&](UInt128 key, const String & seg) { return cache.getRelativePath(key, seg); }, + [&](time_t ts) { return ts > now - 3600; }, // only very recent + [&](TTLCacheFDBIndex::ReconcileBatch & batch) { + for (auto & [k, m] : batch) restored[k] = m; + } + ); + + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->first, 3u); + EXPECT_EQ(restored.size(), 3u); + + // Stale entries must have been deleted from the mock store + for (auto & [k, v] : mock->store) + EXPECT_EQ(k.find("_stale_"), String::npos) << "stale key not cleaned: " << k; +} + +// --------------------------------------------------------------------------- +// Test: pagination — entries spanning multiple pages all restored, no duplicates +// --------------------------------------------------------------------------- + +TEST_F(TTLCacheFDBIndexTest, PaginationRestoresAllEntries) +{ + auto volume = createVolume(); + auto settings = makeSettings(256 * 1024 * 1024); + auto 
strategy = std::make_shared(settings); + + const String uuid = "page-uuid"; + const String ns = "ns", worker = "w1"; + const String kp = fmt::format("{}_DCI_{}_{}", ns, worker, uuid); + const time_t now = time(nullptr); + + auto mock = std::make_shared(); + + // Seed PAGE_SIZE + 3 entries to force at least 2 pages (PAGE_SIZE = 100000). + // MockMetaStore respects limit + start_key, so pagination is exercised end-to-end. + const size_t PAGE_SIZE = 100'000; + const size_t TOTAL = PAGE_SIZE + 3; + for (size_t i = 0; i < TOTAL; ++i) + { + // Use zero-padded keys so std::map ordering matches FDB lexicographic ordering. + String seg = fdbMakeSegKey(uuid, fdbTodayPart(), fmt::format("col{:07d}", i), ".bin"); + seedFDBEntry(*mock, kp, fmt::format("_{:07d}", i), seg, 32, now); + } + ASSERT_EQ(mock->store.size(), TOTAL); + + TTLCacheFDBIndex idx(mock, ns, worker, uuid, worker); + DiskCacheTTL cache("page", uuid, volume, nullptr, settings, strategy, 60 * 24, 0); + + size_t batch_calls = 0; + std::map> restored; + auto result = idx.reconcile( + volume, + [&](UInt128 key, const String & seg) { return cache.getRelativePath(key, seg); }, + [&](time_t ts) { return ts > now - 3600; }, + [&](TTLCacheFDBIndex::ReconcileBatch & batch) { + batch_calls++; + for (auto & [k, m] : batch) restored[k] = m; + } + ); + + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result->first, TOTAL) << "got: " << result->first << ", want: " << TOTAL; + // No duplicates + EXPECT_EQ(restored.size(), TOTAL) << "duplicates detected: map size " << restored.size() << " vs total " << TOTAL; + // At least 2 batch calls (one per page) + EXPECT_GE(batch_calls, 2u) << "expected pagination but only got " << batch_calls << " batch call(s)"; +} + +// --------------------------------------------------------------------------- +// Test: empty FDB returns nullopt +// --------------------------------------------------------------------------- + +TEST_F(TTLCacheFDBIndexTest, EmptyFDBReturnsNullopt) +{ + auto volume = 
createVolume(); + auto settings = makeSettings(); + auto strategy = std::make_shared(settings); + + const String uuid = "empty-uuid"; + auto mock = std::make_shared(); + + TTLCacheFDBIndex idx(mock, "ns", "w1", uuid, "w1"); + DiskCacheTTL cache("empty", uuid, volume, nullptr, settings, strategy, 60, 0); + + bool batch_called = false; + auto result = idx.reconcile( + volume, + [&](UInt128 key, const String & seg) { return cache.getRelativePath(key, seg); }, + [](time_t) { return true; }, + [&](TTLCacheFDBIndex::ReconcileBatch &) { batch_called = true; } + ); + + EXPECT_FALSE(result.has_value()); + EXPECT_FALSE(batch_called); +} + +} // namespace DB diff --git a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp index f05a21e9543..d1f3763cd10 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCNCH.cpp @@ -355,7 +355,7 @@ void MergeTreeDataPartCNCH::loadFromFileSystem(bool load_hint_mutation) try { MetaInfoDiskCacheSegment metainfo_segment(shared_from_this()); - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree)->getMetaCache(); + auto disk_cache = storage.getDiskCache()->getMetaCache(); auto [cache_disk, segment_path] = disk_cache->get(metainfo_segment.getSegmentName()); if (cache_disk && cache_disk->exists(segment_path)) { @@ -389,7 +389,7 @@ void MergeTreeDataPartCNCH::loadFromFileSystem(bool load_hint_mutation) if (parent_part && enableDiskCache()) { auto segment = std::make_shared(shared_from_this()); - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree)->getMetaCache(); + auto disk_cache = storage.getDiskCache()->getMetaCache(); disk_cache->cacheSegmentsToLocalDisk({std::move(segment)}); } } @@ -753,7 +753,7 @@ IMergeTreeDataPart::IndexPtr MergeTreeDataPartCNCH::loadIndexFromStorage() const /// first try to load index from local disk cache if (enableDiskCache()) { - auto disk_cache = 
DiskCacheFactory::instance().get(DiskCacheType::MergeTree)->getMetaCache(); + auto disk_cache = storage.getDiskCache()->getMetaCache(); PrimaryIndexDiskCacheSegment segment(shared_from_this()); auto [cache_disk, segment_path] = disk_cache->get(segment.getSegmentName()); @@ -794,7 +794,7 @@ IMergeTreeDataPart::IndexPtr MergeTreeDataPartCNCH::loadIndexFromStorage() const if (enableDiskCache()) { auto index_seg = std::make_shared(shared_from_this()); - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree)->getMetaCache(); + auto disk_cache = storage.getDiskCache()->getMetaCache(); disk_cache->cacheSegmentsToLocalDisk({std::move(index_seg)}); } return res; @@ -812,7 +812,7 @@ IMergeTreeDataPart::ChecksumsPtr MergeTreeDataPartCNCH::loadChecksums([[maybe_un if (enableDiskCache()) { ChecksumsDiskCacheSegment checksums_segment(shared_from_this()); - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree)->getMetaCache(); + auto disk_cache = storage.getDiskCache()->getMetaCache(); auto [cache_disk, segment_path] = disk_cache->get(checksums_segment.getSegmentName()); if (cache_disk && cache_disk->exists(segment_path)) @@ -907,7 +907,7 @@ IMergeTreeDataPart::ChecksumsPtr MergeTreeDataPartCNCH::loadChecksumsFromRemote( if (enableDiskCache() && follow_part_chain) { auto segment = std::make_shared(shared_from_this()); - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree)->getMetaCache(); + auto disk_cache = storage.getDiskCache()->getMetaCache(); disk_cache->cacheSegmentsToLocalDisk({std::move(segment)}); } @@ -1237,8 +1237,9 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, UInt64 submit_ts) cons return; } - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree); + auto disk_cache = storage.getDiskCache(); auto cache_strategy = disk_cache->getStrategy(); + IDiskCache * mark_disk_cache = disk_cache->getMetaCache().get(); MarkRanges all_mark_ranges{MarkRange(0, getMarksCount())}; 
MarkCachePtr mark_cache_holder = storage.getContext()->getMarkCache(); @@ -1269,7 +1270,7 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, UInt64 submit_ts) cons PartFileDiskCacheSegment::FileOffsetAndSize{getFileOffsetOrZero(mark_file_name), getFileSizeOrZero(mark_file_name)}, getMarksCount(), mark_cache_holder.get(), - disk_cache->getMetaCache().get(), + mark_disk_cache, stream_name, DATA_FILE_EXTENSION, PartFileDiskCacheSegment::FileOffsetAndSize{getFileOffsetOrZero(data_file_name), getFileSizeOrZero(data_file_name)}, @@ -1370,13 +1371,33 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, UInt64 submit_ts) cons off_t mark_file_offset = source_data_part->getFileOffsetOrZero(mark_file_name); size_t mark_file_size = source_data_part->getFileSizeOrZero(mark_file_name); + if (mark_file_size == 0) + { + LOG_DEBUG( + storage.log, + "Skipping preload of index {} for part {}: not in checksums (written before index was added)", + index_name, + getFullRelativePath()); + continue; + } + + // Skip indexes have GRANULARITY N: one index mark per N primary-key marks. + // Their marks_count = ceil(data_marks / N), not getMarksCount() (which is data marks). + // Derive from the actual mark file size to avoid a size mismatch in MergeTreeMarksLoader. + size_t mark_size = source_data_part->index_granularity_info.getMarkSizeInBytes(1); + size_t skip_index_marks_count = mark_size > 0 ? 
mark_file_size / mark_size : 0; + if (skip_index_marks_count == 0) + continue; + + MarkRanges index_mark_ranges{MarkRange(0, skip_index_marks_count)}; + IDiskCacheSegmentsVector segs = cache_strategy->transferRangesToSegments( - all_mark_ranges, + index_mark_ranges, source_data_part, PartFileDiskCacheSegment::FileOffsetAndSize{mark_file_offset, mark_file_size}, - getMarksCount(), + skip_index_marks_count, mark_cache_holder.get(), - disk_cache->getMetaCache().get(), + mark_disk_cache, index_name, INDEX_FILE_EXTENSION, PartFileDiskCacheSegment::FileOffsetAndSize{data_file_offset, data_file_size}, @@ -1494,7 +1515,7 @@ void MergeTreeDataPartCNCH::preload(UInt64 preload_level, UInt64 submit_ts) cons std::unique_ptr part_helper = std::make_unique( getMvccDataPart(index_helper->getFileName() + INDEX_FILE_EXTENSION), - DiskCacheFactory::instance().get(DiskCacheType::MergeTree)->getMetaCache(), + storage.getDiskCache()->getMetaCache(), DiskCacheMode::USE_DISK_CACHE); factory->get(index_helper->getFileName(), std::move(part_helper)); } @@ -1548,7 +1569,7 @@ void MergeTreeDataPartCNCH::dropDiskCache(ThreadPool & pool, bool drop_vw_disk_c } auto part_log = storage.getContext()->getPartLog(storage.getDatabaseName()); - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree); + auto disk_cache = storage.getDiskCache(); auto cache_strategy = disk_cache->getStrategy(); auto impl = [part_log, part = shared_from_this(), part_base_path, disk_cache] { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index aa361706b3e..4ccc9edc381 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1978,13 +1978,13 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( range.begin / index_granularity, (range.end + index_granularity - 1) / index_granularity); + total_granules += range.end - range.begin; + 
index_time_watcher.watch(IndexTimeWatcher::Type::SEEK, [&](){ if (last_index_mark != index_range.begin || !granule) reader.seek(index_range.begin); }); - total_granules += index_range.end - index_range.begin; - for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) { index_time_watcher.watch(IndexTimeWatcher::Type::READ, [&](){ @@ -2014,7 +2014,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( if (!maybe_true) { - ++granules_dropped; + granules_dropped += data_range.end - data_range.begin; continue; } diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 7270a2fc4bb..b8af3eb3f22 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -20,7 +20,6 @@ */ #include -#include #include #include #include @@ -84,7 +83,7 @@ MergeTreeIndexReader::MergeTreeIndexReader( MergeTreeDataPartPtr source_data_part = part_->getMvccDataPart(index_name + INDEX_FILE_EXTENSION); if (source_data_part->enableDiskCache()) { - auto disk_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree); + auto disk_cache = source_data_part->storage.getDiskCache(); segment_cache_strategy = disk_cache->getStrategy(); segment_cache = disk_cache; diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index a7523e092e6..f7482fc9494 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -171,7 +171,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() if (local_cache_disk && local_cache_disk->exists(local_cache_path) && settings.read_settings.disk_cache_mode != DiskCacheMode::FORCE_STEAL_DISK_CACHE) { from_disk_cache = true; - LOG_TRACE(&Poco::Logger::get(__func__), "load from local disk cache {}, mrk_path {}", local_cache_disk->getPath(), local_cache_path); + LOG_TRACE(&Poco::Logger::get(__func__), 
"marks cache hit: seg_key={} disk={} path={}", mrk_seg_key, local_cache_disk->getPath(), local_cache_path); size_t cached_mark_file_size = local_cache_disk->getFileSize(local_cache_path); if (expected_file_size != cached_mark_file_size) throw Exception( @@ -230,7 +230,9 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() } } - LOG_TRACE(&Poco::Logger::get(__func__), "load from remote filesystem mrk_path {}", mrk_path); + LOG_TRACE(&Poco::Logger::get(__func__), "marks cache miss: seg_key={} falling back to remote fs mrk_path={}", + IDiskCacheSegment::formatSegmentName(UUIDHelpers::UUIDToString(storage_uuid), part_name, stream_name, 0, index_granularity_info.marks_file_extension), + mrk_path); auto buf = disk->readFile(mrk_path, load_mark_read_settings); if (buf->seek(mark_file_offset) != mark_file_offset) throw Exception("Cannot seek to mark file " + mrk_path + " for stream " + stream_name, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); diff --git a/src/Storages/MergeTree/MergeTreeReaderCNCH.cpp b/src/Storages/MergeTree/MergeTreeReaderCNCH.cpp index 7c0eb1f6ff5..c4bf3f31415 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCNCH.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCNCH.cpp @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -96,7 +96,7 @@ MergeTreeReaderCNCH::MergeTreeReaderCNCH( { if (data_part->enableDiskCache()) { - segment_cache = DiskCacheFactory::instance().get(DiskCacheType::MergeTree); + segment_cache = data_part->storage.getDiskCache(); segment_cache_strategy = segment_cache->getStrategy(); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 76b98f6e3b7..cda4aa14dab 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -505,6 +505,8 @@ enum StealingCacheMode : UInt64 M(Bool, enable_parts_sync_preload, 0, "Enable sync 
preload parts", 0) \ M(Bool, enable_gc_evict_disk_cache, false, "Enable gc evict disk cache", 0) \ M(UInt64, disk_cache_stealing_mode, 0, "Read/write remote vw local disk cache if cur local disk cache empty, 0: close; 1: read 2: write 3: read&write", 0) \ + M(UInt64, disk_cache_ttl_hours, 0, "Per-table TTL cache: cache parts with max_timestamp within this age. 0 = disabled (use global LRU). >0 = enable per-table TTL cache.", 0) \ + M(UInt64, disk_cache_max_size_bytes, 0, "Per-table cache size limit in bytes. 0 = unlimited (constrained by global limit). Only applies when disk_cache_ttl_hours > 0.", 0) \ \ /* Renamed settings - cannot be ignored */\ M(Bool, enable_nullable_sorting_key, false, "Alias of `allow_nullable_key`", 0) \ diff --git a/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.cpp b/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.cpp index 65c3b571fdc..316e0817619 100644 --- a/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.cpp +++ b/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.cpp @@ -13,12 +13,16 @@ * limitations under the License. 
*/ +#include #include #include #include #include #include +#include +#include #include +#include #include #include #include "Compression/CachedCompressedReadBuffer.h" @@ -149,11 +153,55 @@ MergedReadBufferWithSegmentCache::MergedReadBufferWithSegmentCache( total_segment_count(total_segment_count_), marks_loader(marks_loader_), current_segment_idx(0), current_compressed_offset(std::nullopt), part_host(part_host_), stream_extension(stream_extension_), - logger(&Poco::Logger::get("MergedReadBufferWithSegmentCache")) + is_idx(stream_extension_ == INDEX_FILE_EXTENSION), + logger(&Poco::Logger::get("MergedReadBufferWithSegmentCache")), + cached_query_id(CurrentThread::getQueryId().toString()) { + bool is_ttl_cache = dynamic_cast(segment_cache_) != nullptr; + if (is_ttl_cache) + { + if (auto ctx = CurrentThread::get().getQueryContext()) + collect_cache_stats = ctx->getSettingsRef().report_segment_profiles + || ctx->getSettingsRef().log_segment_profiles; + } + LOG_DEBUG(logger, "MergedReadBufferWithSegmentCache: part={} stream={} query_id={} is_ttl={} collect_stats={}", + part_name_, stream_name_, cached_query_id, is_ttl_cache, collect_cache_stats); initialize(); } +MergedReadBufferWithSegmentCache::~MergedReadBufferWithSegmentCache() +{ + try { flushLocalCacheStats(); } + catch (...) 
{ tryLogCurrentException(logger, "flushLocalCacheStats in destructor"); } +} + +void MergedReadBufferWithSegmentCache::flushLocalCacheStats() +{ + if (!collect_cache_stats || local_cache_stats.empty()) + return; + // Close out any open segment timer + if (active_segment_start_ms > 0) + { + uint64_t elapsed = static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count()) - active_segment_start_ms; + if (is_idx) + { + if (active_is_cache) local_cache_stats.idx_cache_read_ms += elapsed; + else local_cache_stats.idx_s3_read_ms += elapsed; + } + else + { + if (active_is_cache) local_cache_stats.cache_read_ms += elapsed; + else local_cache_stats.s3_read_ms += elapsed; + } + active_segment_start_ms = 0; + } + if (!cached_query_id.empty()) + DiskCacheFactory::instance().mergeQueryCacheStats(cached_query_id, local_cache_stats); + local_cache_stats = {}; +} + void MergedReadBufferWithSegmentCache::initialize() { if (settings.read_settings.remote_read_log) settings.read_settings.remote_read_context = stream_name + stream_extension; @@ -190,6 +238,11 @@ bool MergedReadBufferWithSegmentCache::nextImpl() ProfileEvents::increment(ProfileEvents::CnchReadSizeFromDiskCache, buf_size); + if (collect_cache_stats) + { + if (is_idx) local_cache_stats.idx_cache_bytes += buf_size; + else local_cache_stats.cache_bytes += buf_size; + } if (progress_callback) progress_callback({0, 0, 0, 0, buf_size}); @@ -199,6 +252,32 @@ bool MergedReadBufferWithSegmentCache::nextImpl() current_compressed_offset = marks_loader.getMark(current_segment_idx * cache_segment_size).offset_in_compressed_file + cache_buffer.compressedOffset(); + // Segment boundary: stop timer, then flush to DiskCacheFactory immediately. + // Flushing per-boundary ensures stats are available even for LIMIT queries + // that cancel before reaching EOF. 
+ if (collect_cache_stats && active_segment_start_ms > 0) + { + uint64_t elapsed = static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count()) - active_segment_start_ms; + if (is_idx) + { + if (active_is_cache) local_cache_stats.idx_cache_read_ms += elapsed; + else local_cache_stats.idx_s3_read_ms += elapsed; + } + else + { + if (active_is_cache) local_cache_stats.cache_read_ms += elapsed; + else local_cache_stats.s3_read_ms += elapsed; + } + active_segment_start_ms = 0; + } + if (collect_cache_stats && !local_cache_stats.empty()) + { + DiskCacheFactory::instance().mergeQueryCacheStats(cached_query_id, local_cache_stats); + local_cache_stats = {}; + } + cache_buffer.reset(); LOG_TRACE(logger, fmt::format("Cache buffer of segment {} encounter " @@ -245,6 +324,19 @@ bool MergedReadBufferWithSegmentCache::nextImpl() ProfileEvents::CnchReadSizeFromDiskCache : ProfileEvents::CnchReadSizeFromRemote, buf_size); + if (collect_cache_stats) + { + if (is_idx) + { + if (cache_buffer.initialized()) local_cache_stats.idx_cache_bytes += buf_size; + else local_cache_stats.idx_s3_bytes += buf_size; + } + else + { + if (cache_buffer.initialized()) local_cache_stats.cache_bytes += buf_size; + else local_cache_stats.s3_bytes += buf_size; + } + } if (cache_buffer.initialized() && progress_callback) progress_callback({0, 0, 0, 0, buf_size}); @@ -323,6 +415,16 @@ void MergedReadBufferWithSegmentCache::seekToPosition(size_t segment_idx, } // No segment cache, trying to use source reader + if (collect_cache_stats) + { + // For data: count s3_fallback_segs here (complements cache_miss_segs from seekToMarkInSegmentCache). + // For idx: miss already counted in seekToMarkInSegmentCache; skip here to avoid double-count. 
+ if (!is_idx) ++local_cache_stats.s3_fallback_segs; + active_segment_start_ms = static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count()); + active_is_cache = false; + } initSourceBufferIfNeeded(); LOG_TRACE(logger, fmt::format("Seek to remote file {} in part {}, offset {}:{}, base offset {}, limit {}", @@ -370,11 +472,38 @@ bool MergedReadBufferWithSegmentCache::seekToMarkInSegmentCache(size_t segment_i if (settings.read_settings.disk_cache_mode == DiskCacheMode::FORCE_DISK_CACHE) throw Exception(ErrorCodes::DISK_CACHE_NOT_USED, "Can't find disk cache {} but enable `FORCE_DISK_CACHE`", segment_key); - if ((settings.remote_disk_cache_stealing == StealingCacheMode::READ_WRITE - || settings.remote_disk_cache_stealing == StealingCacheMode::READ_ONLY) - && parsed_assign_compute_host.has_value() && parsed_disk_cache_host.has_value() - && removeBracketsIfIpv6(parsed_assign_compute_host.value()) != removeBracketsIfIpv6(parsed_disk_cache_host.value())) - return seekToMarkInRemoteSegmentCache(segment_idx, mark_pos, segment_key); + if (settings.remote_disk_cache_stealing == StealingCacheMode::READ_WRITE + || settings.remote_disk_cache_stealing == StealingCacheMode::READ_ONLY) + { + // FDB-backed peer lookup: topology-aware, fires on scale events and post-restart gaps. + // Takes precedence over routing-hint stealing for TTL caches. 
+ auto * ttl_cache = dynamic_cast(segment_cache); + if (ttl_cache) + { + if (auto peer_endpoint = ttl_cache->findPeerOwner(segment_key)) + { + bool stolen = seekToMarkInRemoteSegmentCache(segment_idx, mark_pos, segment_key, *peer_endpoint); + if (collect_cache_stats) + { + if (stolen) ++local_cache_stats.steal_segs; + else if (is_idx) ++local_cache_stats.idx_miss_segs; + else ++local_cache_stats.cache_miss_segs; + } + return stolen; + } + } + else if (parsed_assign_compute_host.has_value() && parsed_disk_cache_host.has_value() + && removeBracketsIfIpv6(parsed_assign_compute_host.value()) != removeBracketsIfIpv6(parsed_disk_cache_host.value())) + { + // Legacy routing-hint stealing for non-TTL caches. + return seekToMarkInRemoteSegmentCache(segment_idx, mark_pos, segment_key, {}); + } + } + if (collect_cache_stats) + { + if (is_idx) ++local_cache_stats.idx_miss_segs; + else ++local_cache_stats.cache_miss_segs; + } LOG_TRACE( logger, "Can't find disk cache key {} and fallback to read from remote fs. (current buffer at {}), segment {}, offset {}:{}", @@ -398,6 +527,15 @@ bool MergedReadBufferWithSegmentCache::seekToMarkInSegmentCache(size_t segment_i cache_buffer.seek(mark_pos.offset_in_compressed_file - segment_start_compressed_offset, mark_pos.offset_in_decompressed_block); current_segment_idx = segment_idx; + if (collect_cache_stats) + { + if (is_idx) ++local_cache_stats.idx_hit_segs; + else ++local_cache_stats.cache_hit_segs; + active_segment_start_ms = static_cast( + std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count()); + active_is_cache = true; + } } catch(...) 
{ @@ -410,11 +548,15 @@ bool MergedReadBufferWithSegmentCache::seekToMarkInSegmentCache(size_t segment_i } bool MergedReadBufferWithSegmentCache::seekToMarkInRemoteSegmentCache(size_t segment_idx, - const MarkInCompressedFile& mark_pos, const String & segment_key) + const MarkInCompressedFile& mark_pos, const String & segment_key, const String & endpoint) { if (!segment_cache) return false; + const String & peer = endpoint.empty() ? part_host.disk_cache_host_port : endpoint; + if (peer.empty()) + return false; + DistributedDataClientOption option{ .max_request_rate = segment_cache->getSettings().stealing_max_request_rate, .connection_timeout_ms = segment_cache->getSettings().stealing_connection_timeout_ms, @@ -423,7 +565,7 @@ bool MergedReadBufferWithSegmentCache::seekToMarkInRemoteSegmentCache(size_t seg .retry_sleep_ms = segment_cache->getSettings().stealing_retry_sleep_ms, .max_queue_count = segment_cache->getSettings().stealing_max_queue_count, }; - auto remote_data_client = std::make_shared(part_host.disk_cache_host_port, segment_key, option); + auto remote_data_client = std::make_shared(peer, segment_key, option); auto remote_cache_file = std::make_unique(remote_data_client, settings.read_settings.remote_fs_buffer_size); if (remote_cache_file->getFileName().empty()) return false; @@ -435,7 +577,7 @@ bool MergedReadBufferWithSegmentCache::seekToMarkInRemoteSegmentCache(size_t seg logger, fmt::format( "Seek to remote diskcache {}:{} (current buffer at {}), segment {}, offset {}:{}", - part_host.disk_cache_host_port, + peer, remote_cache_file->getFileName(), cache_buffer.initialized() ? 
cache_buffer.path() : "Uninitialized", segment_idx, diff --git a/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.h b/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.h index 62e7e032d57..be0ca89d314 100644 --- a/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.h +++ b/src/Storages/MergeTree/MergedReadBufferWithSegmentCache.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,8 @@ class MergedReadBufferWithSegmentCache: public ReadBuffer clockid_t clock_type_ = CLOCK_MONOTONIC_COARSE, String stream_extension_ = DATA_FILE_EXTENSION); + ~MergedReadBufferWithSegmentCache() override; + virtual size_t readBig(char* to, size_t n) override; virtual bool nextImpl() override; @@ -124,7 +127,8 @@ class MergedReadBufferWithSegmentCache: public ReadBuffer void seekToPosition(size_t segment_idx, const MarkInCompressedFile& mark_pos); bool seekToMarkInSegmentCache(size_t segment_idx, const MarkInCompressedFile& mark_pos); void initialize(); - bool seekToMarkInRemoteSegmentCache(size_t segment_idx, const MarkInCompressedFile& mark_pos, const String & segment_key); + // endpoint: FDB-found peer address; empty = fall back to part_host.disk_cache_host_port + bool seekToMarkInRemoteSegmentCache(size_t segment_idx, const MarkInCompressedFile& mark_pos, const String & segment_key, const String & endpoint = {}); void initCacheBufferIfNeeded(const DiskPtr & disk, const String & path, std::unique_ptr remote_cache = nullptr); void initSourceBufferIfNeeded(); @@ -169,10 +173,21 @@ class MergedReadBufferWithSegmentCache: public ReadBuffer PartHostInfo part_host; String stream_extension; + bool is_idx{false}; // true when stream_extension == ".idx" (skip-index segment) Poco::Logger* logger; off_t read_until_position = 0; + + // Per-stream cache stats flushed to DiskCacheFactory registry at segment boundaries and in destructor. + // Only populated when segment_cache is a DiskCacheTTL instance AND query requested segment profiles. 
+ bool collect_cache_stats{false}; + String cached_query_id; + QueryCacheStatsSnapshot local_cache_stats; + uint64_t active_segment_start_ms{0}; // wall-clock ms when current segment read started + bool active_is_cache{false}; // true = cache_buffer active, false = source_buffer + + void flushLocalCacheStats(); }; } diff --git a/src/Storages/PartCacheManager.cpp b/src/Storages/PartCacheManager.cpp index f2d3bde7044..11bad433730 100644 --- a/src/Storages/PartCacheManager.cpp +++ b/src/Storages/PartCacheManager.cpp @@ -1909,6 +1909,9 @@ PartCacheManager::getLastModificationTimeHints(const ConstStoragePtr & storage, } const auto * meta_storage = dynamic_cast(storage.get()); + if (!meta_storage) + throw Exception("Table is not a Meta Based MergeTree", ErrorCodes::UNKNOWN_TABLE); + auto meta_partitions = table_meta->getPartitionList(); // Skip if it passes TTL @@ -1922,8 +1925,6 @@ PartCacheManager::getLastModificationTimeHints(const ConstStoragePtr & storage, continue; Protos::LastModificationTimeHint hint = Protos::LastModificationTimeHint{}; - if (!meta_storage) - throw Exception("Table is not a Meta Based MergeTree", ErrorCodes::UNKNOWN_TABLE); String partition = partition_info->getPartitionValue(*meta_storage); diff --git a/src/Storages/StorageCloudMergeTree.cpp b/src/Storages/StorageCloudMergeTree.cpp index 1b9a67846d8..00a06cc4c3b 100644 --- a/src/Storages/StorageCloudMergeTree.cpp +++ b/src/Storages/StorageCloudMergeTree.cpp @@ -15,6 +15,7 @@ #include +#include #include #include "Core/UUID.h" #include "Storages/IStorage.h" @@ -43,6 +44,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -103,6 +105,32 @@ void StorageCloudMergeTree::shutdown() dedup_worker->stop(); } +IDiskCachePtr StorageCloudMergeTree::getDiskCache() const +{ + // getDiskCache() is called per-part; compute the pointer once per storage lifetime. 
+ std::call_once(disk_cache_flag, [this] + { + if (getSettings()->disk_cache_ttl_hours.value > 0) + { + disk_cache_ptr = DiskCacheFactory::instance().createDiskCacheFromTableSettings( + getStorageID().getFullNameNotQuoted(), + getStorageUUID(), + *getContext(), + getContext()->getDiskCacheThrottler(), + getSettings()->disk_cache_ttl_hours.value * 60, + getSettings()->disk_cache_max_size_bytes.value + ); + } + else + { + // TTL disabled — evict any stale registry entry so re-enabling picks up fresh settings. + DiskCacheFactory::instance().removeTableTTLCache(getStorageUUID()); + disk_cache_ptr = DiskCacheFactory::instance().get(DiskCacheType::MergeTree); + } + }); + return disk_cache_ptr; +} + StorageCloudMergeTree::~StorageCloudMergeTree() { } diff --git a/src/Storages/StorageCloudMergeTree.h b/src/Storages/StorageCloudMergeTree.h index 95e6cbc72c3..715bdcc3f14 100644 --- a/src/Storages/StorageCloudMergeTree.h +++ b/src/Storages/StorageCloudMergeTree.h @@ -26,6 +26,10 @@ namespace DB class CloudMergeTreeDedupWorker; using CloudMergeTreeDedupWorkerPtr = std::unique_ptr; + +class IDiskCache; +using IDiskCachePtr = std::shared_ptr; + namespace IngestColumnCnch { struct IngestPartitionParam; @@ -105,6 +109,8 @@ class StorageCloudMergeTree : public shared_ptr_helper, p CloudMergeTreeDedupWorker * tryGetDedupWorker() { return dedup_worker.get(); } CloudMergeTreeDedupWorker * getDedupWorker(); + IDiskCachePtr getDiskCache() const override; + QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; bool getQueryProcessingStageWithAggregateProjection(ContextPtr query_context, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info) const; @@ -136,6 +142,10 @@ class StorageCloudMergeTree : public shared_ptr_helper, p std::set required_bucket_numbers; CloudMergeTreeDedupWorkerPtr dedup_worker; + + // Cached per-query disk cache pointer — getDiskCache() is called 
per-part so compute once. + mutable std::once_flag disk_cache_flag; + mutable IDiskCachePtr disk_cache_ptr; }; } diff --git a/src/Storages/StorageCnchMergeTree.cpp b/src/Storages/StorageCnchMergeTree.cpp index 67d48a49c66..86cccc8e428 100644 --- a/src/Storages/StorageCnchMergeTree.cpp +++ b/src/Storages/StorageCnchMergeTree.cpp @@ -96,6 +96,8 @@ #include #include #include +#include +#include namespace ProfileEvents @@ -259,13 +261,6 @@ QueryProcessingStage::Enum StorageCnchMergeTree::getQueryProcessingStage( } } -void StorageCnchMergeTree::startup() -{ -} - -void StorageCnchMergeTree::shutdown() -{ -} Pipe StorageCnchMergeTree::read( const Names & column_names, diff --git a/src/Storages/StorageCnchMergeTree.h b/src/Storages/StorageCnchMergeTree.h index 31d8bd4ee55..cf6d39ff0a7 100644 --- a/src/Storages/StorageCnchMergeTree.h +++ b/src/Storages/StorageCnchMergeTree.h @@ -31,6 +31,8 @@ namespace DB struct PrepareContextResult; class ASTSystemQuery; +class IDiskCache; +using IDiskCachePtr = std::shared_ptr; class StorageCnchMergeTree final : public shared_ptr_helper, public MergeTreeMetaBase, public CnchStorageCommonHelper { @@ -69,8 +71,7 @@ class StorageCnchMergeTree final : public shared_ptr_helper getDefaultSettings() const override; + +private: }; using StorageCnchMergeTreePtr = std::shared_ptr; diff --git a/src/Storages/System/StorageSystemDiskTTLCachePartitions.cpp b/src/Storages/System/StorageSystemDiskTTLCachePartitions.cpp new file mode 100644 index 00000000000..d4b70784870 --- /dev/null +++ b/src/Storages/System/StorageSystemDiskTTLCachePartitions.cpp @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +NamesAndTypesList StorageSystemDiskTTLCachePartitions::getNamesAndTypes() +{ + return { + {"worker_id", std::make_shared()}, + {"table_name", std::make_shared()}, + {"table_uuid", std::make_shared()}, + {"partition", std::make_shared()}, + 
{"entry_count", std::make_shared()}, + {"bytes", std::make_shared()}, + }; +} + +StorageSystemDiskTTLCachePartitions::StorageSystemDiskTTLCachePartitions(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_) +{ +} + +static void fillPartitionRow(MutableColumns & res_columns, const String & worker_id, const Protos::TTLCachePartitionStats & p) +{ + size_t col_idx = 0; + res_columns[col_idx++]->insert(worker_id); + res_columns[col_idx++]->insert(p.table_name()); + res_columns[col_idx++]->insert(p.table_uuid()); + res_columns[col_idx++]->insert(p.partition()); + res_columns[col_idx++]->insert(p.entry_count()); + res_columns[col_idx++]->insert(p.bytes()); +} + +void StorageSystemDiskTTLCachePartitions::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + if (context->getServerType() == ServerType::cnch_server) + { + auto * log = &Poco::Logger::get("StorageSystemDiskTTLCachePartitions"); + std::vector all_workers; + try + { + auto rm_client = context->getResourceManagerClient(); + if (!rm_client) + { + LOG_WARNING(log, "ResourceManager client unavailable, returning empty result"); + return; + } + rm_client->getAllWorkers(all_workers); + } + catch (...) + { + tryLogCurrentException(log, "Failed to get workers from ResourceManager"); + return; + } + + LOG_INFO(log, "Querying TTL partition stats from {} worker(s)", all_workers.size()); + auto & pools = context->getCnchWorkerClientPools(); + for (const auto & wd : all_workers) + { + if (wd.vw_name == ResourceManagement::toSystemVWName(ResourceManagement::VirtualWarehouseType::Write)) + continue; + try + { + auto worker = pools.getWorker(wd.host_ports); + auto partitions = worker->getTTLCachePartitionStats(); + for (const auto & p : partitions) + fillPartitionRow(res_columns, wd.id.empty() ? wd.host_ports.getRPCAddress() : wd.id, p); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + return; + } + + // On worker: read directly from local DiskCacheFactory registry + String worker_id = getWorkerID(context); + auto ttl_caches = DiskCacheFactory::instance().getAllTableTTLCaches(); + for (const auto & [uuid, cache_ptr] : ttl_caches) + { + auto * ttl_cache = dynamic_cast(cache_ptr.get()); + if (!ttl_cache) + continue; + + auto table_stats = ttl_cache->getStats(); + for (const auto & ps : ttl_cache->getPartitionStats()) + { + Protos::TTLCachePartitionStats p; + p.set_table_name(ttl_cache->getName()); + p.set_table_uuid(table_stats.table_uuid); + p.set_partition(ps.partition_id); + p.set_entry_count(ps.entry_count); + p.set_bytes(ps.total_bytes); + fillPartitionRow(res_columns, worker_id, p); + } + } +} + +} diff --git a/src/Storages/System/StorageSystemDiskTTLCachePartitions.h b/src/Storages/System/StorageSystemDiskTTLCachePartitions.h new file mode 100644 index 00000000000..4b1bd3005b3 --- /dev/null +++ b/src/Storages/System/StorageSystemDiskTTLCachePartitions.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Context; + +/** Implements system table disk_ttl_cache_partitions + * Shows per-partition TTL disk cache statistics (hits, misses, size) + */ +class StorageSystemDiskTTLCachePartitions final : public shared_ptr_helper, + public IStorageSystemOneBlock +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "SystemDiskTTLCachePartitions"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + StorageSystemDiskTTLCachePartitions(const StorageID & table_id_); + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemDiskTTLCachePreloads.cpp b/src/Storages/System/StorageSystemDiskTTLCachePreloads.cpp new file mode 100644 index 00000000000..b2408e2e224 --- /dev/null +++ 
b/src/Storages/System/StorageSystemDiskTTLCachePreloads.cpp @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +NamesAndTypesList StorageSystemDiskTTLCachePreloads::getNamesAndTypes() +{ + return { + {"worker_id", std::make_shared()}, + {"table_name", std::make_shared()}, + {"table_uuid", std::make_shared()}, + {"partition_id", std::make_shared()}, + {"parts_in_flight", std::make_shared()}, + {"parts_submitted", std::make_shared()}, + {"elapsed_ms", std::make_shared()}, + {"preload_level", std::make_shared()}, + }; +} + +StorageSystemDiskTTLCachePreloads::StorageSystemDiskTTLCachePreloads(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_) +{ +} + +static void fillPreloadRow(MutableColumns & res_columns, const String & worker_id, const Protos::PreloadPartitionStats & p) +{ + size_t col_idx = 0; + res_columns[col_idx++]->insert(worker_id); + res_columns[col_idx++]->insert(p.table_name()); + res_columns[col_idx++]->insert(p.table_uuid()); + res_columns[col_idx++]->insert(p.partition_id()); + res_columns[col_idx++]->insert(p.parts_in_flight()); + res_columns[col_idx++]->insert(p.parts_submitted()); + res_columns[col_idx++]->insert(p.elapsed_ms()); + res_columns[col_idx++]->insert(p.preload_level()); +} + +void StorageSystemDiskTTLCachePreloads::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + if (context->getServerType() == ServerType::cnch_server) + { + auto * log = &Poco::Logger::get("StorageSystemDiskTTLCachePreloads"); + std::vector all_workers; + try + { + auto rm_client = context->getResourceManagerClient(); + if (!rm_client) + { + LOG_WARNING(log, "ResourceManager client unavailable, returning empty result"); + return; + } + rm_client->getAllWorkers(all_workers); + } + catch (...) 
+ { + tryLogCurrentException(log, "Failed to get workers from ResourceManager"); + return; + } + + auto & pools = context->getCnchWorkerClientPools(); + for (const auto & wd : all_workers) + { + if (wd.vw_name == ResourceManagement::toSystemVWName(ResourceManagement::VirtualWarehouseType::Write)) + continue; + try + { + auto worker = pools.getWorker(wd.host_ports); + auto partitions = worker->getPreloadStats(); + for (const auto & p : partitions) + fillPreloadRow(res_columns, wd.id.empty() ? wd.host_ports.getRPCAddress() : wd.id, p); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + return; + } + + // On worker: read directly from PreloadRegistry + String worker_id = getWorkerID(context); + for (const auto & snap : PreloadRegistry::instance().getSnapshot()) + { + Protos::PreloadPartitionStats p; + p.set_table_name(snap.table_name); + p.set_table_uuid(snap.table_uuid); + p.set_partition_id(snap.partition_id); + p.set_parts_in_flight(snap.parts_in_flight); + p.set_parts_submitted(snap.parts_submitted); + p.set_elapsed_ms(snap.elapsed_ms); + p.set_preload_level(snap.preload_level); + fillPreloadRow(res_columns, worker_id, p); + } +} + +} diff --git a/src/Storages/System/StorageSystemDiskTTLCachePreloads.h b/src/Storages/System/StorageSystemDiskTTLCachePreloads.h new file mode 100644 index 00000000000..b6264b61b09 --- /dev/null +++ b/src/Storages/System/StorageSystemDiskTTLCachePreloads.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Context; + +/** Implements system table disk_ttl_cache_preloads + * Shows in-flight async preload tasks grouped by (worker, table, partition) + */ +class StorageSystemDiskTTLCachePreloads final : public shared_ptr_helper, + public IStorageSystemOneBlock +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "SystemDiskTTLCachePreloads"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + 
StorageSystemDiskTTLCachePreloads(const StorageID & table_id_); + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemDiskTTLCacheTables.cpp b/src/Storages/System/StorageSystemDiskTTLCacheTables.cpp new file mode 100644 index 00000000000..667ea90d99b --- /dev/null +++ b/src/Storages/System/StorageSystemDiskTTLCacheTables.cpp @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +NamesAndTypesList StorageSystemDiskTTLCacheTables::getNamesAndTypes() +{ + return { + {"worker_id", std::make_shared()}, + {"table_name", std::make_shared()}, + {"table_uuid", std::make_shared()}, + {"ttl_minutes", std::make_shared()}, + {"max_size_bytes", std::make_shared()}, + {"last_eviction_run", std::make_shared()}, + {"eviction_stats", std::make_shared(std::make_shared(), std::make_shared())}, + {"rejection_stats", std::make_shared(std::make_shared(), std::make_shared())}, + {"write_stats", std::make_shared(std::make_shared(), std::make_shared())}, + {"hit_stats", std::make_shared(std::make_shared(), std::make_shared())}, + }; +} + +StorageSystemDiskTTLCacheTables::StorageSystemDiskTTLCacheTables(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_) +{ +} + +static void dumpStatsToMapColumn(const std::unordered_map & map, IColumn * column) +{ + auto * column_map = column ? &typeid_cast(*column) : nullptr; + if (!column_map) + return; + + auto & offsets = column_map->getOffsets(); + auto & key_column = column_map->getKey(); + auto & value_column = column_map->getValue(); + + size_t size = 0; + for (const auto & entry : map) + { + key_column.insertData(entry.first.c_str(), entry.first.size()); + value_column.insert(entry.second); + size++; + } + + offsets.push_back((offsets.size() == 0 ? 
0 : offsets.back()) + size); +} + +static void fillRowFromProto(MutableColumns & res_columns, const String & worker_id, const Protos::TTLCacheTableStats & t) +{ + size_t col_idx = 0; + + res_columns[col_idx++]->insert(worker_id); + res_columns[col_idx++]->insert(t.table_name()); + res_columns[col_idx++]->insert(t.table_uuid()); + res_columns[col_idx++]->insert(t.ttl_minutes()); + res_columns[col_idx++]->insert(t.max_size_bytes()); + res_columns[col_idx++]->insert(t.last_eviction_run()); + + { + std::unordered_map eviction_map; + eviction_map["expired"] = t.evicted_expired(); + eviction_map["size_limit"] = t.evicted_size_limit(); + eviction_map["async_triggered_evicted"] = t.async_triggered_evicted(); + eviction_map["async_skipped_rate_limit_evicted"] = t.async_skipped_rate_limit_evicted(); + dumpStatsToMapColumn(eviction_map, res_columns[col_idx++].get()); + } + + { + std::unordered_map rejection_map; + rejection_map["non_time_partition"] = t.rejected_non_time_partition(); + rejection_map["too_old"] = t.rejected_too_old(); + dumpStatsToMapColumn(rejection_map, res_columns[col_idx++].get()); + } + + { + std::unordered_map write_map; + write_map["count_preload"] = t.count_preload(); + write_map["count_query"] = t.count_query(); + write_map["bytes_preload"] = t.bytes_preload(); + write_map["bytes_query"] = t.bytes_query(); + write_map["count_restored"] = t.count_restored(); + write_map["bytes_restored"] = t.bytes_restored(); + write_map["idx_count_preload"] = t.idx_count_preload(); + write_map["idx_bytes_preload"] = t.idx_bytes_preload(); + write_map["idx_count_query"] = t.idx_count_query(); + write_map["idx_bytes_query"] = t.idx_bytes_query(); + dumpStatsToMapColumn(write_map, res_columns[col_idx++].get()); + } + { + std::unordered_map hit_map; + hit_map["data_hits"] = t.data_hits(); + hit_map["data_misses"] = t.data_misses(); + hit_map["idx_hits"] = t.idx_hits(); + hit_map["idx_misses"] = t.idx_misses(); + dumpStatsToMapColumn(hit_map, res_columns[col_idx++].get()); 
+ } +} + +void StorageSystemDiskTTLCacheTables::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + if (context->getServerType() == ServerType::cnch_server) + { + // Fan out to all workers via RPC using RM worker list — same pattern as system.workers. + // This works in any context without requiring a VW to be set. + auto * log = &Poco::Logger::get("StorageSystemDiskTTLCacheTables"); + std::vector all_workers; + try + { + auto rm_client = context->getResourceManagerClient(); + if (!rm_client) + { + LOG_WARNING(log, "ResourceManager client unavailable, returning empty result"); + return; + } + rm_client->getAllWorkers(all_workers); + } + catch (...) + { + tryLogCurrentException(log, "Failed to get workers from ResourceManager"); + return; + } + + LOG_INFO(log, "Querying TTL cache stats from {} worker(s)", all_workers.size()); + auto & pools = context->getCnchWorkerClientPools(); + for (const auto & wd : all_workers) + { + if (wd.vw_name == ResourceManagement::toSystemVWName(ResourceManagement::VirtualWarehouseType::Write)) + continue; + LOG_INFO(log, "Sending getTTLCacheStats RPC to {}", wd.host_ports.getRPCAddress()); + try + { + auto worker = pools.getWorker(wd.host_ports); + auto stats = worker->getTTLCacheStats(); + LOG_INFO(log, "Got {} TTL cache entries from {}", stats.size(), wd.host_ports.getRPCAddress()); + for (const auto & t : stats) + fillRowFromProto(res_columns, wd.id.empty() ? wd.host_ports.getRPCAddress() : wd.id, t); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + return; + } + + // On worker: read directly from local DiskCacheFactory registry + String worker_id = getWorkerID(context); + auto ttl_caches = DiskCacheFactory::instance().getAllTableTTLCaches(); + for (const auto & [uuid, cache_ptr] : ttl_caches) + { + auto * ttl_cache = dynamic_cast(cache_ptr.get()); + if (!ttl_cache) + continue; + + auto stats = ttl_cache->getStats(); + + Protos::TTLCacheTableStats t; + t.set_table_name(ttl_cache->getName()); + t.set_table_uuid(stats.table_uuid); + t.set_ttl_minutes(ttl_cache->getTTLMinutes()); + t.set_max_size_bytes(ttl_cache->getMaxSizeBytes()); + t.set_last_eviction_run(stats.last_eviction_run); + t.set_evicted_expired(stats.evicted_expired); + t.set_evicted_size_limit(stats.evicted_size_limit); + t.set_async_triggered_evicted(stats.async_eviction_triggered); + t.set_async_skipped_rate_limit_evicted(stats.async_eviction_skipped_rate_limit); + t.set_rejected_non_time_partition(stats.rejected_non_time_partition); + t.set_rejected_too_old(stats.rejected_too_old); + t.set_count_preload(stats.cached_from_preload); + t.set_count_query(stats.cached_from_query); + t.set_bytes_preload(stats.cached_bytes_preload); + t.set_bytes_query(stats.cached_bytes_query); + t.set_count_restored(stats.cached_from_restored); + t.set_bytes_restored(stats.cached_bytes_restored); + t.set_idx_count_preload(stats.cached_idx_from_preload); + t.set_idx_bytes_preload(stats.cached_idx_bytes_preload); + t.set_idx_count_query(stats.cached_idx_from_query); + t.set_idx_bytes_query(stats.cached_idx_bytes_query); + t.set_data_hits(stats.data_hits); + t.set_data_misses(stats.data_misses); + t.set_idx_hits(stats.idx_hits); + t.set_idx_misses(stats.idx_misses); + + fillRowFromProto(res_columns, worker_id, t); + } +} + +} diff --git a/src/Storages/System/StorageSystemDiskTTLCacheTables.h b/src/Storages/System/StorageSystemDiskTTLCacheTables.h new file mode 100644 index 00000000000..d5f1255aa1e --- /dev/null 
+++ b/src/Storages/System/StorageSystemDiskTTLCacheTables.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Context; + +/** Implements system table disk_ttl_cache_tables + * Shows per-table TTL disk cache statistics + */ +class StorageSystemDiskTTLCacheTables final : public shared_ptr_helper, + public IStorageSystemOneBlock +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "SystemDiskTTLCacheTables"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + StorageSystemDiskTTLCacheTables(const StorageID & table_id_); + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index ef4d243666d..19fe4cee1c7 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -162,6 +162,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -310,6 +313,9 @@ void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper) #endif attach(system_database, "cnch_transaction_clean_tasks"); attach(system_database, "schema_inference_cache"); + attach(system_database, "disk_ttl_cache_tables"); + attach(system_database, "disk_ttl_cache_partitions"); + attach(system_database, "disk_ttl_cache_preloads"); } void attachSystemTablesAsync(IDatabase & system_database, AsynchronousMetrics & async_metrics) diff --git a/src/Transaction/Actions/InsertAction.cpp b/src/Transaction/Actions/InsertAction.cpp index a763cc8a76e..b8aebf718e8 100644 --- a/src/Transaction/Actions/InsertAction.cpp +++ b/src/Transaction/Actions/InsertAction.cpp @@ -149,8 +149,8 @@ void InsertAction::checkAndSetDedupMode(CnchDedupHelper::DedupMode dedup_mode_) throw Exception( ErrorCodes::LOGICAL_ERROR, "Dedup mode is {}, but staged parts are not empty for table {}, it's a bug!", - 
table->getCnchStorageID().getNameForLogs(), - typeToString(dedup_mode_)); + typeToString(dedup_mode_), + table->getCnchStorageID().getNameForLogs()); LOG_TRACE(log, "Table {} is in {} mode.", table->getCnchStorageID().getNameForLogs(), typeToString(dedup_mode_)); }