diff --git a/build/fbcode_builder/manifests/accel-config b/build/fbcode_builder/manifests/accel-config
new file mode 100644
index 0000000000..1a51058147
--- /dev/null
+++ b/build/fbcode_builder/manifests/accel-config
@@ -0,0 +1,9 @@
+[manifest]
+name = accel-config
+
+[rpms]
+accel-config-devel
+
+[debs.distro=ubuntu]
+libaccel-config-dev
+
diff --git a/build/fbcode_builder/manifests/cachelib b/build/fbcode_builder/manifests/cachelib
index c340a2bb56..1386034f15 100644
--- a/build/fbcode_builder/manifests/cachelib
+++ b/build/fbcode_builder/manifests/cachelib
@@ -25,6 +25,7 @@ zstd
 mvfst
 numa
 libaio
+dto
 # cachelib also depends on openssl but since the latter requires a platform-
 # specific configuration we rely on the folly manifest to provide this
 # dependency to avoid duplication.
diff --git a/build/fbcode_builder/manifests/dto b/build/fbcode_builder/manifests/dto
new file mode 100644
index 0000000000..9d171ef933
--- /dev/null
+++ b/build/fbcode_builder/manifests/dto
@@ -0,0 +1,14 @@
+[manifest]
+name = dto
+
+[git]
+repo_url = https://github.com/intel/DTO.git
+branch = cachelib
+
+[build]
+builder = cmake
+
+[dependencies]
+accel-config
+uuid
+numa
diff --git a/build/fbcode_builder/manifests/uuid b/build/fbcode_builder/manifests/uuid
new file mode 100644
index 0000000000..d334fdd732
--- /dev/null
+++ b/build/fbcode_builder/manifests/uuid
@@ -0,0 +1,9 @@
+[manifest]
+name = uuid
+
+[rpms]
+libuuid-devel
+
+[debs]
+uuid-dev
+
diff --git a/cachelib/CMakeLists.txt b/cachelib/CMakeLists.txt
index 20598fc55e..8f20c921c6 100644
--- a/cachelib/CMakeLists.txt
+++ b/cachelib/CMakeLists.txt
@@ -44,6 +44,29 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 option(BUILD_TESTS "If enabled, compile the tests." ON)
 
+option(BUILD_WITH_DTO "If enabled, build with the DTO library for DSA support." ON)
+
+if (BUILD_WITH_DTO)
+  find_package(DTO REQUIRED)
+  if (DTO_FOUND)
+    message(STATUS "DTO found, remember to configure DSA devices for acceleration. If no DSA device is found, cachelib will fallback to software path.")
+  endif()
+endif ()
+
+include(CMakeDependentOption)
+# USE_DTO_API is only meaningful if BUILD_WITH_DTO is ON *and* DTO was found
+cmake_dependent_option(
+  USE_DTO_API
+  "Use DTO library API functions for DSA acceleration."
+  OFF
+  "BUILD_WITH_DTO;DTO_FOUND"
+  OFF
+)
+if (USE_DTO_API)
+  message(STATUS "Using DTO API for offloading")
+  add_compile_definitions(DTO_API)
+endif()
+
 
 set(BIN_INSTALL_DIR bin CACHE STRING
     "The subdirectory where binaries should be installed")
diff --git a/cachelib/cachebench/CMakeLists.txt b/cachelib/cachebench/CMakeLists.txt
index 712957f4f0..385a988155 100644
--- a/cachelib/cachebench/CMakeLists.txt
+++ b/cachelib/cachebench/CMakeLists.txt
@@ -73,6 +73,10 @@ add_executable (binary_trace_gen binary_trace_gen.cpp)
 target_link_libraries(cachebench cachelib_cachebench)
 target_link_libraries(binary_trace_gen cachelib_binary_trace_gen)
 
+if (BUILD_WITH_DTO)
+  target_link_libraries(cachebench accel-config DTO::dto)
+endif()
+
 install(
   TARGETS
     cachebench
diff --git a/cachelib/cachebench/cache/Cache.h b/cachelib/cachebench/cache/Cache.h
index 6f1d35683d..c129360e0b 100644
--- a/cachelib/cachebench/cache/Cache.h
+++ b/cachelib/cachebench/cache/Cache.h
@@ -1331,7 +1331,13 @@ void Cache::setStringItem(WriteHandle& handle,
   }
 
   auto ptr = reinterpret_cast(getMemory(handle));
-  std::strncpy(ptr, str.c_str(), dataSize);
+  // Keep strncpy's contract while using memmove: never read more than the
+  // str.size() + 1 bytes that c_str() guarantees, then zero-fill whatever is
+  // left of the destination.
+  const size_t copyLen =
+      str.size() + 1 < dataSize ? str.size() + 1 : dataSize;
+  std::memmove(ptr, str.c_str(), copyLen);
+  std::memset(ptr + copyLen, 0, dataSize - copyLen);
 
   // Make sure the copied string ends with null char
   if (str.size() + 1 > dataSize) {
diff --git a/cachelib/cachebench/runner/CacheStressor.h b/cachelib/cachebench/runner/CacheStressor.h
index af3b8b9101..ddc04b38e8 100644
--- a/cachelib/cachebench/runner/CacheStressor.h
+++ b/cachelib/cachebench/runner/CacheStressor.h
@@ -28,6 +28,11 @@
 #include 
 #include 
 
+#ifdef DTO_API
+#include <dto.h>
+#define DTO_DSA_MIN_THRESHOLD (32 * 1024)
+#endif
+
 #include "cachelib/cachebench/cache/Cache.h"
 #include "cachelib/cachebench/cache/TimeStampTicker.h"
 #include "cachelib/cachebench/runner/Stressor.h"
@@ -43,6 +48,16 @@ namespace cachebench {
 
 constexpr uint32_t kNvmCacheWarmUpCheckRate = 1000;
 
+#ifdef DTO_API
+// Callback handed to dto_memcpy_async(): `arg` must point to a live
+// std::function<void()>, which is invoked. Declared inline because this
+// header-defined function may be included from more than one translation unit.
+inline void async_memcpy_callback(void* arg) {
+  auto& fn = *reinterpret_cast<std::function<void()>*>(arg);
+  fn();
+}
+#endif
+
 // Implementation of stressor that uses a workload generator to stress an
 // instance of the cache. All item's value in CacheStressor follows CacheValue
 // schema, which contains a few integers for sanity checks use. So it is invalid
@@ -493,6 +508,25 @@ class CacheStressor : public Stressor {
       ++stats.setFailure;
       return OpResultType::kSetFailure;
     } else {
+#ifdef DTO_API
+      if (config_.useDTOAsync && size >= DTO_DSA_MIN_THRESHOLD) {
+        // NOTE(review): this path copies the value with dto_memcpy_async()
+        // instead of calling populateItem(); confirm nothing else
+        // populateItem() does is needed for large items. It also relies on
+        // the callback running before dto_memcpy_async() returns, because fn
+        // and everything it captures by reference live on this stack frame.
+        std::function<void()> fn = [&] { cache_->insertOrReplace(it); };
+        // async_memcpy_callback() casts its argument to
+        // std::function<void()>*, so it must be given &fn rather than the
+        // address of a bare closure object.
+        dto_memcpy_async(it->getMemory(),
+                         itemValue.data(),
+                         size,
+                         &async_memcpy_callback,
+                         &fn);
+        return OpResultType::kSetSuccess;
+      }
+#endif
       populateItem(it, itemValue);
       cache_->insertOrReplace(it);
       return OpResultType::kSetSuccess;
diff --git a/cachelib/cachebench/util/Config.cpp b/cachelib/cachebench/util/Config.cpp
index 133074e50c..c66bd5213d 100644
--- a/cachelib/cachebench/util/Config.cpp
+++ b/cachelib/cachebench/util/Config.cpp
@@ -68,6 +68,7 @@ StressorConfig::StressorConfig(const folly::dynamic& configJson) {
   JSONSetVal(configJson, checkNvmCacheWarmUp);
 
   JSONSetVal(configJson, useCombinedLockForIterators);
+  JSONSetVal(configJson, useDTOAsync);
 
   if (configJson.count("poolDistributions")) {
     for (auto& it : configJson["poolDistributions"]) {
diff --git a/cachelib/cachebench/util/Config.h b/cachelib/cachebench/util/Config.h
index dcb2ea3b63..1bfec941ae 100644
--- a/cachelib/cachebench/util/Config.h
+++ b/cachelib/cachebench/util/Config.h
@@ -332,6 +332,10 @@ struct StressorConfig : public JSONConfig {
 
   bool useCombinedLockForIterators{false};
 
+  // If true (and cachebench was built with DTO_API), set operations copy
+  // values of at least DTO_DSA_MIN_THRESHOLD bytes with dto_memcpy_async().
+  bool useDTOAsync{false};
+
   // admission policy for cache.
   std::shared_ptr admPolicy{};
 
diff --git a/cachelib/navy/CMakeLists.txt b/cachelib/navy/CMakeLists.txt
index ace0ce07f0..92d9a8308b 100644
--- a/cachelib/navy/CMakeLists.txt
+++ b/cachelib/navy/CMakeLists.txt
@@ -61,6 +61,10 @@ target_link_libraries(cachelib_navy PUBLIC
   GTest::gmock
 )
 
+if (BUILD_WITH_DTO)
+  target_link_libraries(cachelib_navy PUBLIC DTO::dto)
+endif()
+
 install(TARGETS cachelib_navy
         EXPORT cachelib-exports
         DESTINATION ${LIB_INSTALL_DIR} )
diff --git a/cachelib/navy/block_cache/BlockCache.cpp b/cachelib/navy/block_cache/BlockCache.cpp
index 5cae3fb3e8..deb4fc4456 100644
--- a/cachelib/navy/block_cache/BlockCache.cpp
+++ b/cachelib/navy/block_cache/BlockCache.cpp
@@ -23,6 +23,10 @@
 #include 
 #include 
 
+#ifdef DTO_API
+#include <dto.h>
+#endif
+
 #include "cachelib/common/inject_pause.h"
 #include "cachelib/navy/block_cache/SparseMapIndex.h"
 #include "cachelib/navy/common/Hash.h"
@@ -31,6 +35,17 @@
 
 namespace facebook::cachelib::navy {
 
+#ifdef DTO_API
+namespace {
+// Callback handed to dto_memcpy_crc_async(): `arg` must point to a live
+// std::function<void()>, which is invoked.
+void async_memcpy_crc_cb(void* arg) {
+  auto& fn = *reinterpret_cast<std::function<void()>*>(arg);
+  fn();
+}
+} // namespace
+#endif
+
 BlockCache::Config& BlockCache::Config::validate() {
   XDCHECK_NE(scheduler, nullptr);
   if (!device || !evictionPolicy) {
@@ -702,11 +717,30 @@ Status BlockCache::writeEntry(RelAddress addr,
   auto desc = new (buffer.data() + descOffset)
       EntryDesc(hk.key().size(), value.size(), hk.keyHash());
   if (checksumData_) {
+#ifdef DTO_API
+    // Work to execute while waiting on the offloaded copy: place the key so
+    // that it ends where the entry descriptor begins.
+    std::function<void()> fn = [hk, descOffset, &buffer]() {
+      buffer.copyFrom(descOffset - hk.key().size(), makeView(hk.key()));
+    };
+    // buffer.data() is the destination and value is the source.
+    // NOTE(review): relies on fn having run and the copy having completed by
+    // the time this call returns, since buffer is written out right after.
+    desc->cs = dto_memcpy_crc_async(buffer.data(),
+                                    static_cast<const void*>(value.data()),
+                                    value.size(),
+                                    &async_memcpy_crc_cb,
+                                    &fn);
+#else
     desc->cs = checksum(value);
+    buffer.copyFrom(descOffset - hk.key().size(), makeView(hk.key()));
+    buffer.copyFrom(0, value);
+#endif
+  } else {
+    buffer.copyFrom(descOffset - hk.key().size(), makeView(hk.key()));
+    buffer.copyFrom(0, value);
   }
 
-  buffer.copyFrom(descOffset - hk.key().size(), makeView(hk.key()));
-  buffer.copyFrom(0, value);
   regionManager_.write(addr, std::move(buffer));
   logicalWrittenCount_.add(hk.key().size() + value.size());
 
diff --git a/cachelib/navy/common/Hash.cpp b/cachelib/navy/common/Hash.cpp
index 50ef925e46..4f9092af04 100644
--- a/cachelib/navy/common/Hash.cpp
+++ b/cachelib/navy/common/Hash.cpp
@@ -17,6 +17,9 @@
 #include "cachelib/navy/common/Hash.h"
 
 #include 
+#ifdef DTO_API
+#include <dto.h>
+#endif
 
 namespace facebook::cachelib::navy {
 uint64_t hashBuffer(BufferView key, uint64_t seed) {
@@ -24,6 +27,13 @@ uint64_t hashBuffer(BufferView key, uint64_t seed) {
 }
 
 uint32_t checksum(BufferView data, uint32_t startingChecksum) {
+#ifdef DTO_API
+  // NOTE(review): startingChecksum is not forwarded on this path, so seeded
+  // (chained) checksums may not match the folly::crc32 result -- confirm no
+  // caller depends on a non-default seed when DTO_API is enabled.
+  return dto_crc(data.data(), data.size(), nullptr, nullptr);
+#else
   return folly::crc32(data.data(), data.size(), startingChecksum);
+#endif
 }
 } // namespace facebook::cachelib::navy