Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ if(DFLASH27B_TESTS)

# ─── dflash_server: native C++ HTTP server ─────────────────────────
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/server/server_main.cpp")
find_package(CURL REQUIRED)
add_executable(dflash_server
src/server/server_main.cpp
src/server/http_server.cpp
Expand All @@ -713,7 +714,7 @@ if(DFLASH27B_TESTS)
DFLASH27B_BACKEND_CUDA=1
DFLASH27B_CUDA_MIN_SM=${_dflash_cuda_min_sm})
endif()
target_link_libraries(dflash_server PRIVATE dflash_common ggml ${DFLASH27B_GGML_BACKEND_TARGET} pthread)
target_link_libraries(dflash_server PRIVATE dflash_common ggml ${DFLASH27B_GGML_BACKEND_TARGET} pthread CURL::libcurl)
if(DFLASH27B_GPU_BACKEND STREQUAL "cuda")
find_package(CUDAToolkit REQUIRED)
target_link_libraries(dflash_server PRIVATE CUDA::cudart)
Expand Down Expand Up @@ -780,7 +781,7 @@ if(DFLASH27B_TESTS)
DFLASH27B_BACKEND_CUDA=1
DFLASH27B_CUDA_MIN_SM=${_dflash_cuda_min_sm})
endif()
target_link_libraries(test_server_unit PRIVATE dflash_common ggml ${DFLASH27B_GGML_BACKEND_TARGET})
target_link_libraries(test_server_unit PRIVATE dflash_common ggml ${DFLASH27B_GGML_BACKEND_TARGET} CURL::libcurl)
if(DFLASH27B_GPU_BACKEND STREQUAL "cuda")
find_package(CUDAToolkit REQUIRED)
target_link_libraries(test_server_unit PRIVATE CUDA::cudart)
Expand Down
11 changes: 10 additions & 1 deletion server/src/qwen3/qwen3_drafter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,16 @@ bool load_drafter(const std::string & gguf_path, int /*gpu_layers*/,

// Convenience overload of load_drafter that infers the drafter architecture
// from the GGUF path instead of taking it as an argument.
//
// The path is lower-cased and searched for "qwen3.5" or "qwen35"; a match
// selects DrafterArch::Qwen35_0p8b, anything else keeps the
// DrafterArch::Qwen3_0p6b default. The search runs over the whole path
// string, so a directory component containing either marker also selects
// Qwen35_0p8b.
//
// The incoming gpu_layers argument is ignored: the call is forwarded to the
// arch-taking overload with gpu_layers fixed at 999, and that overload's
// result is returned unchanged.
bool load_drafter(const std::string & gguf_path, int /*gpu_layers*/,
                  int gpu, DrafterContext & out) {
    DrafterArch arch = DrafterArch::Qwen3_0p6b;
    {
        std::string lower = gguf_path;
        // Go through unsigned char: std::tolower has undefined behavior for
        // negative char values (e.g. non-ASCII bytes in a path).
        for (auto & c : lower) c = (char)std::tolower((unsigned char)c);
        if (lower.find("qwen3.5") != std::string::npos ||
            lower.find("qwen35") != std::string::npos) {
            arch = DrafterArch::Qwen35_0p8b;
        }
    }
    // Single exit point: forward only after arch has been resolved, so the
    // detection above is never bypassed.
    return load_drafter(gguf_path, /*gpu_layers=*/999, arch, gpu, out);
}

bool load_drafter(const std::string & gguf_path, int /*gpu_layers*/,
Expand Down
Loading
Loading