Luce-Org · davide221 · May 21, 2026 · May 21, 2026
diff --git a/dflash/CMakeLists.txt b/dflash/CMakeLists.txt
diff --git a/dflash/README.md b/dflash/README.md
@@ -210,7 +210,7 @@ Full `bench_llm.py` suite on Qwen3.6-27B UD-Q4_K_XL, 10 prompts, n_gen=256, RTX
 and dispatches by arch:
 
   - `qwen35` / `qwen36` → existing DFlash + DDTree pipeline (no change).
-  - `laguna` → `dflash27b::run_laguna_daemon()` (no spec-decode, no DDTree).
+  - `laguna` → `dflash::common::run_laguna_daemon()` (no spec-decode, no DDTree).
 
 The daemon stdin/stream-fd protocol is identical, so `scripts/server.py`
 drives both arches end-to-end. The only thing the user changes is `--target`.

diff --git a/dflash/scripts/convert_dflash_to_gguf.py b/dflash/scripts/convert_dflash_to_gguf.py
@@ -21,7 +21,7 @@
 name.
 
 Usage:
-  PYTHONPATH=../../dflash27b_ggml/deps/llama.cpp/gguf-py python convert_dflash_to_gguf.py \
+  PYTHONPATH=../../dflash_ggml/deps/llama.cpp/gguf-py python convert_dflash_to_gguf.py \
     models/draft/model.safetensors \
     qwen3.5-27b-dflash-draft.gguf
 """

diff --git a/dflash/scripts/server.py b/dflash/scripts/server.py
@@ -132,7 +132,7 @@ def _extra_daemon_has_target_sharding(extra: list[str] | None) -> bool:
 # Architecture strings stored in `general.architecture` of every GGUF this
 # server can drive. test_dflash dispatches by GGUF arch internally:
 #   qwen35 / qwen36  -> existing DFlash + DDTree pipeline
-#   laguna           -> dflash27b::run_laguna_daemon() (no spec-decode)
+#   laguna           -> dflash::common::run_laguna_daemon() (no spec-decode)
 # server.py just needs to omit --draft + the DFlash/DDTree flags when the
 # arch doesn't support speculative decoding yet.
 _QWEN35_ARCHES = {"qwen35", "qwen36"}
@@ -843,7 +843,7 @@ async def _openai_compat_error_handler(_request: Request, exc: OpenAICompatError
 
     if arch in _LAGUNA_ARCHES:
         # test_dflash detects arch=laguna from the GGUF and dispatches
-        # internally to dflash27b::run_laguna_daemon(). No --draft, no
+        # internally to dflash::common::run_laguna_daemon(). No --draft, no
         # --fast-rollback, no --ddtree (no Laguna spec-decode draft yet).
         # Tokens stream as int32 LE on stream_fd terminated by -1, byte-
         # identical to the qwen35 path so SSE/stream consumers stay shared.

diff --git a/dflash/src/bsa_launcher.cu b/dflash/src/bsa_launcher.cu
@@ -28,7 +28,7 @@ template<typename T, int Headdim, bool Is_causal>
 void run_mha_fwd_block_(Flash_fwd_params &params, cudaStream_t stream);
 }
 
-namespace dflash27b {
+namespace dflash::common {
 namespace flashprefill {
 
 namespace {
@@ -275,4 +275,4 @@ fail:
 }
 
 }  // namespace flashprefill
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/bsa_launcher_hip.cu b/dflash/src/bsa_launcher_hip.cu
@@ -19,7 +19,7 @@
 #include <cstdint>
 #include <cstdlib>          // size_t
 
-namespace dflash27b {
+namespace dflash::common {
 namespace flashprefill {
 
 // Defined in flashprefill_kernels.hip.cu.
@@ -108,4 +108,4 @@ extern "C" int launch_bsa_sparse_flash_forward_bf16(
 }
 
 }  // namespace flashprefill
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/attn_masks.h b/dflash/src/common/attn_masks.h
@@ -10,7 +10,7 @@
 #include <cstdint>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 // Minimum alignment required by ggml flash_attn_ext for mask rows.
 static constexpr int KQ_MASK_PAD = 32;
@@ -75,4 +75,4 @@ inline void build_tree_mask(const DDTree & tree, int past_length,
     }
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/backend_factory.cpp b/dflash/src/common/backend_factory.cpp
@@ -10,7 +10,7 @@
 
 #include <cstdio>
 
-namespace dflash27b {
+namespace dflash::common {
 
 std::string detect_arch(const char * model_path) {
     auto info = inspect_gguf_model_info(model_path);
@@ -107,4 +107,4 @@ std::unique_ptr<ModelBackend> create_backend(const BackendArgs & args) {
     }
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/backend_factory.h b/dflash/src/common/backend_factory.h
@@ -16,7 +16,7 @@
 #include <memory>
 #include <string>
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ─── Backend creation arguments ─────────────────────────────────────────
 // A superset of all per-arch config fields. The factory reads only those
@@ -62,4 +62,4 @@ std::unique_ptr<ModelBackend> create_backend(const BackendArgs & args);
 // Useful for early dispatch (e.g. printing which backend will be used).
 std::string detect_arch(const char * model_path);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/daemon_loop.cpp b/dflash/src/common/daemon_loop.cpp
@@ -25,7 +25,7 @@
 #define ssize_t long
 #endif
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ── DaemonIO ────────────────────────────────────────────────────────────
 
@@ -424,4 +424,4 @@ int run_daemon(ModelBackend & backend, const DaemonLoopArgs & args) {
     return 0;
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/daemon_loop.h b/dflash/src/common/daemon_loop.h
@@ -11,7 +11,7 @@
 
 #include "model_backend.h"
 
-namespace dflash27b {
+namespace dflash::common {
 
 struct DaemonLoopArgs {
     int stream_fd = -1;
@@ -23,4 +23,4 @@ struct DaemonLoopArgs {
 // commands until `quit`, `exit`, or EOF.  Returns 0 on clean shutdown.
 int run_daemon(ModelBackend & backend, const DaemonLoopArgs & args);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/ddtree.cpp b/dflash/src/common/ddtree.cpp
@@ -7,7 +7,7 @@
 #include <cmath>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 void extract_draft_topk(const float * logits,
                         int n_positions, int vocab, int K,
@@ -223,4 +223,4 @@ std::vector<int> follow_verified_tree(const DDTree & tree,
     return accepted;
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/ddtree.h b/dflash/src/common/ddtree.h
@@ -13,7 +13,7 @@
 #include <unordered_map>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 // A flat DFS-ordered tree built from the draft's top-K softmax distributions.
 // Slot 0 is the tree root (the bonus token from the previous spec round);
@@ -61,4 +61,4 @@ std::vector<int> follow_verified_tree(const DDTree & tree,
                                       int & out_next_token,
                                       int * out_node_idx = nullptr);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/device_placement.h b/dflash/src/common/device_placement.h
@@ -9,7 +9,7 @@
 
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 struct DevicePlacement {
     int gpu = 0;                              // primary GPU (single-GPU mode)
@@ -28,4 +28,4 @@ struct DevicePlacement {
     }
 };
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/dflash_capture.cpp b/dflash/src/common/dflash_capture.cpp
@@ -1,6 +1,6 @@
 #include "dflash_capture.h"
 
-namespace dflash27b {
+namespace dflash::common {
 
 int target_capture_index(const int * capture_layer_ids,
                          int n_capture_layers,
@@ -12,4 +12,4 @@ int target_capture_index(const int * capture_layer_ids,
     return -1;
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/dflash_capture.h b/dflash/src/common/dflash_capture.h
@@ -7,12 +7,12 @@
 
 #pragma once
 
-namespace dflash27b {
+namespace dflash::common {
 
 // Linear search for layer_idx in capture_layer_ids[0..n_capture_layers).
 // Returns the capture index (0..n_capture_layers-1) on hit, -1 on miss.
 int target_capture_index(const int * capture_layer_ids,
                          int n_capture_layers,
                          int layer_idx);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/dflash_draft_graph.cpp b/dflash/src/common/dflash_draft_graph.cpp
@@ -5,7 +5,7 @@
 
 #include <cstdio>
 
-namespace dflash27b {
+namespace dflash::common {
 
 // Build draft graph at a given ctx_len into sg. Does NOT touch sg.alloc.
 // mirror_view: if true, uses a view into mirror->target_feat at slot0.
@@ -128,4 +128,4 @@ bool build_draft_step(
     return ggml_gallocr_alloc_graph(sg.alloc, sg.gf);
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/dflash_draft_graph.h b/dflash/src/common/dflash_draft_graph.h
@@ -15,7 +15,7 @@
 #include "ggml.h"
 #include "ggml-backend.h"
 
-namespace dflash27b {
+namespace dflash::common {
 
 // Draft forward: speculative next-token prediction using target features.
 //   lm_head: optional target lm_head tensor for fused projection. When
@@ -33,4 +33,4 @@ bool build_draft_step(
     int committed = 0,
     int ctx_len_max = 0);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/dflash_draft_ipc.cpp b/dflash/src/common/dflash_draft_ipc.cpp
@@ -17,7 +17,7 @@
 #include <string>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ── DFlashDraftIpcClient ────────────────────────────────────────────
 
@@ -248,4 +248,4 @@ bool copy_capture_slice_to_remote_draft(
     return remote.send_feature_slice(capture_idx, start_pos, n_tokens, host);
 }
 
-} // namespace dflash27b
+} // namespace dflash::common
diff --git a/dflash/src/common/dflash_draft_ipc.h b/dflash/src/common/dflash_draft_ipc.h
@@ -31,7 +31,7 @@
 #  include <unistd.h>
 #endif
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ── IPC Client (parent process) ─────────────────────────────────────
 
@@ -120,4 +120,4 @@ int run_dflash_draft_ipc_daemon(const char * draft_path,
                                 int draft_gpu,
                                 int stream_fd);
 
-} // namespace dflash27b
+} // namespace dflash::common
diff --git a/dflash/src/common/dflash_draft_ipc_daemon.cpp b/dflash/src/common/dflash_draft_ipc_daemon.cpp
@@ -24,7 +24,7 @@
 #include <string>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 int run_dflash_draft_ipc_daemon(const char * draft_path,
                                 int ring_cap,
@@ -208,4 +208,4 @@ int run_dflash_draft_ipc_daemon(const char * draft_path,
 #endif
 }
 
-} // namespace dflash27b
+} // namespace dflash::common
diff --git a/dflash/src/common/dflash_feature_ring.cpp b/dflash/src/common/dflash_feature_ring.cpp
@@ -16,7 +16,7 @@ extern "C++" to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);
 
 #include "gpu_runtime_compat.h"
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ── internal helpers ────────────────────────────────────────────
 
@@ -236,4 +236,4 @@ bool copy_feature_ring_range_to_tensor(
     return cudaDeviceSynchronize() == cudaSuccess;
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/dflash_feature_ring.h b/dflash/src/common/dflash_feature_ring.h
@@ -19,7 +19,7 @@
 #include <cstddef>
 #include <cstdint>
 
-namespace dflash27b {
+namespace dflash::common {
 
 struct DraftFeatureMirror {
     ggml_context * ctx = nullptr;
@@ -88,4 +88,4 @@ bool copy_feature_ring_range_to_tensor(
     int start_pos,
     int n_tokens);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
diff --git a/dflash/src/common/dflash_layer_split_runtime.h b/dflash/src/common/dflash_layer_split_runtime.h
@@ -13,7 +13,7 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ── Runtime configuration (replaces globals) ────────────────────────
 
@@ -74,4 +74,4 @@ inline bool activation_pair_init(ActivationPair & p,
     return true;
 }
 
-} // namespace dflash27b
+} // namespace dflash::common
diff --git a/dflash/src/common/dflash_spec_decode.cpp b/dflash/src/common/dflash_spec_decode.cpp
@@ -12,7 +12,7 @@
 #include <cstdio>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 namespace {
 // RAII guard so any early `return false` path frees the per-call draft graph.
@@ -205,5 +205,5 @@ bool run_dflash_spec_decode(
     return true;
 }
 
-} // namespace dflash27b
+} // namespace dflash::common
 
diff --git a/dflash/src/common/dflash_spec_decode.h b/dflash/src/common/dflash_spec_decode.h
@@ -21,7 +21,7 @@
 #include <cstdint>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 struct DraftWeights;  // forward-decl from internal.h
 
@@ -50,4 +50,4 @@ bool run_dflash_spec_decode(
         int stream_fd = -1,
         DFlashDraftIpcClient * remote_draft = nullptr);
 
-} // namespace dflash27b
+} // namespace dflash::common
diff --git a/dflash/src/common/dflash_target.h b/dflash/src/common/dflash_target.h
@@ -14,7 +14,7 @@
 #include <cstdint>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 struct DFlashTarget {
     virtual ~DFlashTarget() = default;
@@ -74,4 +74,4 @@ struct DFlashTarget {
     virtual const std::vector<int> & capture_layer_ids() const = 0;
 };
 
-} // namespace dflash27b
+} // namespace dflash::common
diff --git a/dflash/src/common/gguf_inspect.cpp b/dflash/src/common/gguf_inspect.cpp
@@ -5,7 +5,7 @@
 #include <cstring>
 #include <string>
 
-namespace dflash27b {
+namespace dflash::common {
 
 GgufModelInfo inspect_gguf_model_info(const char * path) {
     GgufModelInfo info;
@@ -36,4 +36,4 @@ GgufModelInfo inspect_gguf_model_info(const char * path) {
     return info;
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common