Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
93f9006
docs: ship-in-days PLAN for adaptive keep_ratio MVP (mrciffa scope)
dusterbloom May 21, 2026
ccf32be
feat(pflash): plumb DFlash accept_rate into GenerateResult (Day 1 of …
dusterbloom May 21, 2026
a2ae89c
feat(pflash): adaptive keep_ratio bandit state machine (Day 2)
dusterbloom May 21, 2026
b28a45f
feat(pflash): integrate adaptive bandit into live HTTP path (Day 3)
dusterbloom May 21, 2026
b31049d
bench(mvp): Day 4 — adaptive bandit vs fixed-keep A/B on claude_code
dusterbloom May 21, 2026
fe2a6b7
feat(harness): inject extra_body.session_id via PFLASH_SESSION_ID env
dusterbloom May 21, 2026
588ca57
bench(mvp): Day 5 — adaptive bandit vs fixed-keep on identical prompts
dusterbloom May 21, 2026
00f9307
fix(pflash): adaptive_keep_ratio uses dflash::common namespace (follo…
dusterbloom May 23, 2026
4038332
fix(pflash): replace dead EMA log line with real post-update EMA value
dusterbloom May 23, 2026
7b9397c
fix(pflash): bandit updates on spec_decode_ran, not accept_rate>0 (0-…
dusterbloom May 23, 2026
692064f
chore(mvp): trim bench-result noise + move PLAN.md to thoughts/ per p…
dusterbloom May 23, 2026
8d5cc04
ci: fix submodule fetch for fork PRs
dusterbloom May 23, 2026
a8d41e0
bench(mvp): NIAH 16K/32K + 3-seed Day-5 for PR #264 evidence
dusterbloom May 23, 2026
713300f
feat(pflash): plumb DFlash accept_rate into GenerateResult (Day 1 of …
dusterbloom May 21, 2026
49b6061
test: anthropic /v1/messages round-trip regression lock + stub server…
dusterbloom May 23, 2026
ba138f3
feat+test: OpenAI /v1/chat/completions + /v1/responses injection (see…
dusterbloom May 23, 2026
4cdf069
harness: preflight_require_bin in common.sh + exit-78 test (seed #3)
dusterbloom May 23, 2026
3614aac
harness: wire preflight_require_bin + cpp backend into 5 run scripts
dusterbloom May 23, 2026
9f30905
harness: typed metrics parser with Optional[float] for missing fields…
dusterbloom May 23, 2026
fda452b
harness: ClientAdapter protocol + 5 concrete adapters + bandit subcom…
dusterbloom May 23, 2026
0702e62
harness: --adapter flag + CLI smoke tests for bandit subcommand
dusterbloom May 23, 2026
8898a89
harness: add live_run to adapters, delete 4 bash launchers, update RE…
dusterbloom May 23, 2026
dfc7e6f
fix(harness): start_server respects LUCEBOX_SERVER_BACKEND (cpp default)
dusterbloom May 23, 2026
1bf0781
fix(harness): preflight invokes binary (not shutil.which) to catch br…
dusterbloom May 23, 2026
674c949
fix(harness): live_run drives client binary directly (was shelling in…
dusterbloom May 23, 2026
7b1622e
docs(harness): replace stale bash-launcher refs with Python adapter p…
dusterbloom May 23, 2026
da99475
fix(harness): wire metrics_parser into adapter result (accept_rate wa…
dusterbloom May 23, 2026
7b33d7c
fix(harness): preflight uses same env as live_run (catches asdf shim …
dusterbloom May 23, 2026
9e45a7a
fix(harness): enable pflash on bandit-condition server launches (band…
dusterbloom May 23, 2026
ee96e6c
fix(harness): bandit server launch — positional model arg + drop unkn…
dusterbloom May 23, 2026
05aa36b
fix(harness): migrate ClaudeCodeAdapter to direct invocation, skip he…
dusterbloom May 23, 2026
129cfaa
feat(harness): bandit-session subcommand + multi-turn adaptive evidence
dusterbloom May 23, 2026
9595ad1
bench: add server.log for adaptive evidence run (force-add over .giti…
dusterbloom May 23, 2026
9a82a8b
fix(harness): wire HermesAdapter end-to-end; fix proxy port collision
dusterbloom May 23, 2026
ade419b
chore(harness): move followups to thoughts/ per project convention
dusterbloom May 23, 2026
9ed4a56
fix(harness): opencode/pi/codex adapter binary resolution under isola…
dusterbloom May 23, 2026
80f4a16
feat(harness): wire session-inject proxy into bandit-session; revert …
dusterbloom May 23, 2026
fbcabb1
fix(harness): skip --draft for pflash-only server profiles
dusterbloom May 24, 2026
17525ea
fix(bandit): bound HttpServerSessions to cap LRU eviction
dusterbloom May 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,15 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive

- name: Fetch submodules (PAT required for private org repos)
env:
SUBMODULE_PAT: ${{ secrets.SUBMODULE_PAT }}
run: |
if [ -n "${SUBMODULE_PAT}" ]; then
git config --global url."https://x-access-token:${SUBMODULE_PAT}@github.com/".insteadOf "https://github.com/"
fi
git submodule update --init --recursive --depth=1

- uses: Jimver/cuda-toolkit@v0.2.35
with:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ env/
*.qdrep
*.sqlite
bench-out/
dflash/bench/results/
profile-out/

# Model weights and caches (pull fresh from HF)
Expand Down
12 changes: 12 additions & 0 deletions dflash/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,18 @@ if(DFLASH27B_TESTS)
target_include_directories(test_gguf_mmap PRIVATE ${DFLASH27B_SRC_INCLUDE_DIRS})
target_link_libraries(test_gguf_mmap PRIVATE dflash_common)
endif()
# Optional test target: configured only when test/test_adaptive_keep_ratio.cpp
# exists in the source tree at configure time.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/test_adaptive_keep_ratio.cpp")
  add_executable(
    test_adaptive_keep_ratio
    test/test_adaptive_keep_ratio.cpp
  )
  # Include paths and the dflash_common link are PRIVATE: they are used to
  # build this executable only.
  target_include_directories(
    test_adaptive_keep_ratio
    PRIVATE ${DFLASH27B_SRC_INCLUDE_DIRS}
  )
  target_link_libraries(
    test_adaptive_keep_ratio
    PRIVATE dflash_common
  )
  # Registered with CTest as "adaptive_keep"; COMMAND names the target, which
  # CMake resolves to the built executable.
  add_test(
    NAME adaptive_keep
    COMMAND test_adaptive_keep_ratio
  )
endif()
# Optional test target: configured only when test/test_bandit_integration.cpp
# exists in the source tree at configure time.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/test_bandit_integration.cpp")
  add_executable(
    test_bandit_integration
    test/test_bandit_integration.cpp
  )
  # Include paths and the dflash_common link are PRIVATE: they are used to
  # build this executable only.
  target_include_directories(
    test_bandit_integration
    PRIVATE ${DFLASH27B_SRC_INCLUDE_DIRS}
  )
  target_link_libraries(
    test_bandit_integration
    PRIVATE dflash_common
  )
  # Registered with CTest as "bandit_integration"; COMMAND names the target,
  # which CMake resolves to the built executable.
  add_test(
    NAME bandit_integration
    COMMAND test_bandit_integration
  )
endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/test_draft_vs_reference.cpp")
add_executable(test_draft_vs_reference test/test_draft_vs_reference.cpp)
target_link_libraries(test_draft_vs_reference PRIVATE dflash_common)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":15505,"duration_api_ms":15469,"ttft_ms":15503,"num_turns":1,"result":"The `clamp` function restricts a value `x` to lie within the inclusive range `[lo, hi]`:\n\n1. **Validation:** If the lower bound `lo` is greater than the upper bound `hi`, it raises a `ValueError` since the bounds are invalid.\n2. **Clamping:** It first applies `max(x, lo)` to ensure the value is at least `lo`, then applies `min(..., hi)` to ensure it doesn't exceed `hi`.\n\nIn short, it \"clamps\" or \"clips\" `x` so that the result is always between `lo` and `hi` (inclusive).\n\nOK_DONE","stop_reason":"end_turn","session_id":"369e1eff-7cf4-4b2c-a708-1e42e545c5c1","total_cost_usd":0.058980000000000005,"usage":{"input_tokens":11096,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":140,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"},"modelUsage":{"luce-dflash":{"inputTokens":11096,"outputTokens":140,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.058980000000000005,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"4d133762-5689-47e1-9575-9de7245f1a61"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
label=A_fixed_low
keep_ratio=0.05
session_id=
wall_s=20
ok_done=YES
accept_rate=N/A
mean_drafter_fwd_ms=N/A
N/A
bandit_log:
none
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":11518,"duration_api_ms":11475,"ttft_ms":11516,"num_turns":1,"result":"Takes a value `x` and constrains it to the range `[lo, hi]`. If `lo > hi` it raises; otherwise it returns `x` clamped to `[lo, hi]` using `max` then `min`.\n\nOK_DONE.","stop_reason":"end_turn","session_id":"33055a15-378f-424e-91f7-8ae0dfcbe5ac","total_cost_usd":0.056905000000000004,"usage":{"input_tokens":11096,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":57,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"},"modelUsage":{"luce-dflash":{"inputTokens":11096,"outputTokens":57,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.056905000000000004,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"427220ae-d402-4e5c-9ccb-a83998c72a3f"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
label=B_fixed_high
keep_ratio=0.20
session_id=
wall_s=18
ok_done=YES
accept_rate=N/A
mean_drafter_fwd_ms=N/A
N/A
bandit_log:
none
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"content":[{"text":"The `clamp` function restricts a value `x` to lie within a specified range defined by `lo` (lower bound) and `hi` (upper bound).\n\nHere is a step-by-step breakdown:\n\n1. **Validation**: It first checks if `lo` is greater than `hi`. If so, it raises a `ValueError` because the lower bound cannot be greater than the upper bound.\n2. **Clamping Logic**: It uses `max(x, lo)` to ensure the value is at least `lo`, and then `min(..., hi)` to ensure the result is no more than `hi`.\n - If `x` is less than `lo`, it returns `lo`.\n - If `x` is greater than `hi`, it returns `hi`.\n - If `x` is between `lo` and `hi` (inclusive), it returns `x`.\n\nIn essence, it \"clamps\" or \"caps\" the value `x` to the interval `[lo, hi]`.\n\nOK_DONE","type":"text"}],"id":"msg_0000000000000000","model":"luce-dflash","role":"assistant","stop_reason":"end_turn","type":"message","usage":{"accept_rate":0.34742647409439087,"input_tokens":62,"output_tokens":221}}
10 changes: 10 additions & 0 deletions dflash/bench/results/2026-05-21_mvp_day4_v2/C_bandit/metrics.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
label=C_bandit
keep_ratio=0.10
session_id=claude_code_s1
wall_s=12
ok_done=YES
accept_rate=0.34742647409439087
mean_drafter_fwd_ms=N/A
N/A
bandit_log:
[pflash-bandit] session=claude_code_s1 turn=1 keep=0.1000->0.1100 (accept=0.347)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":12759,"duration_api_ms":12725,"ttft_ms":12757,"num_turns":1,"result":"The `clamp` function constrains a value `x` to lie within the closed interval `[lo, hi]`.\n\n1. **Bounds check**: If `lo > hi`, it raises a `ValueError` because the lower bound would be greater than the upper bound, which is invalid.\n2. **Clamping**: It first applies `max(x, lo)` to ensure the value is at least `lo`, then applies `min(..., hi)` to ensure the result is at most `hi`. The net effect is that `x` is \"clamped\" (pinned) to the range `[lo, hi]`.\n\n**Examples:**\n- `clamp(5, 1, 10)` → `5` (already in range)\n- `clamp(-3, 0, 10)` → `0` (below `lo`, so it returns `lo`)\n- `clamp(20, 1, 10)` → `10` (above `hi`, so it returns `hi`)\n- `clamp(5, 10, 1)` → raises `ValueError`\n\nOK_DONE","stop_reason":"end_turn","session_id":"17411438-6c28-45ab-876e-0141de566865","total_cost_usd":0.061655,"usage":{"input_tokens":11126,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":241,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"},"modelUsage":{"luce-dflash":{"inputTokens":11126,"outputTokens":241,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.061655,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"8f175dfd-4dd7-4aa2-9514-ed9bd71ab9f0"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
label=A_fixed_low
keep_ratio=0.05
session_id=
wall_s=17
ok_done=YES
accept_rate=31.7
mean_drafter_fwd_ms=1610 ms (n=1)
bandit_log:
none
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":11247,"duration_api_ms":11201,"ttft_ms":11245,"num_turns":1,"result":"It constrains `x` to the range `[lo, hi]`:\n\n- If `lo > hi`, it raises a `ValueError`.\n- Otherwise it returns `x` clamped: `lo` if `x < lo`, `hi` if `x > hi`, or `x` itself.\n\nOK_DONE.","stop_reason":"end_turn","session_id":"9a753ecc-fdb7-48c9-bdfb-d25797bcc80e","total_cost_usd":0.05738,"usage":{"input_tokens":11126,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":70,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"},"modelUsage":{"luce-dflash":{"inputTokens":11126,"outputTokens":70,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.05738,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"315cc3b2-95cc-46ea-bc4c-8443e8d0ebed"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
label=B_fixed_high
keep_ratio=0.20
session_id=
wall_s=19
ok_done=YES
accept_rate=25.4
mean_drafter_fwd_ms=1620 ms (n=1)
bandit_log:
none
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"result","subtype":"success","is_error":false,"api_error_status":null,"duration_ms":11537,"duration_api_ms":11490,"ttft_ms":11535,"num_turns":1,"result":"The `clamp` function constrains a value `x` to lie within the inclusive range `[lo, hi]`:\n\n1. **Bounds validation**: If `lo > hi`, it raises a `ValueError` because the lower bound can't exceed the upper bound.\n2. **Clamping**: `max(x, lo)` ensures the result is at least `lo`, then `min(..., hi)` ensures it doesn't exceed `hi`.\n\n**Examples:**\n- `clamp(5, 0, 10)` → `5` (already in range)\n- `clamp(-3, 0, 10)` → `0` (below range, clamped to `lo`)\n- `clamp(15, 0, 10)` → `10` (above range, clamped to `hi`)\n\nOK_DONE","stop_reason":"end_turn","session_id":"d1227ee7-b90b-41ab-9526-97b3b4742e11","total_cost_usd":0.060155,"usage":{"input_tokens":11126,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":181,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"},"modelUsage":{"luce-dflash":{"inputTokens":11126,"outputTokens":181,"cacheReadInputTokens":0,"cacheCreationInputTokens":0,"webSearchRequests":0,"costUSD":0.060155,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"terminal_reason":"completed","fast_mode_state":"off","uuid":"c6311f54-3ef7-4448-91d6-1070fb5fe8bf"}
9 changes: 9 additions & 0 deletions dflash/bench/results/2026-05-22_mvp_day5/C_bandit/metrics.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
label=C_bandit
keep_ratio=0.10
session_id=claude_code_day5_s1
wall_s=16
ok_done=YES
accept_rate=31.9
mean_drafter_fwd_ms=1630 ms (n=1)
bandit_log:
[pflash-bandit] session=claude_code_day5_s1 turn=1 keep=0.1000->0.1100 (accept=0.319)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
client,turn,session_id,prompt,keep_before,accept_rate,keep_after,ema,wall_s
claude_code,1,adaptive-evidence-20260523,decode_check.txt,0.1,0.062,0.11,0.062,7.409
claude_code,2,adaptive-evidence-20260523,logic_check.txt,0.11,0.062,0.12,0.062,4.928
claude_code,3,adaptive-evidence-20260523,math_check.txt,0.12,0.062,0.13,0.062,4.888
claude_code,4,adaptive-evidence-20260523,code_gen.txt,0.13,0.062,0.14,0.062,5.185
claude_code,5,adaptive-evidence-20260523,explain_algo.txt,0.14,0.062,0.15,0.062,5.335
Loading