diff --git a/hindsight-clients/python/hindsight_client/hindsight_client.py b/hindsight-clients/python/hindsight_client/hindsight_client.py
index b4e4441fe..c88768a98 100644
--- a/hindsight-clients/python/hindsight_client/hindsight_client.py
+++ b/hindsight-clients/python/hindsight_client/hindsight_client.py
@@ -204,7 +204,7 @@ def retain_batch(
 
         request_obj = retain_request.RetainRequest(
             items=memory_items,
-            async_=retain_async,
+            var_async=retain_async,
             document_tags=document_tags,
         )
 
@@ -618,7 +618,7 @@ async def aretain_batch(
 
         request_obj = retain_request.RetainRequest(
             items=memory_items,
-            async_=retain_async,
+            var_async=retain_async,
             document_tags=document_tags,
         )
 
diff --git a/hindsight-clients/python/tests/test_retain_request_async.py b/hindsight-clients/python/tests/test_retain_request_async.py
new file mode 100644
index 000000000..afe53c38a
--- /dev/null
+++ b/hindsight-clients/python/tests/test_retain_request_async.py
@@ -0,0 +1,42 @@
+"""
+Test that RetainRequest correctly serializes the async field.
+
+Regression test for a bug where the client passed async_=True (invalid kwarg)
+instead of var_async=True, causing async mode to be silently ignored.
+"""
+
+from hindsight_client_api.models.memory_item import MemoryItem
+from hindsight_client_api.models.retain_request import RetainRequest
+
+
+def _make_item():  # helper: builds the single MemoryItem each test in this module passes as `items`
+    return MemoryItem(content="test content")
+
+
+def test_retain_request_async_true_serialized():
+    """to_dict() must emit var_async=True under the key 'async' with the value True."""
+    req = RetainRequest(items=[_make_item()], var_async=True)
+    d = req.to_dict()
+    assert d["async"] is True  # identity check: a merely truthy non-bool value would fail
+
+
+def test_retain_request_async_false_serialized():
+    """An explicitly passed var_async=False must appear as 'async': False in to_dict()."""
+    req = RetainRequest(items=[_make_item()], var_async=False)
+    d = req.to_dict()
+    assert d["async"] is False  # indexing also fails (KeyError) if the key were dropped
+
+
+def test_retain_request_default_is_sync():
+    """Omitting var_async must still serialize 'async': False (synchronous mode)."""
+    req = RetainRequest(items=[_make_item()])  # var_async intentionally not passed
+    d = req.to_dict()
+    assert d["async"] is False
+
+
+def test_retain_request_async_json_roundtrip():
+    """var_async=True must survive a to_json() -> from_json() roundtrip."""
+    req = RetainRequest(items=[_make_item()], var_async=True)
+    json_str = req.to_json()
+    restored = RetainRequest.from_json(json_str)
+    assert restored.var_async is True  # checked on the Python attribute, not the 'async' key
diff --git a/hindsight-docs/docs/sdks/integrations/claude-code.md b/hindsight-docs/docs/sdks/integrations/claude-code.md
index 0cfb8a142..88b3c0c6e 100644
--- a/hindsight-docs/docs/sdks/integrations/claude-code.md
+++ b/hindsight-docs/docs/sdks/integrations/claude-code.md
@@ -81,7 +81,7 @@ export ANTHROPIC_API_KEY="your-key"
 export HINDSIGHT_LLM_PROVIDER=claude-code # No API key needed
 ```
 
-The model is selected automatically by the Hindsight API. To override, set `HINDSIGHT_API_LLM_MODEL`.
+The model is selected automatically by the Hindsight API. To override, set `HINDSIGHT_LLM_MODEL`.
 
 ### 3. Existing Local Server
 
@@ -89,61 +89,99 @@ If you already have `hindsight-embed` running, leave `hindsightApiUrl` empty and
 
 ## Configuration
 
-All settings are in `~/.hindsight/claude-code.json`. Every setting can also be overridden via environment variables.
+All settings can be set in `~/.hindsight/claude-code.json`. Most settings can also be overridden via environment variables (a `—` in the Env Var column of the tables below means the setting has none). The plugin ships with sensible defaults — you only need to configure what you want to change.
+
+**Loading order** (later entries win):
+1. Built-in defaults (hardcoded in the plugin)
+2. Plugin `settings.json` (ships with the plugin, at `CLAUDE_PLUGIN_ROOT/settings.json`)
+3. User config (`~/.hindsight/claude-code.json` — recommended for your overrides)
+4. Environment variables
+
+---
 
 ### Connection & Daemon
 
-| Setting | Default | Env Var | Description |
+These settings control how the plugin connects to the Hindsight API.
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `hindsightApiUrl` | `""` | `HINDSIGHT_API_URL` | External Hindsight API URL. Empty = use local daemon. |
-| `hindsightApiToken` | `null` | `HINDSIGHT_API_TOKEN` | Auth token for external API |
-| `apiPort` | `9077` | `HINDSIGHT_API_PORT` | Port for local Hindsight daemon |
-| `daemonIdleTimeout` | `0` | `HINDSIGHT_DAEMON_IDLE_TIMEOUT` | Seconds before idle daemon shuts down (0 = never) |
-| `embedVersion` | `"latest"` | `HINDSIGHT_EMBED_VERSION` | `hindsight-embed` version for `uvx` |
+| `hindsightApiUrl` | `HINDSIGHT_API_URL` | `""` (empty) | URL of an external Hindsight API server. When empty, the plugin uses a local daemon instead. |
+| `hindsightApiToken` | `HINDSIGHT_API_TOKEN` | `null` | Authentication token for the external API. Only needed when `hindsightApiUrl` is set. |
+| `apiPort` | `HINDSIGHT_API_PORT` | `9077` | Port used by the local `hindsight-embed` daemon. Change this if you run multiple instances or have a port conflict. |
+| `daemonIdleTimeout` | `HINDSIGHT_DAEMON_IDLE_TIMEOUT` | `0` | Seconds of inactivity before the local daemon shuts itself down. `0` means the daemon stays running until the session ends. |
+| `embedVersion` | `HINDSIGHT_EMBED_VERSION` | `"latest"` | Which version of `hindsight-embed` to install via `uvx`. Pin to a specific version (e.g. `"0.5.2"`) for reproducibility. |
+| `embedPackagePath` | `HINDSIGHT_EMBED_PACKAGE_PATH` | `null` | Local filesystem path to a `hindsight-embed` checkout. When set, the plugin runs from this path instead of installing via `uvx`. Useful for development. |
 
-### LLM Provider (daemon mode only)
+---
 
-| Setting | Default | Env Var | Description |
+### LLM Provider (local daemon only)
+
+These settings configure which LLM the local daemon uses for fact extraction. They are **ignored** when connecting to an external API (the server uses its own LLM configuration).
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `llmProvider` | auto-detect | `HINDSIGHT_LLM_PROVIDER` | LLM provider: `openai`, `anthropic`, `gemini`, `groq`, `ollama`, `openai-codex`, `claude-code` |
-| `llmModel` | provider default | `HINDSIGHT_LLM_MODEL` | Model override |
+| `llmProvider` | `HINDSIGHT_LLM_PROVIDER` | auto-detect | Which LLM provider to use. Supported values: `openai`, `anthropic`, `gemini`, `groq`, `ollama`, `openai-codex`, `claude-code`. When omitted, the plugin auto-detects by checking for API key env vars in order: `OPENAI_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY` → `GROQ_API_KEY`. |
+| `llmModel` | `HINDSIGHT_LLM_MODEL` | provider default | Override the default model for the chosen provider (e.g. `"gpt-4o"`, `"claude-sonnet-4-20250514"`). When omitted, the Hindsight API picks a sensible default for each provider. |
+| `llmApiKeyEnv` | — | provider standard | Name of the environment variable that holds the API key. Normally auto-detected (e.g. `OPENAI_API_KEY` for the `openai` provider). Set this only if your key is in a non-standard env var. |
 
-Auto-detection checks these env vars in order: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, `GROQ_API_KEY`.
+---
 
 ### Memory Bank
 
-| Setting | Default | Env Var | Description |
+A **bank** is an isolated memory store — like a separate "brain." These settings control which bank the plugin reads from and writes to.
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `bankId` | `"claude_code"` | `HINDSIGHT_BANK_ID` | Static bank ID (when `dynamicBankId` is false) |
-| `bankMission` | generic assistant | `HINDSIGHT_BANK_MISSION` | Agent identity/purpose for the memory bank |
-| `retainMission` | extraction prompt | — | Custom retain mission (what to extract from conversations) |
-| `dynamicBankId` | `false` | `HINDSIGHT_DYNAMIC_BANK_ID` | Enable per-context memory banks |
-| `dynamicBankGranularity` | `["agent", "project"]` | — | Fields for dynamic bank ID: `agent`, `project`, `session`, `channel`, `user` |
-| `bankIdPrefix` | `""` | — | Prefix for all bank IDs (e.g. `"prod"`) |
+| `bankId` | `HINDSIGHT_BANK_ID` | `"claude_code"` | The bank ID to use when `dynamicBankId` is `false`. All sessions share this single bank. |
+| `bankMission` | `HINDSIGHT_BANK_MISSION` | generic assistant prompt | A short description of the agent's identity and purpose. Sent to Hindsight when creating or updating the bank, and used during recall to contextualize results. |
+| `retainMission` | — | extraction prompt | Instructions for the fact extraction LLM — tells it *what* to extract from conversations (e.g. "Extract technical decisions and user preferences"). |
+| `dynamicBankId` | `HINDSIGHT_DYNAMIC_BANK_ID` | `false` | When `true`, the plugin derives a unique bank ID from context fields (see `dynamicBankGranularity`), giving each combination its own isolated memory. |
+| `dynamicBankGranularity` | — | `["agent", "project"]` | Which context fields to combine when building a dynamic bank ID. Available fields: `agent` (agent name), `project` (working directory), `session` (session ID), `channel` (channel ID), `user` (user ID). |
+| `bankIdPrefix` | — | `""` | A string prepended to all bank IDs — both static and dynamic. Useful for namespacing (e.g. `"prod"` or `"staging"`). |
+| `agentName` | `HINDSIGHT_AGENT_NAME` | `"claude-code"` | Name used for the `agent` field in dynamic bank ID derivation. |
+
+---
 
 ### Auto-Recall
 
-| Setting | Default | Env Var | Description |
+Auto-recall runs on every user prompt. It queries Hindsight for relevant memories and injects them into Claude's context as invisible `additionalContext` (the user doesn't see them in the chat transcript).
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `autoRecall` | `true` | `HINDSIGHT_AUTO_RECALL` | Enable automatic memory recall |
-| `recallBudget` | `"mid"` | `HINDSIGHT_RECALL_BUDGET` | Recall effort: `low`, `mid`, `high` |
-| `recallMaxTokens` | `1024` | `HINDSIGHT_RECALL_MAX_TOKENS` | Max tokens in recall response |
-| `recallContextTurns` | `1` | `HINDSIGHT_RECALL_CONTEXT_TURNS` | Prior turns for query composition |
+| `autoRecall` | `HINDSIGHT_AUTO_RECALL` | `true` | Master switch for auto-recall. Set to `false` to disable memory retrieval entirely. |
+| `recallBudget` | `HINDSIGHT_RECALL_BUDGET` | `"mid"` | Controls how hard Hindsight searches for memories. `"low"` = fast, fewer strategies; `"mid"` = balanced; `"high"` = thorough, slower. Affects latency directly. |
+| `recallMaxTokens` | `HINDSIGHT_RECALL_MAX_TOKENS` | `1024` | Maximum number of tokens in the recalled memory block. Lower values reduce context usage but may truncate relevant memories. |
+| `recallTypes` | — | `["world", "experience"]` | Which memory types to retrieve. `"world"` = general facts; `"experience"` = personal experiences; `"observation"` = raw observations. |
+| `recallContextTurns` | `HINDSIGHT_RECALL_CONTEXT_TURNS` | `1` | How many conversation turns, counting the latest one, to include when composing the recall query. `1` = only the latest user message; higher values add earlier turns for more context but may dilute the query. |
+| `recallMaxQueryChars` | `HINDSIGHT_RECALL_MAX_QUERY_CHARS` | `800` | Maximum character length of the query sent to Hindsight. Longer queries are truncated. |
+| `recallRoles` | — | `["user", "assistant"]` | Which message roles to include when building the recall query from prior turns. |
+| `recallPromptPreamble` | — | built-in string | Text placed above the recalled memories in the injected context block. Customize this to change how Claude interprets the memories. |
+
+---
 
 ### Auto-Retain
 
-| Setting | Default | Env Var | Description |
+Auto-retain runs after Claude responds. It extracts the conversation transcript and sends it to Hindsight for long-term storage and fact extraction.
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `autoRetain` | `true` | `HINDSIGHT_AUTO_RETAIN` | Enable automatic retention |
-| `retainEveryNTurns` | `10` | — | Retain every Nth turn (sliding window) |
-| `retainOverlapTurns` | `2` | — | Extra overlap turns for continuity |
-| `retainRoles` | `["user", "assistant"]` | — | Which message roles to retain |
+| `autoRetain` | `HINDSIGHT_AUTO_RETAIN` | `true` | Master switch for auto-retain. Set to `false` to disable memory storage entirely. |
+| `retainMode` | `HINDSIGHT_RETAIN_MODE` | `"full-session"` | Retention strategy. `"full-session"` sends the full conversation transcript (with chunking). |
+| `retainEveryNTurns` | — | `10` | How often to retain. `1` = every turn; `10` = every 10th turn. Higher values reduce API calls but delay memory capture. Values > 1 enable **chunked retention** with a sliding window. |
+| `retainOverlapTurns` | — | `2` | When chunked retention fires, this many extra turns from the previous chunk are included for continuity. Total window size = `retainEveryNTurns + retainOverlapTurns`. |
+| `retainRoles` | — | `["user", "assistant"]` | Which message roles to include in the retained transcript. |
+| `retainToolCalls` | — | `true` | Whether to include tool calls (function invocations and results) in the retained transcript. Captures structured actions like file reads, searches, and code edits. |
+| `retainTags` | — | `["{session_id}"]` | Tags attached to the retained document. Supports the `{session_id}` placeholder, which is replaced with the current session ID at runtime. |
+| `retainMetadata` | — | `{}` | Arbitrary key-value metadata attached to the retained document. |
+| `retainContext` | — | `"claude-code"` | A label attached to retained memories identifying their source. Useful when multiple integrations write to the same bank. |
+
+---
 
-### Miscellaneous
+### Debug
 
-| Setting | Default | Env Var | Description |
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `debug` | `false` | `HINDSIGHT_DEBUG` | Enable debug logging to stderr |
+| `debug` | `HINDSIGHT_DEBUG` | `false` | Enable verbose logging to stderr. All log lines are prefixed with `[Hindsight]`. Useful for diagnosing connection issues, recall/retain behavior, and bank ID derivation. |
 
 ## Claude Code Channels
 
diff --git a/hindsight-integrations/claude-code/README.md b/hindsight-integrations/claude-code/README.md
index ab588cd6d..10a5efd3e 100644
--- a/hindsight-integrations/claude-code/README.md
+++ b/hindsight-integrations/claude-code/README.md
@@ -30,9 +30,6 @@ claude
 
 That's it! The plugin will automatically start capturing and recalling memories.
 
-> **Tip:** Once available in the official Claude Code plugin directory, installation will be a single command:
-> `claude plugin install hindsight-memory`
-
 ## Features
 
 - **Auto-recall** — on every user prompt, queries Hindsight for relevant memories and injects them as context (invisible to the chat transcript, visible to Claude)
@@ -123,70 +120,99 @@ If you already have `hindsight-embed` running, leave `hindsightApiUrl` empty and
 
 ## Configuration
 
-All settings are in `settings.json` at the plugin root. Every setting can also be overridden via environment variables.
+All settings can be set in `~/.hindsight/claude-code.json`. Most settings can also be overridden via environment variables (a `—` in the Env Var column of the tables below means the setting has none). The plugin ships with sensible defaults — you only need to configure what you want to change.
+
+**Loading order** (later entries win):
+1. Built-in defaults (hardcoded in the plugin)
+2. Plugin `settings.json` (ships with the plugin, at `CLAUDE_PLUGIN_ROOT/settings.json`)
+3. User config (`~/.hindsight/claude-code.json` — recommended for your overrides)
+4. Environment variables
+
+---
 
 ### Connection & Daemon
 
-| Setting | Default | Env Var | Description |
+These settings control how the plugin connects to the Hindsight API.
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `hindsightApiUrl` | `""` | `HINDSIGHT_API_URL` | External Hindsight API URL. Empty = use local daemon. |
-| `hindsightApiToken` | `null` | `HINDSIGHT_API_TOKEN` | Auth token for external API |
-| `apiPort` | `9077` | `HINDSIGHT_API_PORT` | Port for local Hindsight daemon |
-| `daemonIdleTimeout` | `0` | `HINDSIGHT_DAEMON_IDLE_TIMEOUT` | Seconds before idle daemon shuts down (0 = never) |
-| `embedVersion` | `"latest"` | `HINDSIGHT_EMBED_VERSION` | `hindsight-embed` version for `uvx` |
-| `embedPackagePath` | `null` | `HINDSIGHT_EMBED_PACKAGE_PATH` | Local path to `hindsight-embed` for development |
+| `hindsightApiUrl` | `HINDSIGHT_API_URL` | `""` (empty) | URL of an external Hindsight API server. When empty, the plugin uses a local daemon instead. |
+| `hindsightApiToken` | `HINDSIGHT_API_TOKEN` | `null` | Authentication token for the external API. Only needed when `hindsightApiUrl` is set. |
+| `apiPort` | `HINDSIGHT_API_PORT` | `9077` | Port used by the local `hindsight-embed` daemon. Change this if you run multiple instances or have a port conflict. |
+| `daemonIdleTimeout` | `HINDSIGHT_DAEMON_IDLE_TIMEOUT` | `0` | Seconds of inactivity before the local daemon shuts itself down. `0` means the daemon stays running until the session ends. |
+| `embedVersion` | `HINDSIGHT_EMBED_VERSION` | `"latest"` | Which version of `hindsight-embed` to install via `uvx`. Pin to a specific version (e.g. `"0.5.2"`) for reproducibility. |
+| `embedPackagePath` | `HINDSIGHT_EMBED_PACKAGE_PATH` | `null` | Local filesystem path to a `hindsight-embed` checkout. When set, the plugin runs from this path instead of installing via `uvx`. Useful for development. |
+
+---
+
+### LLM Provider (local daemon only)
 
-### LLM Provider (daemon mode only)
+These settings configure which LLM the local daemon uses for fact extraction. They are **ignored** when connecting to an external API (the server uses its own LLM configuration).
 
-| Setting | Default | Env Var | Description |
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `llmProvider` | auto-detect | `HINDSIGHT_LLM_PROVIDER` | LLM provider: `openai`, `anthropic`, `gemini`, `groq`, `ollama`, `openai-codex`, `claude-code` |
-| `llmModel` | provider default | `HINDSIGHT_LLM_MODEL` | Model override |
-| `llmApiKeyEnv` | provider standard | — | Custom env var name for API key |
+| `llmProvider` | `HINDSIGHT_LLM_PROVIDER` | auto-detect | Which LLM provider to use. Supported values: `openai`, `anthropic`, `gemini`, `groq`, `ollama`, `openai-codex`, `claude-code`. When omitted, the plugin auto-detects by checking for API key env vars in order: `OPENAI_API_KEY` → `ANTHROPIC_API_KEY` → `GEMINI_API_KEY` → `GROQ_API_KEY`. |
+| `llmModel` | `HINDSIGHT_LLM_MODEL` | provider default | Override the default model for the chosen provider (e.g. `"gpt-4o"`, `"claude-sonnet-4-20250514"`). When omitted, the Hindsight API picks a sensible default for each provider. |
+| `llmApiKeyEnv` | — | provider standard | Name of the environment variable that holds the API key. Normally auto-detected (e.g. `OPENAI_API_KEY` for the `openai` provider). Set this only if your key is in a non-standard env var. |
 
-Auto-detection checks these env vars in order: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, `GROQ_API_KEY`.
+---
 
 ### Memory Bank
 
-| Setting | Default | Env Var | Description |
+A **bank** is an isolated memory store — like a separate "brain." These settings control which bank the plugin reads from and writes to.
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `bankId` | `"claude_code"` | `HINDSIGHT_BANK_ID` | Static bank ID (when `dynamicBankId` is false) |
-| `bankMission` | generic assistant | `HINDSIGHT_BANK_MISSION` | Agent identity/purpose for the memory bank |
-| `retainMission` | extraction prompt | — | Custom retain mission (what to extract from conversations) |
-| `dynamicBankId` | `false` | `HINDSIGHT_DYNAMIC_BANK_ID` | Enable per-context memory banks |
-| `dynamicBankGranularity` | `["agent", "project"]` | — | Fields for dynamic bank ID: `agent`, `project`, `session`, `channel`, `user` |
-| `bankIdPrefix` | `""` | — | Prefix for all bank IDs (e.g. `"prod"`) |
-| `agentName` | `""` | `HINDSIGHT_AGENT_NAME` | Agent name for dynamic bank ID derivation |
+| `bankId` | `HINDSIGHT_BANK_ID` | `"claude_code"` | The bank ID to use when `dynamicBankId` is `false`. All sessions share this single bank. |
+| `bankMission` | `HINDSIGHT_BANK_MISSION` | generic assistant prompt | A short description of the agent's identity and purpose. Sent to Hindsight when creating or updating the bank, and used during recall to contextualize results. |
+| `retainMission` | — | extraction prompt | Instructions for the fact extraction LLM — tells it *what* to extract from conversations (e.g. "Extract technical decisions and user preferences"). |
+| `dynamicBankId` | `HINDSIGHT_DYNAMIC_BANK_ID` | `false` | When `true`, the plugin derives a unique bank ID from context fields (see `dynamicBankGranularity`), giving each combination its own isolated memory. |
+| `dynamicBankGranularity` | — | `["agent", "project"]` | Which context fields to combine when building a dynamic bank ID. Available fields: `agent` (agent name), `project` (working directory), `session` (session ID), `channel` (channel ID), `user` (user ID). |
+| `bankIdPrefix` | — | `""` | A string prepended to all bank IDs — both static and dynamic. Useful for namespacing (e.g. `"prod"` or `"staging"`). |
+| `agentName` | `HINDSIGHT_AGENT_NAME` | `"claude-code"` | Name used for the `agent` field in dynamic bank ID derivation. |
+
+---
 
 ### Auto-Recall
 
-| Setting | Default | Env Var | Description |
+Auto-recall runs on every user prompt. It queries Hindsight for relevant memories and injects them into Claude's context as invisible `additionalContext` (the user doesn't see them in the chat transcript).
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `autoRecall` | `true` | `HINDSIGHT_AUTO_RECALL` | Enable automatic memory recall |
-| `recallBudget` | `"mid"` | `HINDSIGHT_RECALL_BUDGET` | Recall effort: `low`, `mid`, `high` |
-| `recallMaxTokens` | `1024` | `HINDSIGHT_RECALL_MAX_TOKENS` | Max tokens in recall response |
-| `recallTypes` | `["world", "experience"]` | — | Memory types: `world`, `experience`, `observation` |
-| `recallContextTurns` | `1` | `HINDSIGHT_RECALL_CONTEXT_TURNS` | Prior turns for query composition (1 = latest only) |
-| `recallMaxQueryChars` | `800` | `HINDSIGHT_RECALL_MAX_QUERY_CHARS` | Max query length |
-| `recallRoles` | `["user", "assistant"]` | — | Roles included in query context |
-| `recallTopK` | `null` | — | Hard cap on memories per turn |
-| `recallPromptPreamble` | built-in string | — | Text placed above recalled memories |
+| `autoRecall` | `HINDSIGHT_AUTO_RECALL` | `true` | Master switch for auto-recall. Set to `false` to disable memory retrieval entirely. |
+| `recallBudget` | `HINDSIGHT_RECALL_BUDGET` | `"mid"` | Controls how hard Hindsight searches for memories. `"low"` = fast, fewer strategies; `"mid"` = balanced; `"high"` = thorough, slower. Affects latency directly. |
+| `recallMaxTokens` | `HINDSIGHT_RECALL_MAX_TOKENS` | `1024` | Maximum number of tokens in the recalled memory block. Lower values reduce context usage but may truncate relevant memories. |
+| `recallTypes` | — | `["world", "experience"]` | Which memory types to retrieve. `"world"` = general facts; `"experience"` = personal experiences; `"observation"` = raw observations. |
+| `recallContextTurns` | `HINDSIGHT_RECALL_CONTEXT_TURNS` | `1` | How many conversation turns, counting the latest one, to include when composing the recall query. `1` = only the latest user message; higher values add earlier turns for more context but may dilute the query. |
+| `recallMaxQueryChars` | `HINDSIGHT_RECALL_MAX_QUERY_CHARS` | `800` | Maximum character length of the query sent to Hindsight. Longer queries are truncated. |
+| `recallRoles` | — | `["user", "assistant"]` | Which message roles to include when building the recall query from prior turns. |
+| `recallPromptPreamble` | — | built-in string | Text placed above the recalled memories in the injected context block. Customize this to change how Claude interprets the memories. |
+
+---
 
 ### Auto-Retain
 
-| Setting | Default | Env Var | Description |
+Auto-retain runs after Claude responds. It extracts the conversation transcript and sends it to Hindsight for long-term storage and fact extraction.
+
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `autoRetain` | `true` | `HINDSIGHT_AUTO_RETAIN` | Enable automatic retention |
-| `retainRoles` | `["user", "assistant"]` | — | Which roles to retain |
-| `retainEveryNTurns` | `10` | — | Retain every Nth turn. Values >1 enable chunked retention with a sliding window. |
-| `retainOverlapTurns` | `2` | — | Extra overlap turns included when chunked retention fires. Window = `retainEveryNTurns + retainOverlapTurns` (default: 12 turns). |
-| `retainContext` | `"claude-code"` | — | Context label for retained memories |
+| `autoRetain` | `HINDSIGHT_AUTO_RETAIN` | `true` | Master switch for auto-retain. Set to `false` to disable memory storage entirely. |
+| `retainMode` | `HINDSIGHT_RETAIN_MODE` | `"full-session"` | Retention strategy. `"full-session"` sends the full conversation transcript (with chunking). |
+| `retainEveryNTurns` | — | `10` | How often to retain. `1` = every turn; `10` = every 10th turn. Higher values reduce API calls but delay memory capture. Values > 1 enable **chunked retention** with a sliding window. |
+| `retainOverlapTurns` | — | `2` | When chunked retention fires, this many extra turns from the previous chunk are included for continuity. Total window size = `retainEveryNTurns + retainOverlapTurns`. |
+| `retainRoles` | — | `["user", "assistant"]` | Which message roles to include in the retained transcript. |
+| `retainToolCalls` | — | `true` | Whether to include tool calls (function invocations and results) in the retained transcript. Captures structured actions like file reads, searches, and code edits. |
+| `retainTags` | — | `["{session_id}"]` | Tags attached to the retained document. Supports the `{session_id}` placeholder, which is replaced with the current session ID at runtime. |
+| `retainMetadata` | — | `{}` | Arbitrary key-value metadata attached to the retained document. |
+| `retainContext` | — | `"claude-code"` | A label attached to retained memories identifying their source. Useful when multiple integrations write to the same bank. |
+
+---
 
-### Miscellaneous
+### Debug
 
-| Setting | Default | Env Var | Description |
+| Setting | Env Var | Default | Description |
 |---------|---------|---------|-------------|
-| `debug` | `false` | `HINDSIGHT_DEBUG` | Enable debug logging to stderr |
+| `debug` | `HINDSIGHT_DEBUG` | `false` | Enable verbose logging to stderr. All log lines are prefixed with `[Hindsight]`. Useful for diagnosing connection issues, recall/retain behavior, and bank ID derivation. |
 
 ## Claude Code Channels
 
@@ -214,7 +240,7 @@ And enable dynamic bank IDs:
 ### Plugin not activating
 
 - Verify installation: check that `.claude-plugin/plugin.json` exists in the installed plugin directory
-- Check Claude Code logs for `[Hindsight]` messages (enable `"debug": true` in settings.json)
+- Check Claude Code logs for `[Hindsight]` messages (enable `"debug": true` in `~/.hindsight/claude-code.json`)
 
 ### Recall returning no memories
 
diff --git a/hindsight-integrations/claude-code/scripts/lib/config.py b/hindsight-integrations/claude-code/scripts/lib/config.py
index f1dba531a..73eb0eeb6 100644
--- a/hindsight-integrations/claude-code/scripts/lib/config.py
+++ b/hindsight-integrations/claude-code/scripts/lib/config.py
@@ -22,7 +22,6 @@
         "conflicting). Only use memories that are directly useful to continue "
         "this conversation; ignore the rest:"
     ),
-    "recallTopK": None,
     # Retain
     "autoRetain": True,
     "retainMode": "full-session",
diff --git a/hindsight-integrations/claude-code/scripts/recall.py b/hindsight-integrations/claude-code/scripts/recall.py
index 415e172b3..efdec453c 100755
--- a/hindsight-integrations/claude-code/scripts/recall.py
+++ b/hindsight-integrations/claude-code/scripts/recall.py
@@ -161,11 +161,6 @@ def _dbg(*a):
         debug_log(config, "No memories found")
         return
 
-    # Apply topK limit
-    top_k = config.get("recallTopK")
-    if top_k and isinstance(top_k, int):
-        results = results[:top_k]
-
     debug_log(config, f"Injecting {len(results)} memories")
 
     # Format context message — exact match of Openclaw's format
diff --git a/hindsight-integrations/claude-code/settings.json b/hindsight-integrations/claude-code/settings.json
index 8cf6210df..dcd41d72a 100644
--- a/hindsight-integrations/claude-code/settings.json
+++ b/hindsight-integrations/claude-code/settings.json
@@ -13,7 +13,6 @@
   "recallMaxQueryChars": 800,
   "recallRoles": ["user", "assistant"],
   "recallPromptPreamble": "Relevant memories from past conversations (prioritize recent when conflicting). Only use memories that are directly useful to continue this conversation; ignore the rest:",
-  "recallTopK": null,
   "retainRoles": ["user", "assistant"],
   "retainEveryNTurns": 10,
   "retainOverlapTurns": 2,