diff --git a/.doctor.exs b/.doctor.exs new file mode 100644 index 0000000..c023292 --- /dev/null +++ b/.doctor.exs @@ -0,0 +1,19 @@ +%Doctor.Config{ + ignore_modules: [ + # These modules use macros that generate code inside quote blocks. + # Doctor incorrectly counts def statements inside quote as module functions. + Jido.Agent, + Jido.Skill + ], + ignore_paths: [], + min_module_doc_coverage: 100, + min_module_spec_coverage: 100, + min_overall_doc_coverage: 100, + min_overall_moduledoc_coverage: 100, + min_overall_spec_coverage: 100, + exception_moduledoc_required: true, + raise: false, + reporter: Doctor.Reporters.Full, + struct_type_spec_required: true, + umbrella: false +} diff --git a/README.md b/README.md index a791e98..0dee0e8 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,6 @@ Jido is the core package of the Jido ecosystem. The ecosystem is built around th | [jido_signal](https://github.com/agentjido/jido_signal) | CloudEvents-based message envelope and supporting utilities for routing and pub/sub messaging | | [jido](https://github.com/agentjido/jido) | Core agent framework with state management, directives, and runtime | | [jido_ai](https://github.com/agentjido/jido_ai) | AI/LLM integration for agents | -| [jido_coder](https://github.com/agentjido/jido_coder) | AI coding agent with file operations, git integration, and test execution | For demos and examples of what you can build with the Jido Ecosystem, see [https://agentjido.xyz](https://agentjido.xyz). 
@@ -273,11 +272,13 @@ State operations are internal state transitions handled by the strategy layer du - [Signals & Routing](guides/signals.md) - Signal-based communication - [Agent Directives](guides/directives.md) - Effect descriptions for the runtime - [Runtime and AgentServer](guides/runtime.md) - Process-based agent execution +- [Persistence & Storage](guides/storage.md) - Hibernate, thaw, and InstanceManager lifecycle - [Skills](guides/skills.md) - Composable capability bundles - [Strategies](guides/strategies.md) - Execution strategies (Direct, FSM) **Advanced:** - [FSM Strategy Deep Dive](guides/fsm-strategy.livemd) - State machine workflows +- [Worker Pools](guides/worker-pools.md) - Pre-warmed agent pools for throughput - [Testing Agents](guides/testing.md) - Testing patterns and best practices **API Reference:** [hexdocs.pm/jido](https://hexdocs.pm/jido) diff --git a/guides/configuration.md b/guides/configuration.md index 3f8a106..4c26588 100644 --- a/guides/configuration.md +++ b/guides/configuration.md @@ -136,19 +136,21 @@ config :my_app, MyApp.Jido, ```elixir # Simple call - handles checkout/checkin automatically -{:ok, result} = Jido.AgentPool.call(MyApp.Jido, :fast_search, signal) +{:ok, result} = Jido.Agent.WorkerPool.call(MyApp.Jido, :fast_search, signal) # Transaction-style for multiple operations -Jido.AgentPool.with_agent(MyApp.Jido, :fast_search, fn pid -> +Jido.Agent.WorkerPool.with_agent(MyApp.Jido, :fast_search, fn pid -> Jido.AgentServer.call(pid, signal1) Jido.AgentServer.call(pid, signal2) end) # Check pool status -status = Jido.AgentPool.status(MyApp.Jido, :fast_search) +status = Jido.Agent.WorkerPool.status(MyApp.Jido, :fast_search) # => %{state: :ready, available: 5, overflow: 0, checked_out: 3} ``` +See [Worker Pools](worker-pools.md) for detailed pool configuration and usage patterns. + ### Pool State Semantics Pooled agents are **long-lived stateful workers**. State persists across checkouts unless the agent crashes. 
Design your agent to accept request-specific data via signals rather than storing it in agent state if you need per-request isolation. @@ -164,7 +166,7 @@ Configure timeouts based on your workload: Jido.AgentServer.call(pid, signal, 10_000) # Pool checkout timeout -Jido.AgentPool.call(MyApp.Jido, :pool, signal, timeout: 10_000) +Jido.Agent.WorkerPool.call(MyApp.Jido, :pool, signal, timeout: 10_000) ``` ### Graceful Shutdown @@ -304,6 +306,8 @@ See [Testing](testing.md) for more patterns. ## Related -- [Runtime](runtime.md) - AgentServer and process-based execution -- [Testing](testing.md) - Testing patterns and best practices -- [Strategies](strategies.md) - Execution strategies configuration +- [Persistence & Storage](storage.md) — Hibernate/thaw and InstanceManager lifecycle +- [Worker Pools](worker-pools.md) — Pre-warmed agent pools for throughput +- [Runtime](runtime.md) — AgentServer and process-based execution +- [Testing](testing.md) — Testing patterns and best practices +- [Strategies](strategies.md) — Execution strategies configuration diff --git a/guides/persistence.md b/guides/persistence.md deleted file mode 100644 index 30124c6..0000000 --- a/guides/persistence.md +++ /dev/null @@ -1,347 +0,0 @@ -# Persistence - -**After:** Your agents can survive restarts (or you explicitly decide they shouldn't). - -```elixir -# Configure persistence with InstanceManager -Jido.Agent.InstanceManager.child_spec( - name: :sessions, - agent: MyApp.SessionAgent, - idle_timeout: :timer.minutes(15), - persistence: [ - store: {Jido.Agent.Store.File, path: "priv/agent_state"} - ] -) - -# Agents hibernate on idle, thaw on demand -{:ok, pid} = Jido.Agent.InstanceManager.get(:sessions, "user-123") -# If agent was hibernated, state is restored automatically -``` - -This guide covers agent state persistence: when to use it, how to configure it, and how to build custom stores. 
- -## The Store Behaviour - -`Jido.Agent.Store` defines three callbacks for persisting agent state: - -```elixir -@callback get(key(), opts()) :: {:ok, dump()} | :not_found | {:error, term()} -@callback put(key(), dump(), opts()) :: :ok | {:error, term()} -@callback delete(key(), opts()) :: :ok | {:error, term()} -``` - -Keys are typically `{agent_module, agent_id}` tuples. The `dump` is the serialized agent state (by default, the entire agent struct). - -## Built-in Stores - -### ETS Store — Fast, Ephemeral - -In-memory storage using ETS. Data is lost when the BEAM stops. - -```elixir -persistence: [ - store: {Jido.Agent.Store.ETS, table: :agent_cache} -] -``` - -**Use for:** Development, testing, and production scenarios where losing state on restart is acceptable. - -**Characteristics:** -- Concurrent reads via `read_concurrency: true` -- Table created automatically if missing -- No serialization overhead (terms stored directly) - -### File Store — Durable, Simple - -File-based storage with atomic writes. Survives restarts. - -```elixir -persistence: [ - store: {Jido.Agent.Store.File, path: "priv/agent_state"} -] -``` - -**Use for:** Production deployments, development with state preservation. - -**Characteristics:** -- One file per agent (hashed filename) -- Atomic writes via temp file + rename -- Erlang term format (`:erlang.term_to_binary/1`) -- Directory created automatically - -## InstanceManager Integration - -The `Jido.Agent.InstanceManager` handles persistence automatically: - -```elixir -# In your supervision tree -children = [ - Jido.Agent.InstanceManager.child_spec( - name: :sessions, - agent: MyApp.SessionAgent, - idle_timeout: :timer.minutes(15), - persistence: [ - store: {Jido.Agent.Store.File, path: "priv/sessions"} - ] - ) -] -``` - -### Lifecycle - -1. **Get/Start**: `InstanceManager.get/3` looks up by key in Registry -2. **Thaw**: If not running but persistence exists, state is restored -3. 
**Fresh**: If no persisted state, starts a fresh agent -4. **Attach**: Callers track interest via `AgentServer.attach/1` -5. **Idle**: When all attachments detach, idle timer starts -6. **Hibernate**: On timeout, agent state is persisted then process stops - -```elixir -# Get or start an agent (thaws if hibernated) -{:ok, pid} = Jido.Agent.InstanceManager.get(:sessions, "user-123") - -# Track this caller's interest -:ok = Jido.AgentServer.attach(pid) - -# When done, detach (starts idle timer if no other attachments) -:ok = Jido.AgentServer.detach(pid) -``` - -## What Gets Persisted - -By default, the entire agent struct is persisted: - -- `agent.id` -- `agent.state` (your application state) -- `agent.__struct__` (agent module) - -### Custom Serialization - -Implement `dump/2` and `load/2` callbacks in your agent for custom serialization: - -```elixir -defmodule MyApp.SessionAgent do - use Jido.Agent, - name: "session_agent", - schema: [ - user_id: [type: :string, required: true], - cart: [type: {:list, :map}, default: []] - ] - - @impl true - def dump(agent, _context) do - # Persist only essential state - {:ok, %{ - id: agent.id, - user_id: agent.state.user_id, - cart: agent.state.cart, - version: 1 - }} - end - - @impl true - def load(data, _context) do - # Reconstruct from persisted data - {:ok, agent} = new(id: data.id) - {:ok, set(agent, %{user_id: data.user_id, cart: data.cart})} - end -end -``` - -## Schema Evolution - -When your agent schema changes, handle version migrations in `load/2`: - -```elixir -def load(%{version: 1} = data, context) do - # Migrate v1 to current schema - migrated = %{ - id: data.id, - user_id: data.user_id, - cart: data.cart, - preferences: %{} # New field with default - } - load(%{migrated | version: 2}, context) -end - -def load(%{version: 2} = data, _context) do - {:ok, agent} = new(id: data.id) - {:ok, set(agent, Map.drop(data, [:version, :id]))} -end -``` - -## Direct Persistence API - -Use `Jido.Agent.Persistence` for direct 
control outside InstanceManager: - -```elixir -config = [store: {Jido.Agent.Store.File, path: "priv/agents"}] - -# Hibernate an agent -:ok = Jido.Agent.Persistence.hibernate(config, MyAgent, "agent-123", agent) - -# Thaw an agent -case Jido.Agent.Persistence.thaw(config, MyAgent, "agent-123") do - {:ok, agent} -> agent - :not_found -> MyAgent.new!(id: "agent-123") - {:error, reason} -> raise "Failed to thaw: #{inspect(reason)}" -end -``` - -### Custom Key Function - -Override the default key generation: - -```elixir -config = [ - store: {Jido.Agent.Store.File, path: "priv/agents"}, - key_fun: fn module, id -> "#{module}:#{id}" end -] -``` - -## Example: Persist Workflow Results - -A workflow agent that persists progress and resumes after restart: - -```elixir -defmodule MyApp.WorkflowAgent do - use Jido.Agent, - name: "workflow_agent", - schema: [ - workflow_id: [type: :string, required: true], - steps_completed: [type: {:list, :atom}, default: []], - current_step: [type: :atom, default: :init], - results: [type: :map, default: %{}] - ] - - @impl true - def dump(agent, _context) do - {:ok, %{ - id: agent.id, - workflow_id: agent.state.workflow_id, - steps_completed: agent.state.steps_completed, - current_step: agent.state.current_step, - results: agent.state.results, - version: 1 - }} - end - - @impl true - def load(data, _context) do - {:ok, agent} = new(id: data.id) - {:ok, set(agent, %{ - workflow_id: data.workflow_id, - steps_completed: data.steps_completed, - current_step: data.current_step, - results: data.results - })} - end -end -``` - -Usage with InstanceManager: - -```elixir -# Start workflow (or resume if hibernated) -{:ok, pid} = Jido.Agent.InstanceManager.get(:workflows, "order-456", - initial_state: %{workflow_id: "order-456"} -) - -# Process steps - state persists on idle -Jido.AgentServer.call(pid, Signal.new!("workflow.step.complete", %{step: :validate})) - -# After restart, agent resumes from last known state -{:ok, pid} = 
Jido.Agent.InstanceManager.get(:workflows, "order-456") -{:ok, state} = Jido.AgentServer.state(pid) -# state.agent.state.steps_completed => [:validate] -``` - -## Custom Store: Redis Example - -Implement `Jido.Agent.Store` for your infrastructure: - -```elixir -defmodule MyApp.RedisStore do - @behaviour Jido.Agent.Store - - @impl true - def get(key, opts) do - pool = Keyword.get(opts, :pool, :redix) - redis_key = serialize_key(key) - - case Redix.command(pool, ["GET", redis_key]) do - {:ok, nil} -> :not_found - {:ok, data} -> {:ok, :erlang.binary_to_term(data, [:safe])} - {:error, reason} -> {:error, reason} - end - end - - @impl true - def put(key, dump, opts) do - pool = Keyword.get(opts, :pool, :redix) - ttl = Keyword.get(opts, :ttl, 3600) - redis_key = serialize_key(key) - data = :erlang.term_to_binary(dump) - - case Redix.command(pool, ["SETEX", redis_key, ttl, data]) do - {:ok, "OK"} -> :ok - {:error, reason} -> {:error, reason} - end - end - - @impl true - def delete(key, opts) do - pool = Keyword.get(opts, :pool, :redix) - redis_key = serialize_key(key) - - case Redix.command(pool, ["DEL", redis_key]) do - {:ok, _} -> :ok - {:error, reason} -> {:error, reason} - end - end - - defp serialize_key({module, id}) do - "jido:agent:#{module}:#{id}" - end -end -``` - -Use it: - -```elixir -Jido.Agent.InstanceManager.child_spec( - name: :sessions, - agent: MyApp.SessionAgent, - persistence: [ - store: {MyApp.RedisStore, pool: :redix, ttl: 86_400} - ] -) -``` - -## When NOT to Persist - -**Ephemeral workers** don't need persistence: - -```elixir -# Fire-and-forget task agents -Jido.Agent.InstanceManager.child_spec( - name: :tasks, - agent: MyApp.TaskAgent, - idle_timeout: :timer.seconds(30) - # No persistence: option - agent dies on idle, no restore -) -``` - -Skip persistence when: - -- **Agents are stateless** — they fetch state from external sources on start -- **State is cheap to rebuild** — re-running init is faster than I/O -- **Short-lived workers** — task 
duration < hibernate overhead -- **Sensitive data** — secrets shouldn't hit disk/cache -- **High-churn agents** — frequent start/stop makes persistence overhead costly - -## Related - -- [Runtime](runtime.md) — AgentServer and process-based execution -- [Configuration](configuration.md) — Jido instance configuration -- [Testing](testing.md) — Testing patterns (ETS store for tests) diff --git a/guides/phoenix-integration.md b/guides/phoenix-integration.md index 9366065..87d96ef 100644 --- a/guides/phoenix-integration.md +++ b/guides/phoenix-integration.md @@ -438,5 +438,7 @@ Visit `/counter/my-counter` in multiple browser tabs. Changes sync in real-time. ## Next Steps - [Signals](signals.md) — Signal routing and creation +- [Persistence & Storage](storage.md) — Hibernate/thaw and InstanceManager for session agents +- [Worker Pools](worker-pools.md) — Pre-warmed agent pools for high-throughput APIs - [Runtime](runtime.md) — AgentServer lifecycle and parent-child hierarchies - [Await & Coordination](await.md) — Wait for agent completion diff --git a/guides/runtime.md b/guides/runtime.md index ef92e95..29a99e5 100644 --- a/guides/runtime.md +++ b/guides/runtime.md @@ -130,3 +130,9 @@ Jido.cancel(pid) # Cancel a running agent ``` For detailed await patterns, fan-out coordination, and testing without `Process.sleep`, see the [Await & Coordination](await.md) guide. + +## Related + +- [Persistence & Storage](storage.md) — Hibernate/thaw and InstanceManager lifecycle +- [Worker Pools](worker-pools.md) — Pre-warmed agent pools for throughput +- [Await & Coordination](await.md) — Waiting on agent completion diff --git a/guides/storage.md b/guides/storage.md new file mode 100644 index 0000000..2823cc4 --- /dev/null +++ b/guides/storage.md @@ -0,0 +1,922 @@ +# Persistence & Storage + +**After:** Your agents can survive restarts, hibernate on idle, and preserve conversation history. 
+ +```elixir +defmodule MyApp.Jido do + use Jido, + otp_app: :my_app, + storage: {Jido.Storage.File, path: "priv/jido/storage"} +end + +# Manual: Hibernate an agent (flushes thread, writes checkpoint) +:ok = MyApp.Jido.hibernate(agent) + +# Manual: Thaw an agent (loads checkpoint, rehydrates thread) +{:ok, agent} = MyApp.Jido.thaw(MyAgent, "user-123") + +# Automatic: InstanceManager hibernates on idle, thaws on demand +{:ok, pid} = Jido.Agent.InstanceManager.get(:sessions, "user-123") +``` + +This guide covers Jido's unified persistence system: checkpoints, thread journals, and manual and automatic lifecycle management. + +## Choosing Your Persistence Model + +| Approach | When to Use | API | +|----------|-------------|-----| +| **Manual** | Explicit control over when to persist | `MyApp.Jido.hibernate/1`, `MyApp.Jido.thaw/2` | +| **Automatic** | Idle-based lifecycle for per-user/entity agents | `InstanceManager.get/3` with `idle_timeout` | +| **None** | Stateless agents, cheap rebuilds, short-lived tasks | Skip storage config | + +Both manual and automatic approaches use the same underlying `Jido.Storage` behaviour. 
+ +## Overview + +Jido Storage provides a simple, composable persistence model built on two core concepts: + +| Concept | Metaphor | Description | +|---------|----------|-------------| +| **Thread** | Journal | Append-only event log, source of truth for what happened | +| **Checkpoint** | Snapshot | Serialized agent state for fast resume | + +The relationship: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Source of Truth │ +│ ┌───────────────────────────────────────────────────────────┐ │ +│ │ Thread (Journal) │ │ +│ │ - Append-only entries with monotonic seq │ │ +│ │ - What happened, in order │ │ +│ │ - Replayable, auditable │ │ +│ └───────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ projection │ +│ ┌───────────────────────────────────────────────────────────┐ │ +│ │ Agent State (In-Memory) │ │ +│ │ - Current computed state │ │ +│ │ - Includes state[:__thread__] reference │ │ +│ └───────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ checkpoint │ +│ ┌───────────────────────────────────────────────────────────┐ │ +│ │ Checkpoint (Snapshot Store) │ │ +│ │ - Serialized agent state (without full thread) │ │ +│ │ - Thread pointer: {thread_id, thread_rev} │ │ +│ │ - For fast resume │ │ +│ └───────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Key Invariant + +**Never persist the full Thread inside the Agent checkpoint.** Store a pointer instead: + +```elixir +%{ + thread_id: "thread_abc123", + thread_rev: 42 +} +``` + +This prevents: + +- Data duplication between checkpoint and journal +- Consistency drift when checkpoint and journal get out of sync +- Memory bloat in serialized checkpoints + +### Terminology + +| Operation | Description | +|-----------|-------------| +| **hibernate** | Flush journal, write checkpoint, persist agent for later | +| **thaw** | Load checkpoint, rehydrate thread, resume agent | 
+| **checkpoint** | Agent callback to serialize state | +| **restore** | Agent callback to deserialize state | + +## Quick Start + +### Default (ETS, Ephemeral) + +With no configuration, Jido uses ETS storage (fast, in-memory, lost on restart): + +```elixir +defmodule MyApp.Jido do + use Jido, otp_app: :my_app + # Uses Jido.Storage.ETS by default +end + +# Create an agent with a thread +{:ok, agent} = MyAgent.new(id: "user-123") +thread = Jido.Thread.new() +agent = put_in(agent.state[:__thread__], thread) + +# Do some work, add entries to the thread... +thread = Jido.Thread.append(thread, :message, %{content: "Hello!"}) +agent = put_in(agent.state[:__thread__], thread) + +# Hibernate - agent can now be garbage collected +:ok = MyApp.Jido.hibernate(agent) + +# Later... thaw the agent +{:ok, restored_agent} = MyApp.Jido.thaw(MyAgent, "user-123") +# restored_agent.state[:__thread__] is rehydrated with entries +``` + +### File-Based (Simple Production) + +For persistence across restarts: + +```elixir +defmodule MyApp.Jido do + use Jido, + otp_app: :my_app, + storage: {Jido.Storage.File, path: "priv/jido/storage"} +end + +# Same API +:ok = MyApp.Jido.hibernate(agent) +{:ok, agent} = MyApp.Jido.thaw(MyAgent, "user-123") +``` + +## Configuration + +Storage is configured per Jido instance via `use Jido`: + +```elixir +defmodule MyApp.Jido do + use Jido, + otp_app: :my_app, + storage: {Jido.Storage.ETS, table: :my_storage} +end +``` + +Or just the module (options default to `[]`): + +```elixir +storage: Jido.Storage.ETS +``` + +### Built-in Adapters + +| Adapter | Durability | Use Case | +|---------|------------|----------| +| `Jido.Storage.ETS` | Ephemeral | Development, testing | +| `Jido.Storage.File` | Disk | Simple production | + +### ETS Storage Options + +```elixir +storage: {Jido.Storage.ETS, table: :my_jido_storage} +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `:table` | `:jido_storage` | Base table name. 
Creates three ETS tables: `{table}_checkpoints`, `{table}_threads`, `{table}_thread_meta` | + +### File Storage Options + +```elixir +storage: {Jido.Storage.File, path: "priv/jido/storage"} +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `:path` | (required) | Base directory path. Created automatically if it doesn't exist. | + +Directory layout: + +``` +priv/jido/storage/ +├── checkpoints/ +│ └── {key_hash}.term # Serialized checkpoint +└── threads/ + └── {thread_id}/ + ├── meta.term # {rev, created_at, updated_at, metadata} + └── entries.log # Length-prefixed binary frames +``` + +## API Reference + +### High-Level API (Jido Instance) + +When you `use Jido`, you get `hibernate/1` and `thaw/2` functions: + +```elixir +defmodule MyApp.Jido do + use Jido, + otp_app: :my_app, + storage: {Jido.Storage.ETS, []} +end + +# Hibernate an agent +:ok = MyApp.Jido.hibernate(agent) + +# Thaw an agent by module and ID +{:ok, agent} = MyApp.Jido.thaw(MyAgent, "user-123") +``` + +#### `hibernate/1` + +Persists an agent to storage: + +1. Extracts thread from `agent.state[:__thread__]` +2. Flushes thread entries to journal storage +3. Calls `agent_module.checkpoint/2` to serialize state +4. Stores checkpoint (with thread pointer, not full thread) + +**Returns:** + +- `:ok` — Successfully hibernated +- `{:error, reason}` — Failed to hibernate + +#### `thaw/2` + +Restores an agent from storage: + +1. Loads checkpoint by `{agent_module, key}` +2. Calls `agent_module.restore/2` to deserialize +3. If checkpoint has thread pointer, loads and attaches thread +4. 
Verifies thread revision matches checkpoint pointer + +**Returns:** + +- `{:ok, agent}` — Successfully restored +- `:not_found` — No checkpoint exists for this key +- `{:error, :missing_thread}` — Checkpoint references a thread that doesn't exist +- `{:error, :thread_mismatch}` — Loaded thread.rev doesn't match checkpoint pointer + +### Direct API (Jido.Persist) + +For direct control without a Jido instance: + +```elixir +storage = {Jido.Storage.ETS, table: :my_storage} + +# Hibernate +:ok = Jido.Persist.hibernate(storage, agent) + +# Thaw +{:ok, agent} = Jido.Persist.thaw(storage, MyAgent, "user-123") +``` + +Or pass a struct with a `:storage` field: + +```elixir +jido_instance = %{storage: {Jido.Storage.ETS, []}} +:ok = Jido.Persist.hibernate(jido_instance, agent) +``` + +## How It Works + +### Hibernate Flow + +``` +Agent (in memory) + │ + ▼ +┌──────────────────────────────────────────────────┐ +│ 1. Extract thread from agent.state[:__thread__] │ +│ 2. Flush thread to Journal Store │ +│ 3. Call agent_module.checkpoint/2 │ +│ - Excludes full thread, includes pointer │ +│ 4. Write checkpoint to Snapshot Store │ +└──────────────────────────────────────────────────┘ + │ + ▼ + Persisted +``` + +The key insight: journal is flushed **before** checkpoint is written. This ensures the thread entries exist before any checkpoint references them. + +### Thaw Flow + +``` + Persisted + │ + ▼ +┌──────────────────────────────────────────────────┐ +│ 1. Load checkpoint from Snapshot Store │ +│ 2. Call agent_module.restore/2 │ +│ 3. If checkpoint has thread pointer: │ +│ - Load thread from Journal Store │ +│ - Verify rev matches checkpoint pointer │ +│ - Attach to agent.state[:__thread__] │ +│ 4. 
Return hydrated agent │ +└──────────────────────────────────────────────────┘ + │ + ▼ +Agent (in memory) +``` + +### Thread Pointer Concept + +The checkpoint stores a **pointer** to the thread, not the thread itself: + +```elixir +# Checkpoint structure +%{ + version: 1, + agent_module: MyAgent, + id: "user-123", + state: %{name: "Alice", status: :active}, # No __thread__ key! + thread: %{id: "thread_abc123", rev: 42} # Just a pointer +} +``` + +On thaw, the thread is loaded separately from the journal store and verified: + +```elixir +# If checkpoint says thread.rev = 42, but stored thread has rev = 41 +# → {:error, :thread_mismatch} +``` + +This catches consistency issues between checkpoint and journal. + +## Agent Callbacks + +Agents can customize serialization via two optional callbacks: + +### `checkpoint/2` + +Called during hibernate to serialize the agent: + +```elixir +defmodule MyAgent do + use Jido.Agent, + name: "my_agent", + schema: [ + user_id: [type: :string, required: true], + session_data: [type: :map, default: %{}], + temp_cache: [type: :map, default: %{}] # Don't persist this + ] + + @impl true + def checkpoint(agent, _ctx) do + thread = agent.state[:__thread__] + + {:ok, %{ + version: 1, + agent_module: __MODULE__, + id: agent.id, + # Exclude temp_cache and __thread__ + state: agent.state |> Map.drop([:__thread__, :temp_cache]), + thread: thread && %{id: thread.id, rev: thread.rev} + }} + end +end +``` + +**Parameters:** + +- `agent` — The agent struct to serialize +- `ctx` — Context map (currently empty, reserved for future use) + +**Returns:** + +- `{:ok, checkpoint_data}` — Map with version, agent_module, id, state, and thread pointer + +### `restore/2` + +Called during thaw to deserialize the agent: + +```elixir +@impl true +def restore(data, _ctx) do + case new(id: data[:id] || data["id"]) do + {:ok, agent} -> + state = data[:state] || data["state"] || %{} + # Restore defaults for non-persisted fields + restored_state = Map.merge(state, 
%{temp_cache: %{}}) + {:ok, %{agent | state: Map.merge(agent.state, restored_state)}} + + error -> + error + end +end +``` + +**Parameters:** + +- `data` — The checkpoint data from storage +- `ctx` — Context map (currently empty) + +**Returns:** + +- `{:ok, agent}` — The restored agent struct + +### Default Behavior + +If you don't implement these callbacks, the default implementations: + +1. `checkpoint/2` — Serializes the full agent state (minus `__thread__`) with a thread pointer +2. `restore/2` — Creates a new agent via `new/1` and merges the stored state + +```elixir +# Default checkpoint +def checkpoint(agent, _ctx) do + thread = agent.state[:__thread__] + + {:ok, %{ + version: 1, + agent_module: __MODULE__, + id: agent.id, + state: Map.delete(agent.state, :__thread__), + thread: thread && %{id: thread.id, rev: thread.rev} + }} +end + +# Default restore +def restore(data, _ctx) do + case new(id: data[:id] || data["id"]) do + {:ok, agent} -> + state = data[:state] || data["state"] || %{} + {:ok, %{agent | state: Map.merge(agent.state, state)}} + error -> + error + end +end +``` + +### Schema Evolution + +Handle version migrations in `restore/2`: + +```elixir +@impl true +def restore(%{version: 1} = data, ctx) do + # Migrate v1 → v2: add new preferences field + migrated = %{data | version: 2} + migrated = put_in(migrated[:state][:preferences], %{theme: :light}) + restore(migrated, ctx) +end + +@impl true +def restore(%{version: 2} = data, _ctx) do + {:ok, agent} = new(id: data.id) + {:ok, %{agent | state: Map.merge(agent.state, data.state)}} +end +``` + +## Building Custom Storage Adapters + +Implement the `Jido.Storage` behaviour for your backend: + +```elixir +defmodule MyApp.Storage do + @behaviour Jido.Storage + + # Checkpoint operations (key-value, overwrite semantics) + + @impl true + def get_checkpoint(key, opts) do + # Return {:ok, data} | :not_found | {:error, reason} + end + + @impl true + def put_checkpoint(key, data, opts) do + # Return :ok | 
{:error, reason} + end + + @impl true + def delete_checkpoint(key, opts) do + # Return :ok | {:error, reason} + end + + # Journal operations (append-only, sequence ordering) + + @impl true + def load_thread(thread_id, opts) do + # Return {:ok, %Jido.Thread{}} | :not_found | {:error, reason} + end + + @impl true + def append_thread(thread_id, entries, opts) do + # Handle opts[:expected_rev] for optimistic concurrency + # Return {:ok, %Jido.Thread{}} | {:error, :conflict} | {:error, reason} + end + + @impl true + def delete_thread(thread_id, opts) do + # Return :ok | {:error, reason} + end +end +``` + +### Example: Ecto/Postgres Adapter + +```elixir +# Ecto schemas +defmodule MyApp.Jido.Checkpoint do + use Ecto.Schema + + schema "jido_checkpoints" do + field :key, :string + field :agent_module, :string + field :data, :map + field :thread_id, :string + field :thread_rev, :integer + timestamps() + end +end + +defmodule MyApp.Jido.ThreadEntry do + use Ecto.Schema + + schema "jido_thread_entries" do + field :thread_id, :string + field :seq, :integer + field :kind, :string + field :at, :integer + field :payload, :map + field :refs, :map + timestamps() + end +end + +# Storage adapter +defmodule MyApp.JidoStorage do + @behaviour Jido.Storage + + import Ecto.Query + alias MyApp.Repo + alias MyApp.Jido.{Checkpoint, ThreadEntry} + alias Jido.Thread + alias Jido.Thread.Entry + + # Checkpoint operations + + @impl true + def get_checkpoint(key, _opts) do + case Repo.get_by(Checkpoint, key: serialize_key(key)) do + nil -> :not_found + record -> {:ok, record.data} + end + end + + @impl true + def put_checkpoint(key, data, _opts) do + Repo.insert!( + %Checkpoint{key: serialize_key(key), data: data}, + on_conflict: {:replace, [:data, :updated_at]}, + conflict_target: :key + ) + :ok + end + + @impl true + def delete_checkpoint(key, _opts) do + Repo.delete_all(from c in Checkpoint, where: c.key == ^serialize_key(key)) + :ok + end + + # Journal operations + + @impl true + def 
load_thread(thread_id, _opts) do + entries = + from(e in ThreadEntry, where: e.thread_id == ^thread_id, order_by: e.seq) + |> Repo.all() + |> Enum.map(&record_to_entry/1) + + case entries do + [] -> :not_found + entries -> {:ok, reconstruct_thread(thread_id, entries)} + end + end + + @impl true + def append_thread(thread_id, entries, opts) do + expected_rev = Keyword.get(opts, :expected_rev) + + Repo.transaction(fn -> + current_max = get_max_seq(thread_id) + + # Optimistic concurrency check + if expected_rev && current_max + 1 != expected_rev do + Repo.rollback(:conflict) + end + + entries + |> Enum.with_index(current_max + 1) + |> Enum.each(fn {entry, seq} -> + Repo.insert!(%ThreadEntry{ + thread_id: thread_id, + seq: seq, + kind: to_string(entry.kind), + at: entry.at, + payload: entry.payload, + refs: entry.refs + }) + end) + + then(load_thread(thread_id, []), fn {:ok, thread} -> thread end) # Repo.transaction/2 re-wraps this in {:ok, _} + end) + end + + @impl true + def delete_thread(thread_id, _opts) do + Repo.delete_all(from e in ThreadEntry, where: e.thread_id == ^thread_id) + :ok + end + + # Private helpers + + defp serialize_key({module, id}), do: "#{module}:#{id}" + + defp get_max_seq(thread_id) do + from(e in ThreadEntry, where: e.thread_id == ^thread_id, select: max(e.seq)) + |> Repo.one() || -1 + end + + defp record_to_entry(record) do + %Entry{ + id: "entry_#{record.id}", + seq: record.seq, + at: record.at, + kind: String.to_existing_atom(record.kind), + payload: record.payload || %{}, + refs: record.refs || %{} + } + end + + defp reconstruct_thread(thread_id, entries) do + %Thread{ + id: thread_id, + rev: length(entries), + entries: entries, + created_at: List.first(entries).at, + updated_at: List.last(entries).at, + metadata: %{}, + stats: %{entry_count: length(entries)} + } + end +end +``` + +Configure it: + +```elixir +defmodule MyApp.Jido do + use Jido, + otp_app: :my_app, + storage: MyApp.JidoStorage +end +``` + +### Ash Framework Adapter + +For Ash, create a similar adapter using `Ash.read/2` and `Ash.create/2` 
instead of Ecto queries. The pattern is identical—implement the `Jido.Storage` behaviour. + +### Testing Your Adapter + +```elixir +defmodule MyApp.JidoStorageTest do + use ExUnit.Case + + alias Jido.Thread + alias Jido.Thread.Entry + + @storage {MyApp.JidoStorage, []} + + describe "checkpoints" do + test "put and get" do + key = {TestAgent, "test-123"} + data = %{version: 1, id: "test-123", state: %{foo: "bar"}} + + assert :ok = MyApp.JidoStorage.put_checkpoint(key, data, []) + assert {:ok, ^data} = MyApp.JidoStorage.get_checkpoint(key, []) + end + + test "not found" do + assert :not_found = MyApp.JidoStorage.get_checkpoint({TestAgent, "missing"}, []) + end + end + + describe "threads" do + test "append and load" do + thread_id = "thread_#{System.unique_integer()}" + entries = [%Entry{kind: :message, payload: %{text: "hello"}}] + + assert {:ok, thread} = MyApp.JidoStorage.append_thread(thread_id, entries, []) + assert thread.rev == 1 + assert length(thread.entries) == 1 + + assert {:ok, loaded} = MyApp.JidoStorage.load_thread(thread_id, []) + assert loaded.rev == 1 + end + + test "optimistic concurrency" do + thread_id = "thread_#{System.unique_integer()}" + entries = [%Entry{kind: :message, payload: %{}}] + + # First append succeeds + {:ok, _} = MyApp.JidoStorage.append_thread(thread_id, entries, expected_rev: 0) + + # Second append with wrong expected_rev fails + assert {:error, :conflict} = + MyApp.JidoStorage.append_thread(thread_id, entries, expected_rev: 0) + end + end +end +``` + +## Production Patterns + +### Optimistic Concurrency with `expected_rev` + +The `append_thread/3` callback accepts an `:expected_rev` option: + +```elixir +# Only append if current rev is 5 +case adapter.append_thread(thread_id, entries, expected_rev: 5) do + {:ok, thread} -> # Success, thread now at rev 6+ + {:error, :conflict} -> # Someone else appended first +end +``` + +This enables safe concurrent access. The ETS and File adapters both support this. 
+ +### Handling Thread Mismatches + +When thaw returns `{:error, :thread_mismatch}`: + +```elixir +case MyApp.Jido.thaw(MyAgent, "user-123") do + {:ok, agent} -> + agent + + {:error, :thread_mismatch} -> + # Checkpoint and journal are out of sync + # Options: + # 1. Delete checkpoint and start fresh + # 2. Load thread only and rebuild agent + # 3. Alert ops team for investigation + Logger.error("Thread mismatch for user-123") + {:ok, agent} = MyAgent.new(id: "user-123") + agent + + :not_found -> + {:ok, agent} = MyAgent.new(id: "user-123") + agent +end +``` + +### Thread Memory Management + +For long-running agents, threads can grow large. Future enhancements will include: + +- `load_thread_tail/3` — Load only the last N entries +- Thread compaction — Snapshot and truncate old entries + +For now, consider periodic cleanup in your domain logic. + +## Consistency Guardrails + +| Problem | Solution | +|---------|----------| +| **Snapshot/Journal mismatch** | Coordinator flushes journal before checkpoint; stores `thread_rev` in checkpoint for verification on thaw | +| **Optimistic concurrency** | `expected_rev` option in `append_thread` — adapter rejects if current rev doesn't match | +| **Thread memory bloat** | Never persist full thread in checkpoint; future: `load_thread_tail` for bounded loading | + +## Automatic Lifecycle with InstanceManager + +For per-user or per-entity agents, `Jido.Agent.InstanceManager` provides automatic hibernate/thaw based on idle timeouts. + +### Configuration + +```elixir +# In your supervision tree +children = [ + Jido.Agent.InstanceManager.child_spec( + name: :sessions, + agent: MyApp.SessionAgent, + idle_timeout: :timer.minutes(15), + storage: {Jido.Storage.File, path: "priv/sessions"} + ) +] +``` + +### Lifecycle Flow + +1. **Get/Start**: `InstanceManager.get/3` looks up by key in Registry +2. **Thaw**: If not running but storage exists, agent is restored via `thaw` +3. **Fresh**: If no stored checkpoint, starts a fresh agent +4. 
**Attach**: Callers track interest via `AgentServer.attach/1` +5. **Idle**: When all attachments detach, idle timer starts +6. **Hibernate**: On timeout, agent is persisted via `hibernate`, then process stops + +```elixir +# Get or start an agent (thaws if hibernated) +{:ok, pid} = Jido.Agent.InstanceManager.get(:sessions, "user-123") + +# Track this caller's interest +:ok = Jido.AgentServer.attach(pid) + +# When done, detach (starts idle timer if no other attachments) +:ok = Jido.AgentServer.detach(pid) +``` + +### Example: Session Agent with Auto-Hibernate + +```elixir +defmodule MyApp.SessionAgent do + use Jido.Agent, + name: "session_agent", + schema: [ + user_id: [type: :string, required: true], + cart: [type: {:list, :map}, default: []] + ] + + @impl true + def checkpoint(agent, _ctx) do + thread = agent.state[:__thread__] + {:ok, %{ + version: 1, + agent_module: __MODULE__, + id: agent.id, + state: Map.drop(agent.state, [:__thread__]), + thread: thread && %{id: thread.id, rev: thread.rev} + }} + end + + @impl true + def restore(data, _ctx) do + {:ok, agent} = new(id: data.id) + {:ok, %{agent | state: Map.merge(agent.state, data.state)}} + end +end +``` + +Usage with InstanceManager: + +```elixir +# Start session (or resume if hibernated) +{:ok, pid} = Jido.Agent.InstanceManager.get(:sessions, "user-123", + initial_state: %{user_id: "user-123"} +) + +# Process requests - state persists on idle +Jido.AgentServer.call(pid, Signal.new!("cart.add", %{item: "widget"})) + +# After app restart, agent resumes from last checkpoint +{:ok, pid} = Jido.Agent.InstanceManager.get(:sessions, "user-123") +``` + +## When NOT to Persist + +Skip persistence when: + +- **Agents are stateless** — they fetch state from external sources on start +- **State is cheap to rebuild** — re-running init is faster than I/O +- **Short-lived workers** — task duration < hibernate overhead +- **Sensitive data** — secrets shouldn't hit disk/cache +- **High-churn agents** — frequent start/stop 
makes persistence overhead costly + +```elixir +# Fire-and-forget task agents (no storage config) +Jido.Agent.InstanceManager.child_spec( + name: :tasks, + agent: MyApp.TaskAgent, + idle_timeout: :timer.seconds(30) + # No storage: - agent dies on idle, no restore +) +``` + +## Migration from Legacy API + +If migrating from the older `Jido.Agent.Persistence` / `Jido.Agent.Store` API: + +| Old API | New API | +|---------|---------| +| `Jido.Agent.Persistence.hibernate/4` | `MyApp.Jido.hibernate/1` or `Jido.Persist.hibernate/2` | +| `Jido.Agent.Persistence.thaw/3` | `MyApp.Jido.thaw/2` or `Jido.Persist.thaw/3` | +| `Jido.Agent.Store` behaviour (3 callbacks) | `Jido.Storage` behaviour (6 callbacks) | +| `dump/2` callback | `checkpoint/2` callback | +| `load/2` callback | `restore/2` callback | + +Key differences: + +1. **Unified storage** — One adapter handles both checkpoints and threads +2. **Thread-aware** — Automatically flushes journal before checkpoint +3. **Thread pointer** — Checkpoint stores pointer, not full thread +4. 
**Configured on Jido instance** — Not per-call configuration + +## Summary + +| Question | Answer | +|----------|--------| +| **Configuration?** | `use Jido, otp_app: :my_app, storage: {Adapter, opts}` | +| **Manual API?** | `MyApp.Jido.hibernate(agent)` / `thaw(MyAgent, key)` | +| **Automatic API?** | `InstanceManager.get(:pool, key)` with `idle_timeout` | +| **Default?** | `Jido.Storage.ETS` (ephemeral) | +| **Production?** | Implement `Jido.Storage` behaviour with Ecto/Ash | +| **Key invariant?** | Never persist full thread in checkpoint; use pointer | + +## Related + +- [Agents](agents.md) — Agent module documentation +- [Runtime](runtime.md) — AgentServer and process-based execution +- [Configuration](configuration.md) — Jido instance configuration +- [Worker Pools](worker-pools.md) — Pre-warmed agent pools for throughput diff --git a/guides/worker-pools.md b/guides/worker-pools.md index fe43c38..950767b 100644 --- a/guides/worker-pools.md +++ b/guides/worker-pools.md @@ -490,5 +490,6 @@ agent_pools: [ ## Related - [Configuration](configuration.md) — Instance setup and pool configuration +- [Persistence & Storage](storage.md) — Hibernate/thaw and InstanceManager lifecycle - [Runtime](runtime.md) — AgentServer process model - [Observability](observability-intro.md) — Monitoring and telemetry diff --git a/lib/jido.ex b/lib/jido.ex index 966d019..1815997 100644 --- a/lib/jido.ex +++ b/lib/jido.ex @@ -76,9 +76,15 @@ defmodule Jido do """ defmacro __using__(opts) do otp_app = Keyword.fetch!(opts, :otp_app) + storage = Keyword.get(opts, :storage, {Jido.Storage.ETS, [table: :jido_storage]}) quote location: :keep do @otp_app unquote(otp_app) + @jido_storage Jido.Storage.normalize_storage(unquote(storage)) + + @doc "Returns the storage configuration for this Jido instance." 
+ @spec __jido_storage__() :: {module(), keyword()} + def __jido_storage__, do: @jido_storage @doc false def child_spec(init_arg \\ []) do @@ -154,11 +160,83 @@ defmodule Jido do @doc "Returns the TaskSupervisor name for this Jido instance." @spec task_supervisor_name() :: atom() def task_supervisor_name, do: Jido.task_supervisor_name(__MODULE__) + + @doc "Hibernate an agent to storage." + @spec hibernate(Jido.Agent.t()) :: :ok | {:error, term()} + def hibernate(agent) do + Jido.Persist.hibernate(__jido_storage__(), agent) + end + + @doc "Thaw an agent from storage." + @spec thaw(module(), term()) :: {:ok, Jido.Agent.t()} | :not_found | {:error, term()} + def thaw(agent_module, key) do + Jido.Persist.thaw(__jido_storage__(), agent_module, key) + end end end @type agent_id :: String.t() | atom() + # Default instance name for scripts/Livebook + @default_instance Jido.Default + + @doc """ + Returns the default Jido instance name. + + Used by `Jido.start/1` for scripts and Livebook quick-start. + """ + @spec default_instance() :: atom() + def default_instance, do: @default_instance + + @doc """ + Start the default Jido instance for scripts and Livebook. + + This is an idempotent convenience function - safe to call multiple times + (returns `{:ok, pid}` even if already started). + + ## Examples + + # In a script or Livebook + {:ok, _} = Jido.start() + {:ok, pid} = Jido.start_agent(Jido.default_instance(), MyAgent) + + # With custom options + {:ok, _} = Jido.start(max_tasks: 2000) + + ## Options + + Same as `start_link/1`, but `:name` defaults to `Jido.Default`. + """ + @spec start(keyword()) :: {:ok, pid()} | {:error, term()} + def start(opts \\ []) do + opts = Keyword.put_new(opts, :name, @default_instance) + + case start_link(opts) do + {:ok, pid} -> {:ok, pid} + {:error, {:already_started, pid}} -> {:ok, pid} + other -> other + end + end + + @doc """ + Stop a Jido instance. + + Defaults to stopping the default instance (`Jido.Default`). 
+ + ## Examples + + Jido.stop() + Jido.stop(MyApp.Jido) + + """ + @spec stop(atom()) :: :ok + def stop(name \\ @default_instance) do + case Process.whereis(name) do + nil -> :ok + pid -> Supervisor.stop(pid) + end + end + @doc """ Starts a Jido instance supervisor. @@ -321,6 +399,22 @@ defmodule Jido do |> Map.get(:active, 0) end + # --------------------------------------------------------------------------- + # Persistence + # --------------------------------------------------------------------------- + + @doc "Hibernate an agent using the given Jido instance." + @spec hibernate(atom(), Jido.Agent.t()) :: :ok | {:error, term()} + def hibernate(jido_instance, agent) when is_atom(jido_instance) do + Jido.Persist.hibernate(jido_instance, agent) + end + + @doc "Thaw an agent using the given Jido instance." + @spec thaw(atom(), module(), term()) :: {:ok, Jido.Agent.t()} | :not_found | {:error, term()} + def thaw(jido_instance, agent_module, key) when is_atom(jido_instance) do + Jido.Persist.thaw(jido_instance, agent_module, key) + end + # --------------------------------------------------------------------------- # Discovery # --------------------------------------------------------------------------- diff --git a/lib/jido/agent.ex b/lib/jido/agent.ex index b023218..16f0355 100644 --- a/lib/jido/agent.ex +++ b/lib/jido/agent.ex @@ -293,13 +293,64 @@ defmodule Jido.Agent do """ @callback signal_routes() :: [Jido.Signal.Router.route_spec()] - @optional_callbacks [on_before_cmd: 2, on_after_cmd: 3, signal_routes: 0] + @doc """ + Serializes the agent for persistence. + + Called by `Jido.Persist.hibernate/2` before writing to storage. + The returned data should NOT include the full Thread - only a pointer. 
+ + If not implemented, a default serialization is used that: + - Excludes `:__thread__` from state + - Stores thread pointer as `%{id: thread.id, rev: thread.rev}` + + ## Parameters + + - `agent` - The agent to serialize + - `ctx` - Context map (may contain jido instance, options) + + ## Returns + + - `{:ok, serializable_data}` - Data to persist + - `{:error, reason}` - Serialization failed + """ + @callback checkpoint(agent :: t(), ctx :: map()) :: {:ok, map()} | {:error, term()} + + @doc """ + Restores an agent from persisted data. + + Called by `Jido.Persist.thaw/3` after loading from storage. + The Thread is reattached separately by Persist after restore. + + If not implemented, a default restoration is used that: + - Creates a new agent with the persisted id + - Merges the persisted state + + ## Parameters + + - `data` - The persisted data (from checkpoint/2) + - `ctx` - Context map (may contain jido instance, options) + + ## Returns + + - `{:ok, agent}` - Restored agent (without thread attached) + - `{:error, reason}` - Restoration failed + """ + @callback restore(data :: map(), ctx :: map()) :: {:ok, t()} | {:error, term()} + + @optional_callbacks [ + on_before_cmd: 2, + on_after_cmd: 3, + signal_routes: 0, + checkpoint: 2, + restore: 2 + ] # Helper functions that generate quoted code for the __using__ macro. # This approach reduces the size of the main quote block to avoid # "long quote blocks" and "nested too deep" Credo warnings. @doc false + @spec __quoted_module_setup__() :: Macro.t() def __quoted_module_setup__ do quote location: :keep do @behaviour Jido.Agent @@ -315,6 +366,7 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_basic_accessors__() :: Macro.t() def __quoted_basic_accessors__ do quote location: :keep do @doc "Returns the agent's name." 
@@ -344,6 +396,7 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_skill_accessors__() :: Macro.t() def __quoted_skill_accessors__ do basic_skill_accessors = __quoted_basic_skill_accessors__() computed_skill_accessors = __quoted_computed_skill_accessors__() @@ -435,6 +488,7 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_skill_config_accessors__() :: Macro.t() def __quoted_skill_config_accessors__ do skill_config_public = __quoted_skill_config_public__() skill_config_helpers = __quoted_skill_config_helpers__() @@ -529,6 +583,7 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_strategy_accessors__() :: Macro.t() def __quoted_strategy_accessors__ do quote location: :keep do @doc "Returns the execution strategy module for this agent." @@ -552,6 +607,7 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_new_function__() :: Macro.t() def __quoted_new_function__ do new_fn = __quoted_new_fn_definition__() mount_skills_fn = __quoted_mount_skills_definition__() @@ -658,6 +714,7 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_cmd_function__() :: Macro.t() def __quoted_cmd_function__ do quote location: :keep do @doc """ @@ -705,6 +762,7 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_utility_functions__() :: Macro.t() def __quoted_utility_functions__ do quote location: :keep do @doc """ @@ -765,22 +823,54 @@ defmodule Jido.Agent do end @doc false + @spec __quoted_callbacks__() :: Macro.t() def __quoted_callbacks__ do quote location: :keep do # Default callback implementations + @impl true @spec on_before_cmd(Agent.t(), Agent.action()) :: {:ok, Agent.t(), Agent.action()} def on_before_cmd(agent, action), do: {:ok, agent, action} + @impl true @spec on_after_cmd(Agent.t(), Agent.action(), [Agent.directive()]) :: {:ok, Agent.t(), [Agent.directive()]} def on_after_cmd(agent, _action, directives), do: {:ok, agent, directives} + @impl true @spec signal_routes() :: list() def signal_routes, do: [] + @impl true + def 
checkpoint(agent, _ctx) do + thread = agent.state[:__thread__] + + {:ok, + %{ + version: 1, + agent_module: __MODULE__, + id: agent.id, + state: Map.delete(agent.state, :__thread__), + thread: thread && %{id: thread.id, rev: thread.rev} + }} + end + + @impl true + def restore(data, _ctx) do + # Accept checkpoint data keyed by atoms or by strings + state = data[:state] || data["state"] || %{} + + case new(id: data[:id] || data["id"]) do + {:ok, agent} -> + {:ok, %{agent | state: Map.merge(agent.state, state)}} + + # new/1 may return the bare struct (Jido.Persist.default_restore/2 handles + # both shapes); wrap it so the {:ok, agent} callback contract still holds + %_{state: current} = agent -> + {:ok, %{agent | state: Map.merge(current, state)}} + + error -> + error + end + end + defoverridable on_before_cmd: 2, on_after_cmd: 3, + checkpoint: 2, + restore: 2, signal_routes: 0, name: 0, description: 0, @@ -947,6 +1037,7 @@ defmodule Jido.Agent do end @doc false + @spec __normalize_skill_instances__([module() | {module(), map()}]) :: [SkillInstance.t()] def __normalize_skill_instances__(skills) do Enum.map(skills, &__validate_and_create_skill_instance__/1) end diff --git a/lib/jido/agent/strategy/direct.ex b/lib/jido/agent/strategy/direct.ex index c87b2f4..8b17e93 100644 --- a/lib/jido/agent/strategy/direct.ex +++ b/lib/jido/agent/strategy/direct.ex @@ -7,8 +7,20 @@ defmodule Jido.Agent.Strategy.Direct do - Merges results into agent state - Applies state operations (e.g., `StateOp.SetState`) to the agent - Returns only external directives to the caller + - Optionally tracks instruction execution in Thread when `thread?` is enabled This is the default strategy and provides the simplest execution model. 
+ + ## Thread Tracking + + When `thread?` option is enabled via `ctx[:strategy_opts][:thread?]` or if a thread + already exists in agent state, the strategy will: + - Ensure a Thread exists in agent state + - Append `:instruction_start` entry before each instruction + - Append `:instruction_end` entry after each instruction (with status :ok or :error) + + Example: + agent = Agent.cmd(agent, MyAction, strategy_opts: [thread?: true]) """ use Jido.Agent.Strategy @@ -18,32 +30,94 @@ defmodule Jido.Agent.Strategy.Direct do alias Jido.Agent.StateOps alias Jido.Error alias Jido.Instruction + alias Jido.Thread.Agent, as: ThreadAgent @impl true - def cmd(%Agent{} = agent, instructions, _ctx) when is_list(instructions) do + def cmd(%Agent{} = agent, instructions, ctx) when is_list(instructions) do + agent = maybe_ensure_thread(agent, ctx) + {final_agent, reversed_directives} = Enum.reduce(instructions, {agent, []}, fn instruction, {acc_agent, acc_directives} -> - {new_agent, new_directives} = run_instruction(acc_agent, instruction) + {new_agent, new_directives} = run_instruction_with_tracking(acc_agent, instruction) {new_agent, Enum.reverse(new_directives) ++ acc_directives} end) {final_agent, Enum.reverse(reversed_directives)} end + defp maybe_ensure_thread(agent, ctx) do + opts = ctx[:strategy_opts] || [] + thread_enabled? = Keyword.get(opts, :thread?, false) + + if thread_enabled? 
or ThreadAgent.has_thread?(agent) do + ThreadAgent.ensure(agent) + else + agent + end + end + + defp run_instruction_with_tracking(agent, %Instruction{} = instruction) do + if ThreadAgent.has_thread?(agent) do + agent = append_instruction_start(agent, instruction) + {agent, directives, status} = run_instruction(agent, instruction) + agent = append_instruction_end(agent, instruction, status) + {agent, directives} + else + {agent, directives, _status} = run_instruction(agent, instruction) + {agent, directives} + end + end + defp run_instruction(agent, %Instruction{} = instruction) do instruction = %{instruction | context: Map.put(instruction.context, :state, agent.state)} case Jido.Exec.run(instruction) do {:ok, result} when is_map(result) -> - {StateOps.apply_result(agent, result), []} + {StateOps.apply_result(agent, result), [], :ok} {:ok, result, effects} when is_map(result) -> agent = StateOps.apply_result(agent, result) - StateOps.apply_state_ops(agent, List.wrap(effects)) + {agent, directives} = StateOps.apply_state_ops(agent, List.wrap(effects)) + {agent, directives, :ok} {:error, reason} -> error = Error.execution_error("Instruction failed", %{reason: reason}) - {agent, [%Directive.Error{error: error, context: :instruction}]} + {agent, [%Directive.Error{error: error, context: :instruction}], :error} + end + end + + defp append_instruction_start(agent, %Instruction{} = instruction) do + entry = %{ + kind: :instruction_start, + payload: instruction_payload(instruction) + } + + ThreadAgent.append(agent, entry) + end + + defp append_instruction_end(agent, %Instruction{} = instruction, status) do + entry = %{ + kind: :instruction_end, + payload: Map.put(instruction_payload(instruction), :status, status) + } + + ThreadAgent.append(agent, entry) + end + + defp instruction_payload(%Instruction{} = instruction) do + payload = %{action: instruction.action} + + payload = + if is_map(instruction.params) and map_size(instruction.params) > 0 do + Map.put(payload, 
:param_keys, Map.keys(instruction.params)) + else + payload + end + + if instruction.id do + Map.put(payload, :instruction_id, instruction.id) + else + payload end end end diff --git a/lib/jido/agent/strategy/fsm.ex b/lib/jido/agent/strategy/fsm.ex index cb2d920..331337b 100644 --- a/lib/jido/agent/strategy/fsm.ex +++ b/lib/jido/agent/strategy/fsm.ex @@ -66,6 +66,7 @@ defmodule Jido.Agent.Strategy.FSM do alias Jido.Agent.Strategy.State, as: StratState alias Jido.Error alias Jido.Instruction + alias Jido.Thread.Agent, as: ThreadAgent @default_initial_state "idle" @default_transitions %{ @@ -131,6 +132,7 @@ defmodule Jido.Agent.Strategy.FSM do opts = ctx[:strategy_opts] || [] initial_state = Keyword.get(opts, :initial_state, @default_initial_state) transitions = Keyword.get(opts, :transitions, @default_transitions) + thread_enabled? = Keyword.get(opts, :thread?, false) machine = Machine.new(initial_state, transitions) @@ -142,6 +144,14 @@ defmodule Jido.Agent.Strategy.FSM do auto_transition: Keyword.get(opts, :auto_transition, true) }) + agent = + if thread_enabled? or ThreadAgent.has_thread?(agent) do + agent = ThreadAgent.ensure(agent) + append_checkpoint(agent, :init, initial_state) + else + agent + end + {agent, []} end @@ -155,14 +165,19 @@ defmodule Jido.Agent.Strategy.FSM do transitions = Keyword.get(opts, :transitions, @default_transitions) auto_transition = Map.get(state, :auto_transition, Keyword.get(opts, :auto_transition, true)) + thread_enabled? = Keyword.get(opts, :thread?, false) machine = Map.get(state, :machine) || Machine.new(initial_state, transitions) + agent = maybe_ensure_thread(agent, thread_enabled?) 
+ case Machine.transition(machine, "processing") do {:ok, machine} -> + agent = maybe_append_checkpoint(agent, :transition, "processing") {agent, machine, directives} = process_instructions(agent, machine, instructions) machine = maybe_auto_transition(machine, auto_transition, initial_state) + agent = maybe_append_checkpoint(agent, :transition, machine.status) agent = StratState.put(agent, %{state | machine: machine}) {agent, directives} @@ -173,12 +188,37 @@ defmodule Jido.Agent.Strategy.FSM do end end + defp maybe_ensure_thread(agent, thread_enabled?) do + if thread_enabled? or ThreadAgent.has_thread?(agent) do + ThreadAgent.ensure(agent) + else + agent + end + end + + defp maybe_append_checkpoint(agent, event, fsm_state) do + if ThreadAgent.has_thread?(agent) do + append_checkpoint(agent, event, fsm_state) + else + agent + end + end + + defp append_checkpoint(agent, event, fsm_state) do + entry = %{ + kind: :checkpoint, + payload: %{event: event, fsm_state: fsm_state} + } + + ThreadAgent.append(agent, entry) + end + defp process_instructions(agent, machine, instructions) do {final_agent, final_machine, reversed_directives} = Enum.reduce(instructions, {agent, machine, []}, fn instruction, {acc_agent, acc_machine, acc_directives} -> {new_agent, new_machine, new_directives} = - run_instruction(acc_agent, acc_machine, instruction) + run_instruction_with_tracking(acc_agent, acc_machine, instruction) {new_agent, new_machine, Enum.reverse(new_directives) ++ acc_directives} end) @@ -195,24 +235,71 @@ defmodule Jido.Agent.Strategy.FSM do end end + defp run_instruction_with_tracking(agent, machine, %Instruction{} = instruction) do + if ThreadAgent.has_thread?(agent) do + agent = append_instruction_start(agent, instruction) + {agent, machine, directives, status} = run_instruction(agent, machine, instruction) + agent = append_instruction_end(agent, instruction, status) + {agent, machine, directives} + else + {agent, machine, directives, _status} = run_instruction(agent, 
machine, instruction) + {agent, machine, directives} + end + end + defp run_instruction(agent, machine, %Instruction{} = instruction) do instruction = %{instruction | context: Map.put(instruction.context, :state, agent.state)} case Jido.Exec.run(instruction) do {:ok, result} when is_map(result) -> machine = %{machine | processed_count: machine.processed_count + 1, last_result: result} - {StateOps.apply_result(agent, result), machine, []} + {StateOps.apply_result(agent, result), machine, [], :ok} {:ok, result, effects} when is_map(result) -> machine = %{machine | processed_count: machine.processed_count + 1, last_result: result} agent = StateOps.apply_result(agent, result) {agent, directives} = StateOps.apply_state_ops(agent, List.wrap(effects)) - {agent, machine, directives} + {agent, machine, directives, :ok} {:error, reason} -> machine = %{machine | error: reason} error = Error.execution_error("Instruction failed", %{reason: reason}) - {agent, machine, [%Directive.Error{error: error, context: :instruction}]} + {agent, machine, [%Directive.Error{error: error, context: :instruction}], :error} + end + end + + defp append_instruction_start(agent, %Instruction{} = instruction) do + entry = %{ + kind: :instruction_start, + payload: instruction_payload(instruction) + } + + ThreadAgent.append(agent, entry) + end + + defp append_instruction_end(agent, %Instruction{} = instruction, status) do + entry = %{ + kind: :instruction_end, + payload: Map.put(instruction_payload(instruction), :status, status) + } + + ThreadAgent.append(agent, entry) + end + + defp instruction_payload(%Instruction{} = instruction) do + payload = %{action: instruction.action} + + payload = + if is_map(instruction.params) and map_size(instruction.params) > 0 do + Map.put(payload, :param_keys, Map.keys(instruction.params)) + else + payload + end + + if instruction.id do + Map.put(payload, :instruction_id, instruction.id) + else + payload end end diff --git a/lib/jido/agent_server.ex 
b/lib/jido/agent_server.ex index 6061bc6..d334267 100644 --- a/lib/jido/agent_server.ex +++ b/lib/jido/agent_server.ex @@ -604,7 +604,6 @@ defmodule Jido.AgentServer do state = state.lifecycle.mod.init(lifecycle_opts, state) - Logger.debug("AgentServer #{state.id} initialized, status: idle") {:noreply, State.set_status(state, :idle)} end @@ -815,7 +814,6 @@ defmodule Jido.AgentServer do end end) - Logger.debug("AgentServer #{state.id} terminating: #{inspect(reason)}") :ok end diff --git a/lib/jido/discovery.ex b/lib/jido/discovery.ex index bbad361..87eac2c 100644 --- a/lib/jido/discovery.ex +++ b/lib/jido/discovery.ex @@ -89,11 +89,6 @@ defmodule Jido.Discovery do Task.async(fn -> catalog = build_catalog() :persistent_term.put(@catalog_key, catalog) - - Logger.info( - "[Jido.Discovery] Catalog initialized with #{count_components(catalog)} components" - ) - :ok end) end @@ -105,10 +100,8 @@ defmodule Jido.Discovery do """ @spec refresh() :: :ok def refresh do - Logger.info("[Jido.Discovery] Refreshing catalog...") catalog = build_catalog() :persistent_term.put(@catalog_key, catalog) - Logger.info("[Jido.Discovery] Catalog refreshed with #{count_components(catalog)} components") :ok end @@ -240,13 +233,6 @@ defmodule Jido.Discovery do } end - defp count_components(catalog) do - catalog.components - |> Map.values() - |> Enum.map(&length/1) - |> Enum.sum() - end - defp discover_components(metadata_fun) do loaded_applications() |> Enum.flat_map(&modules_for/1) diff --git a/lib/jido/persist.ex b/lib/jido/persist.ex new file mode 100644 index 0000000..135cc6f --- /dev/null +++ b/lib/jido/persist.ex @@ -0,0 +1,356 @@ +defmodule Jido.Persist do + @moduledoc """ + Coordinates hibernate/thaw operations for agents with thread support. + + This module is the **invariant enforcer** - it ensures: + + 1. Journal is flushed before checkpoint + 2. Checkpoint never contains full Thread, only a pointer + 3. 
Thread is rehydrated on thaw + + ## API + + The primary API accepts a storage configuration tuple: + + Jido.Persist.hibernate({adapter, opts}, agent) + Jido.Persist.thaw({adapter, opts}, agent_module, key) + + Or a Jido instance with embedded storage config: + + Jido.Persist.hibernate(jido_instance, agent) + Jido.Persist.thaw(jido_instance, agent_module, key) + + ## hibernate/2 Flow + + 1. Extract thread from `agent.state[:__thread__]` + 2. If thread exists with entries, flush journal via `adapter.append_thread/3` + 3. Call `agent_module.checkpoint/2` if implemented, else use default + 4. **Enforce invariant**: Remove `:__thread__` from state, store only thread pointer + 5. Call `adapter.put_checkpoint/3` + + ## thaw/3 Flow + + 1. Call `adapter.get_checkpoint/2` + 2. If `:not_found`, return `:not_found` + 3. Call `agent_module.restore/2` if implemented, else use default + 4. If checkpoint has thread pointer, load and attach thread + 5. Verify loaded thread.rev matches checkpoint pointer rev + + ## Agent Callbacks + + Agents may optionally implement: + + - `checkpoint(agent, ctx)` - Returns `{:ok, checkpoint_data}` for custom serialization + - `restore(checkpoint_data, ctx)` - Returns `{:ok, agent}` for custom deserialization + + If not implemented, default serialization is used. 
+ + ## Examples + + # Using storage config tuple + storage = {Jido.Storage.ETS, table: :my_storage} + + # Hibernate an agent + :ok = Jido.Persist.hibernate(storage, agent) + + # Thaw an agent + case Jido.Persist.thaw(storage, MyAgent, "agent-123") do + {:ok, agent} -> agent + :not_found -> start_fresh() + {:error, :missing_thread} -> handle_missing_thread() + {:error, :thread_mismatch} -> handle_mismatch() + end + """ + + require Logger + + alias Jido.Thread + + @type storage_config :: {module(), keyword()} + @type agent :: struct() + @type agent_module :: module() + @type key :: term() + @type checkpoint_key :: {agent_module(), term()} + + @type thread_pointer :: %{id: String.t(), rev: non_neg_integer()} + + @type checkpoint :: %{ + version: pos_integer(), + agent_module: agent_module(), + id: term(), + state: map(), + thread: thread_pointer() | nil + } + + @doc """ + Persists an agent to storage, flushing any pending thread entries first. + + Accepts either a `{adapter, opts}` tuple or a struct with `:storage` field. + + ## Examples + + storage = {Jido.Storage.ETS, table: :agents} + :ok = Jido.Persist.hibernate(storage, my_agent) + + ## Returns + + - `:ok` - Successfully hibernated + - `{:error, reason}` - Failed to hibernate + """ + @spec hibernate(storage_config() | module() | struct(), agent()) :: :ok | {:error, term()} + def hibernate(storage_or_instance, agent) + + def hibernate({adapter, opts}, agent) when is_atom(adapter) do + do_hibernate(adapter, opts, agent) + end + + def hibernate(%{storage: {adapter, opts}}, agent) do + do_hibernate(adapter, opts, agent) + end + + def hibernate(jido_instance, agent) when is_atom(jido_instance) do + {adapter, opts} = jido_instance.__jido_storage__() + do_hibernate(adapter, opts, agent) + end + + @doc """ + Restores an agent from storage, rehydrating thread if present. + + Accepts either a `{adapter, opts}` tuple or a struct with `:storage` field. 
+ + ## Examples + + storage = {Jido.Storage.ETS, table: :agents} + {:ok, agent} = Jido.Persist.thaw(storage, MyAgent, "agent-123") + + ## Returns + + - `{:ok, agent}` - Successfully thawed + - `:not_found` - No checkpoint exists for this key + - `{:error, :missing_thread}` - Checkpoint references thread that doesn't exist + - `{:error, :thread_mismatch}` - Loaded thread.rev != checkpoint thread.rev + - `{:error, reason}` - Other errors + """ + @spec thaw(storage_config() | module() | struct(), agent_module(), key()) :: + {:ok, agent()} | :not_found | {:error, term()} + def thaw(storage_or_instance, agent_module, key) + + def thaw({adapter, opts}, agent_module, key) when is_atom(adapter) do + do_thaw(adapter, opts, agent_module, key) + end + + def thaw(%{storage: {adapter, opts}}, agent_module, key) do + do_thaw(adapter, opts, agent_module, key) + end + + def thaw(jido_instance, agent_module, key) when is_atom(jido_instance) do + {adapter, opts} = jido_instance.__jido_storage__() + do_thaw(adapter, opts, agent_module, key) + end + + # --- Private Implementation --- + + @spec do_hibernate(module(), keyword(), agent()) :: :ok | {:error, term()} + defp do_hibernate(adapter, opts, agent) do + agent_module = agent.__struct__ + thread = get_thread(agent) + + Logger.debug("Persist.hibernate starting for #{inspect(agent_module)} id=#{agent.id}") + + with :ok <- flush_journal(adapter, opts, thread), + {:ok, checkpoint} <- create_checkpoint(agent_module, agent, thread), + checkpoint_key <- make_checkpoint_key(agent_module, agent.id), + :ok <- adapter.put_checkpoint(checkpoint_key, checkpoint, opts) do + Logger.debug("Persist.hibernate completed for #{inspect(agent_module)} id=#{agent.id}") + :ok + else + {:error, reason} = error -> + Logger.error( + "Persist.hibernate failed for #{inspect(agent_module)} id=#{agent.id}: #{inspect(reason)}" + ) + + error + end + end + + @spec do_thaw(module(), keyword(), agent_module(), key()) :: + {:ok, agent()} | :not_found | {:error, term()} 
+ defp do_thaw(adapter, opts, agent_module, key) do + checkpoint_key = make_checkpoint_key(agent_module, key) + + Logger.debug("Persist.thaw starting for #{inspect(agent_module)} key=#{inspect(key)}") + + case adapter.get_checkpoint(checkpoint_key, opts) do + {:ok, checkpoint} -> + restore_from_checkpoint(adapter, opts, agent_module, checkpoint) + + :not_found -> + Logger.debug("Persist.thaw: checkpoint not found for #{inspect(checkpoint_key)}") + :not_found + + {:error, reason} = error -> + Logger.error( + "Persist.thaw failed to get checkpoint for #{inspect(checkpoint_key)}: #{inspect(reason)}" + ) + + error + end + end + + @spec flush_journal(module(), keyword(), Thread.t() | nil) :: :ok | {:error, term()} + defp flush_journal(_adapter, _opts, nil), do: :ok + defp flush_journal(_adapter, _opts, %Thread{entries: []}), do: :ok + + defp flush_journal(adapter, opts, %Thread{} = thread) do + # Storage may already hold a prefix of this thread from an earlier hibernate, + # so look up the stored revision and append only the entries after it. + case adapter.load_thread(thread.id, opts) do + {:ok, %Thread{rev: stored_rev}} -> + append_pending(adapter, opts, thread, stored_rev) + + :not_found -> + append_pending(adapter, opts, thread, 0) + + {:error, reason} = error -> + Logger.error( + "Persist: failed to flush journal for thread #{thread.id}: #{inspect(reason)}" + ) + + error + end + end + + # Appends the entries past `stored_rev`, passing it as the :expected_rev guard. + # NOTE(review): assumes `rev` equals the number of stored entries - confirm against Jido.Thread. + @spec append_pending(module(), keyword(), Thread.t(), non_neg_integer()) :: + :ok | {:error, term()} + defp append_pending(adapter, opts, %Thread{} = thread, stored_rev) do + case Enum.drop(thread.entries, stored_rev) do + [] -> + :ok + + pending -> + Logger.debug("Persist: flushing #{length(pending)} entries for thread #{thread.id}") + + case adapter.append_thread(thread.id, pending, Keyword.put(opts, :expected_rev, stored_rev)) do + {:ok, _updated_thread} -> + :ok + + {:error, :conflict} -> + Logger.debug("Persist: conflict on append, thread may already be persisted") + :ok + + {:error, reason} = error -> + Logger.error( + "Persist: failed to flush journal for thread #{thread.id}: #{inspect(reason)}" + ) + + error + end + end + end + + @spec create_checkpoint(agent_module(), agent(), Thread.t() | nil) :: + {:ok, checkpoint()} | {:error, term()} + defp create_checkpoint(agent_module, agent, thread) do + ctx = %{} + + result = + if function_exported?(agent_module, :checkpoint, 2) do + agent_module.checkpoint(agent, ctx) + else + {:ok, default_checkpoint(agent, thread)} + end + + case result do + {:ok, checkpoint} -> + {:ok, enforce_checkpoint_invariants(checkpoint, thread)} + + {:error, _} = error -> + error + end + end + + @spec enforce_checkpoint_invariants(map(), Thread.t() | nil) :: checkpoint() + defp 
enforce_checkpoint_invariants(checkpoint, thread) do + state_without_thread = Map.delete(checkpoint[:state] || %{}, :__thread__) + + thread_pointer = + case thread do + nil -> nil + %Thread{id: id, rev: rev} -> %{id: id, rev: rev} + end + + checkpoint + |> Map.put(:state, state_without_thread) + |> Map.put(:thread, thread_pointer) + end + + @spec default_checkpoint(agent(), Thread.t() | nil) :: checkpoint() + defp default_checkpoint(agent, thread) do + thread_pointer = + case thread do + nil -> nil + %Thread{id: id, rev: rev} -> %{id: id, rev: rev} + end + + %{ + version: 1, + agent_module: agent.__struct__, + id: agent.id, + state: Map.delete(agent.state, :__thread__), + thread: thread_pointer + } + end + + @spec restore_from_checkpoint(module(), keyword(), agent_module(), checkpoint()) :: + {:ok, agent()} | {:error, term()} + defp restore_from_checkpoint(adapter, opts, agent_module, checkpoint) do + ctx = %{} + + with {:ok, agent} <- restore_agent(agent_module, checkpoint, ctx), + {:ok, agent} <- rehydrate_thread(adapter, opts, agent, checkpoint) do + Logger.debug("Persist.thaw completed for #{inspect(agent_module)} id=#{checkpoint.id}") + {:ok, agent} + end + end + + @spec restore_agent(agent_module(), checkpoint(), map()) :: {:ok, agent()} | {:error, term()} + defp restore_agent(agent_module, checkpoint, ctx) do + if function_exported?(agent_module, :restore, 2) do + agent_module.restore(checkpoint, ctx) + else + default_restore(agent_module, checkpoint) + end + end + + @spec default_restore(agent_module(), checkpoint()) :: {:ok, agent()} | {:error, term()} + defp default_restore(agent_module, checkpoint) do + case agent_module.new(id: checkpoint.id) do + {:ok, agent} -> + merged_state = Map.merge(agent.state, checkpoint.state || %{}) + {:ok, %{agent | state: merged_state}} + + agent when is_struct(agent) -> + merged_state = Map.merge(agent.state, checkpoint.state || %{}) + {:ok, %{agent | state: merged_state}} + + {:error, _} = error -> + error + end + end + + 
@spec rehydrate_thread(module(), keyword(), agent(), checkpoint()) :: + {:ok, agent()} | {:error, term()} + defp rehydrate_thread(_adapter, _opts, agent, %{thread: nil}), do: {:ok, agent} + + defp rehydrate_thread(adapter, opts, agent, %{thread: %{id: thread_id, rev: expected_rev}}) do + Logger.debug("Persist: rehydrating thread #{thread_id} with expected rev=#{expected_rev}") + + case adapter.load_thread(thread_id, opts) do + {:ok, %Thread{rev: ^expected_rev} = thread} -> + agent_with_thread = attach_thread(agent, thread) + {:ok, agent_with_thread} + + {:ok, %Thread{rev: actual_rev}} -> + Logger.error( + "Persist: thread rev mismatch for #{thread_id}: expected=#{expected_rev}, actual=#{actual_rev}" + ) + + {:error, :thread_mismatch} + + :not_found -> + Logger.error("Persist: thread #{thread_id} not found but referenced in checkpoint") + {:error, :missing_thread} + + {:error, reason} = error -> + Logger.error("Persist: failed to load thread #{thread_id}: #{inspect(reason)}") + error + end + end + + @spec get_thread(agent()) :: Thread.t() | nil + defp get_thread(%{state: %{__thread__: thread}}) when is_struct(thread, Thread), do: thread + defp get_thread(_agent), do: nil + + @spec attach_thread(agent(), Thread.t()) :: agent() + defp attach_thread(agent, thread) do + %{agent | state: Map.put(agent.state, :__thread__, thread)} + end + + @spec make_checkpoint_key(agent_module(), term()) :: checkpoint_key() + defp make_checkpoint_key(agent_module, agent_id) do + {agent_module, agent_id} + end +end diff --git a/lib/jido/storage.ex b/lib/jido/storage.ex new file mode 100644 index 0000000..3e54ab6 --- /dev/null +++ b/lib/jido/storage.ex @@ -0,0 +1,118 @@ +defmodule Jido.Storage do + @moduledoc """ + Unified storage behaviour for agent checkpoints and thread journals. 
+ + Implementations handle both: + - **Checkpoints**: key-value overwrite semantics for agent state snapshots + - **Journals**: append-only thread entries with sequence ordering + + ## Built-in Adapters + + | Adapter | Durability | Use Case | + |---------|------------|----------| + | `Jido.Storage.ETS` | Ephemeral | Development, testing | + + ## Implementing Custom Adapters + + Implement all 6 callbacks to create a custom storage adapter: + + defmodule MyApp.Storage do + @behaviour Jido.Storage + + @impl true + def get_checkpoint(key, opts), do: ... + + @impl true + def put_checkpoint(key, data, opts), do: ... + + @impl true + def delete_checkpoint(key, opts), do: ... + + @impl true + def load_thread(thread_id, opts), do: ... + + @impl true + def append_thread(thread_id, entries, opts), do: ... + + @impl true + def delete_thread(thread_id, opts), do: ... + end + + ## Concurrency + + The `append_thread/3` callback accepts an `:expected_rev` option for + optimistic concurrency control. Implementations should reject appends + when the current revision doesn't match the expected value. + """ + + alias Jido.Thread + alias Jido.Thread.Entry + + @doc """ + Retrieve a checkpoint by key. + + Returns `{:ok, data}` if found, `:not_found` if the key doesn't exist. + """ + @callback get_checkpoint(key :: term(), opts :: keyword()) :: + {:ok, term()} | :not_found | {:error, term()} + + @doc """ + Store a checkpoint, overwriting any existing value for the key. + """ + @callback put_checkpoint(key :: term(), data :: term(), opts :: keyword()) :: + :ok | {:error, term()} + + @doc """ + Delete a checkpoint by key. + + Returns `:ok` even if the key didn't exist. + """ + @callback delete_checkpoint(key :: term(), opts :: keyword()) :: + :ok | {:error, term()} + + @doc """ + Load a thread by ID, reconstructing from stored entries. + + Returns `{:ok, thread}` if entries exist, `:not_found` if the thread + has no entries. 
+ """ + @callback load_thread(thread_id :: String.t(), opts :: keyword()) :: + {:ok, Thread.t()} | :not_found | {:error, term()} + + @doc """ + Append entries to a thread. + + ## Options + + - `:expected_rev` - If provided, the append should fail with + `{:error, :conflict}` if the current thread revision doesn't match. + - `:metadata` - Thread metadata to set (typically only for new threads). + + Returns `{:ok, updated_thread}` on success. + """ + @callback append_thread(thread_id :: String.t(), entries :: [Entry.t()], opts :: keyword()) :: + {:ok, Thread.t()} | {:error, term()} + + @doc """ + Delete a thread and all its entries. + + Returns `:ok` even if the thread didn't exist. + """ + @callback delete_thread(thread_id :: String.t(), opts :: keyword()) :: + :ok | {:error, term()} + + @doc """ + Normalize a storage configuration to `{module, opts}` tuple. + + ## Examples + + iex> Jido.Storage.normalize_storage(Jido.Storage.ETS) + {Jido.Storage.ETS, []} + + iex> Jido.Storage.normalize_storage({Jido.Storage.File, path: "priv/jido"}) + {Jido.Storage.File, [path: "priv/jido"]} + """ + @spec normalize_storage(module() | {module(), keyword()}) :: {module(), keyword()} + def normalize_storage({mod, opts}) when is_atom(mod) and is_list(opts), do: {mod, opts} + def normalize_storage(mod) when is_atom(mod), do: {mod, []} +end diff --git a/lib/jido/storage/ets.ex b/lib/jido/storage/ets.ex new file mode 100644 index 0000000..bddbdd4 --- /dev/null +++ b/lib/jido/storage/ets.ex @@ -0,0 +1,307 @@ +defmodule Jido.Storage.ETS do + @moduledoc """ + ETS-based storage adapter for agent checkpoints and thread journals. + + Fast in-memory storage for development and testing. Not restart-safe - + all data is lost when the BEAM stops. + + ## Usage + + defmodule MyApp.Jido do + use Jido, + otp_app: :my_app, + storage: {Jido.Storage.ETS, table: :my_jido_storage} + end + + ## Options + + - `:table` - Base table name (default: `:jido_storage`). 
Creates three named tables by suffixing the base name (e.g. `:jido_storage_checkpoints` for the default): + - `<table>_checkpoints` - Agent checkpoint data (set) + - `<table>_threads` - Thread entries keyed by `{thread_id, seq}` (ordered_set) + - `<table>_thread_meta` - Thread metadata (set) + + ## Concurrency + + Each individual ETS call is atomic, but `append_thread/3` reads the current + revision and inserts entries in separate calls. The `expected_rev` option + provides optimistic concurrency control; it does not by itself serialize + concurrent appenders to the same thread. + """ + + @behaviour Jido.Storage + + alias Jido.Thread + alias Jido.Thread.Entry + + @default_table :jido_storage + + @type opts :: keyword() + + @impl true + @doc """ + Retrieve a checkpoint by key. + + Returns `{:ok, data}` if found, `:not_found` otherwise. + """ + @spec get_checkpoint(term(), opts()) :: {:ok, term()} | :not_found | {:error, term()} + def get_checkpoint(key, opts) do + table = checkpoint_table(opts) + ensure_tables(opts) + + case :ets.lookup(table, key) do + [{^key, data}] -> {:ok, data} + [] -> :not_found + end + rescue + ArgumentError -> :not_found + end + + @impl true + @doc """ + Store a checkpoint, overwriting any existing value. + """ + @spec put_checkpoint(term(), term(), opts()) :: :ok | {:error, term()} + def put_checkpoint(key, data, opts) do + table = checkpoint_table(opts) + ensure_tables(opts) + :ets.insert(table, {key, data}) + :ok + rescue + ArgumentError -> {:error, :table_not_found} + end + + @impl true + @doc """ + Delete a checkpoint by key. + """ + @spec delete_checkpoint(term(), opts()) :: :ok | {:error, term()} + def delete_checkpoint(key, opts) do + table = checkpoint_table(opts) + ensure_tables(opts) + :ets.delete(table, key) + :ok + rescue + ArgumentError -> :ok + end + + @impl true + @doc """ + Load a thread by ID, reconstructing from stored entries. + + Returns `{:ok, thread}` if entries exist, `:not_found` otherwise. 
+ """ + @spec load_thread(String.t(), opts()) :: {:ok, Thread.t()} | :not_found | {:error, term()} + def load_thread(thread_id, opts) do + threads_table = threads_table(opts) + meta_table = meta_table(opts) + ensure_tables(opts) + + entries = + :ets.select(threads_table, [ + {{{thread_id, :_}, :_}, [], [:"$_"]} + ]) + |> Enum.sort_by(fn {{_id, seq}, _entry} -> seq end) + |> Enum.map(fn {_key, entry} -> entry end) + + case entries do + [] -> + :not_found + + entries -> + meta = get_thread_meta(meta_table, thread_id) + {:ok, reconstruct_thread(thread_id, entries, meta)} + end + rescue + ArgumentError -> :not_found + end + + @impl true + @doc """ + Append entries to a thread. + + ## Options + + - `:expected_rev` - If provided, the append will fail with `{:error, :conflict}` + if the current thread revision doesn't match. + - `:metadata` - Thread metadata to merge (only used when creating new thread). + """ + @spec append_thread(String.t(), [Entry.t()], opts()) :: + {:ok, Thread.t()} | {:error, term()} + def append_thread(thread_id, entries, opts) do + threads_table = threads_table(opts) + meta_table = meta_table(opts) + ensure_tables(opts) + + expected_rev = Keyword.get(opts, :expected_rev) + now = System.system_time(:millisecond) + + current_rev = get_current_rev(threads_table, thread_id) + + if expected_rev && current_rev != expected_rev do + {:error, :conflict} + else + base_seq = current_rev + is_new = current_rev == 0 + + prepared_entries = + entries + |> Enum.with_index() + |> Enum.map(fn {entry, idx} -> + seq = base_seq + idx + prepare_entry(entry, seq, now) + end) + + ets_entries = + Enum.map(prepared_entries, fn entry -> + {{thread_id, entry.seq}, entry} + end) + + :ets.insert(threads_table, ets_entries) + + meta = + if is_new do + new_meta = %{ + created_at: now, + updated_at: now, + metadata: Keyword.get(opts, :metadata, %{}) + } + + :ets.insert(meta_table, {thread_id, new_meta}) + new_meta + else + update_thread_meta(meta_table, thread_id, now) + end + + 
{:ok, reconstruct_thread(thread_id, load_all_entries(threads_table, thread_id), meta)} + end + rescue + ArgumentError -> {:error, :table_not_found} + end + + @impl true + @doc """ + Delete a thread and all its entries. + """ + @spec delete_thread(String.t(), opts()) :: :ok | {:error, term()} + def delete_thread(thread_id, opts) do + threads_table = threads_table(opts) + meta_table = meta_table(opts) + ensure_tables(opts) + + :ets.select_delete(threads_table, [ + {{{thread_id, :_}, :_}, [], [true]} + ]) + + :ets.delete(meta_table, thread_id) + + :ok + rescue + ArgumentError -> :ok + end + + defp checkpoint_table(opts) do + base = Keyword.get(opts, :table, @default_table) + :"#{base}_checkpoints" + end + + defp threads_table(opts) do + base = Keyword.get(opts, :table, @default_table) + :"#{base}_threads" + end + + defp meta_table(opts) do + base = Keyword.get(opts, :table, @default_table) + :"#{base}_thread_meta" + end + + defp ensure_tables(opts) do + ensure_table(checkpoint_table(opts), [:set]) + ensure_table(threads_table(opts), [:ordered_set]) + ensure_table(meta_table(opts), [:set]) + end + + defp ensure_table(name, extra_opts) do + case :ets.whereis(name) do + :undefined -> + :ets.new(name, [:named_table, :public, read_concurrency: true] ++ extra_opts) + + _ref -> + :ok + end + rescue + ArgumentError -> :ok + end + + defp get_current_rev(table, thread_id) do + case :ets.select_reverse(table, [{{{thread_id, :"$1"}, :_}, [], [:"$1"]}], 1) do + {[seq], _cont} -> seq + 1 + :"$end_of_table" -> 0 + end + end + + defp load_all_entries(table, thread_id) do + :ets.select(table, [ + {{{thread_id, :_}, :"$1"}, [], [:"$1"]} + ]) + |> Enum.sort_by(& &1.seq) + end + + defp get_thread_meta(table, thread_id) do + case :ets.lookup(table, thread_id) do + [{^thread_id, meta}] -> meta + [] -> %{created_at: nil, updated_at: nil, metadata: %{}} + end + end + + defp update_thread_meta(table, thread_id, now) do + case :ets.lookup(table, thread_id) do + [{^thread_id, meta}] -> + 
updated = %{meta | updated_at: now} + :ets.insert(table, {thread_id, updated}) + updated + + [] -> + meta = %{created_at: now, updated_at: now, metadata: %{}} + :ets.insert(table, {thread_id, meta}) + meta + end + end + + defp prepare_entry(%Entry{} = entry, seq, now) do + %Entry{ + id: entry.id || generate_entry_id(), + seq: seq, + at: entry.at || now, + kind: entry.kind, + payload: entry.payload, + refs: entry.refs + } + end + + defp prepare_entry(attrs, seq, now) when is_map(attrs) do + %Entry{ + id: attrs[:id] || attrs["id"] || generate_entry_id(), + seq: seq, + at: attrs[:at] || attrs["at"] || now, + kind: attrs[:kind] || attrs["kind"] || :note, + payload: attrs[:payload] || attrs["payload"] || %{}, + refs: attrs[:refs] || attrs["refs"] || %{} + } + end + + defp reconstruct_thread(thread_id, entries, meta) do + entry_count = length(entries) + + %Thread{ + id: thread_id, + rev: entry_count, + entries: entries, + created_at: meta[:created_at] || (List.first(entries) && List.first(entries).at), + updated_at: meta[:updated_at] || (List.last(entries) && List.last(entries).at), + metadata: meta[:metadata] || %{}, + stats: %{entry_count: entry_count} + } + end + + defp generate_entry_id do + "entry_" <> Jido.Util.generate_id() + end +end diff --git a/lib/jido/storage/file.ex b/lib/jido/storage/file.ex new file mode 100644 index 0000000..3be142b --- /dev/null +++ b/lib/jido/storage/file.ex @@ -0,0 +1,348 @@ +defmodule Jido.Storage.File do + @moduledoc """ + File-based storage adapter for Jido. + + Provides persistent storage for agent checkpoints and thread journals using + a directory-based layout. Suitable for simple production deployments. + + ## Usage + + defmodule MyApp.Jido do + use Jido, + otp_app: :my_app, + storage: {Jido.Storage.File, path: "priv/jido/storage"} + end + + ## Options + + - `:path` - Base directory path (required). Created if it doesn't exist. 
+ + ## Directory Layout + + base_path/ + ├── checkpoints/ + │ └── {key_hash}.term # Serialized checkpoint + └── threads/ + └── {thread_id}/ + ├── meta.term # {rev, created_at, updated_at, metadata} + └── entries.log # Length-prefixed binary frames + + ## Concurrency + + Uses `:global.trans/3` for thread-level locking to ensure safe concurrent access. + """ + + @behaviour Jido.Storage + + alias Jido.Thread + alias Jido.Thread.Entry + + @type key :: term() + @type opts :: keyword() + + # ============================================================================= + # Checkpoint Operations + # ============================================================================= + + @doc """ + Retrieve a checkpoint by key. + + Returns `{:ok, data}` if found, `:not_found` if the file doesn't exist, + or `{:error, reason}` on failure. + """ + @impl true + @spec get_checkpoint(key(), opts()) :: {:ok, term()} | :not_found | {:error, term()} + def get_checkpoint(key, opts) do + path = Keyword.fetch!(opts, :path) + file_path = checkpoint_path(path, key) + + case File.read(file_path) do + {:ok, binary} -> + {:ok, :erlang.binary_to_term(binary, [:safe])} + + {:error, :enoent} -> + :not_found + + {:error, reason} -> + {:error, reason} + end + rescue + ArgumentError -> {:error, :invalid_term} + end + + @doc """ + Store a checkpoint with atomic write semantics. + + Writes to a temporary file first, then renames for atomicity. + """ + @impl true + @spec put_checkpoint(key(), term(), opts()) :: :ok | {:error, term()} + def put_checkpoint(key, data, opts) do + path = Keyword.fetch!(opts, :path) + ensure_checkpoints_dir(path) + file_path = checkpoint_path(path, key) + tmp_path = file_path <> ".tmp" + binary = :erlang.term_to_binary(data) + + with :ok <- File.write(tmp_path, binary), + :ok <- File.rename(tmp_path, file_path) do + :ok + else + {:error, reason} -> + File.rm(tmp_path) + {:error, reason} + end + end + + @doc """ + Delete a checkpoint. 
Returns `:ok` even if the file doesn't exist.
  """
  @impl true
  @spec delete_checkpoint(key(), opts()) :: :ok | {:error, term()}
  def delete_checkpoint(key, opts) do
    path = Keyword.fetch!(opts, :path)
    file_path = checkpoint_path(path, key)

    case File.rm(file_path) do
      :ok -> :ok
      # A missing file already satisfies "deleted".
      {:error, :enoent} -> :ok
      {:error, reason} -> {:error, reason}
    end
  end

  # =============================================================================
  # Thread Operations
  # =============================================================================

  @doc """
  Load a thread from disk.

  Reads the meta file and entries log, reconstructing a `%Jido.Thread{}`.
  Returns `:not_found` if either file doesn't exist, `{:error, :invalid_term}`
  if a stored term cannot be decoded, or `{:error, reason}` for other read
  failures.
  """
  @impl true
  @spec load_thread(String.t(), opts()) :: {:ok, Thread.t()} | :not_found | {:error, term()}
  def load_thread(thread_id, opts) do
    path = Keyword.fetch!(opts, :path)
    thread_dir = thread_path(path, thread_id)
    meta_file = Path.join(thread_dir, "meta.term")
    entries_file = Path.join(thread_dir, "entries.log")

    with {:ok, meta_binary} <- File.read(meta_file),
         {:ok, entries_binary} <- File.read(entries_file) do
      {rev, created_at, updated_at, metadata} = :erlang.binary_to_term(meta_binary, [:safe])
      entries = decode_entries(entries_binary)

      thread = %Thread{
        id: thread_id,
        rev: rev,
        entries: entries,
        created_at: created_at,
        updated_at: updated_at,
        metadata: metadata,
        stats: %{entry_count: length(entries)}
      }

      {:ok, thread}
    else
      {:error, :enoent} -> :not_found
      {:error, reason} -> {:error, reason}
    end
  rescue
    # Raised by :erlang.binary_to_term/2 and by decode_entries/1 on corrupt data.
    ArgumentError -> {:error, :invalid_term}
  end

  @doc """
  Append entries to a thread with optimistic concurrency.

  Options:
  - `:expected_rev` - Expected current revision. Fails with `{:error, :conflict}`
    if the current revision doesn't match.
  - `:metadata` - Thread metadata stored when the append creates a new thread
    (default: `%{}`). Ignored for existing threads.

  Returns `{:ok, thread}` with all entries on success, `{:error, :invalid_term}`
  if the stored thread cannot be decoded, or `{:error, reason}` on I/O failure.

  Uses a global lock per thread id to serialize concurrent appenders.
  """
  @impl true
  @spec append_thread(String.t(), [Entry.t()], opts()) ::
          {:ok, Thread.t()} | {:error, term()}
  def append_thread(thread_id, entries, opts) do
    path = Keyword.fetch!(opts, :path)
    expected_rev = Keyword.get(opts, :expected_rev)
    new_thread_metadata = Keyword.get(opts, :metadata, %{})

    with_thread_lock(thread_id, fn ->
      do_append_thread(path, thread_id, entries, expected_rev, new_thread_metadata)
    end)
  rescue
    # Same contract as load_thread/2 for undecodable on-disk data.
    ArgumentError -> {:error, :invalid_term}
  end

  @doc """
  Delete a thread and all its data.

  Removes the entire thread directory. Returns `:ok` even if it didn't exist.
  """
  @impl true
  @spec delete_thread(String.t(), opts()) :: :ok | {:error, term()}
  def delete_thread(thread_id, opts) do
    path = Keyword.fetch!(opts, :path)
    thread_dir = thread_path(path, thread_id)

    case File.rm_rf(thread_dir) do
      {:ok, _} -> :ok
      {:error, reason, _} -> {:error, reason}
    end
  end

  # =============================================================================
  # Private Helpers
  # =============================================================================

  # Performs the read-check-append-update sequence. Must run under the thread lock.
  defp do_append_thread(path, thread_id, entries, expected_rev, new_thread_metadata) do
    thread_dir = thread_path(path, thread_id)
    meta_file = Path.join(thread_dir, "meta.term")
    entries_file = Path.join(thread_dir, "entries.log")
    now = System.system_time(:millisecond)

    with {:ok, current_rev, current_entries, created_at, metadata} <-
           load_or_init_thread(meta_file, entries_file, now, new_thread_metadata),
         :ok <- check_expected_rev(expected_rev, current_rev) do
      ensure_thread_dir(thread_dir)

      # New entries continue the sequence after the ones already on disk.
      prepared_entries =
        entries
        |> Enum.with_index(length(current_entries))
        |> Enum.map(fn {entry, seq} ->
          %Entry{
            id: entry.id || generate_entry_id(),
            seq: seq,
            at: entry.at || now,
            kind: entry.kind,
            payload: entry.payload || %{},
            refs: entry.refs || %{}
          }
        end)

      all_entries = current_entries ++ prepared_entries
      new_rev = current_rev + length(prepared_entries)

      with :ok <- append_to_file(entries_file, encode_entries(prepared_entries)),
           :ok <- write_meta(meta_file, {new_rev, created_at, now, metadata}) do
        {:ok,
         %Thread{
           id: thread_id,
           rev: new_rev,
           entries: all_entries,
           created_at: created_at,
           updated_at: now,
           metadata: metadata,
           stats: %{entry_count: length(all_entries)}
         }}
      end
    end
  end

  # Returns the stored thread state, or a fresh rev-0 state when nothing is on disk.
  # Read errors other than :enoent are propagated instead of being mistaken for
  # "new thread", which would reset the revision and overwrite meta.term.
  defp load_or_init_thread(meta_file, entries_file, now, new_thread_metadata) do
    case load_existing_thread(meta_file, entries_file) do
      :not_found -> {:ok, 0, [], now, new_thread_metadata}
      other -> other
    end
  end

  # A nil/false expected_rev disables the optimistic concurrency check.
  defp check_expected_rev(expected_rev, current_rev) do
    if expected_rev && expected_rev != current_rev do
      {:error, :conflict}
    else
      :ok
    end
  end

  # Writes meta via a temp file + rename so readers never see a partial meta.term.
  defp write_meta(meta_file, meta) do
    tmp_meta = meta_file <> ".tmp"

    with :ok <- File.write(tmp_meta, :erlang.term_to_binary(meta)),
         :ok <- File.rename(tmp_meta, meta_file) do
      :ok
    else
      {:error, reason} ->
        File.rm(tmp_meta)
        {:error, reason}
    end
  end

  # Reads meta and entries; `:not_found` only when a file is actually missing.
  defp load_existing_thread(meta_file, entries_file) do
    with {:ok, meta_binary} <- File.read(meta_file),
         {:ok, entries_binary} <- File.read(entries_file) do
      {rev, created_at, _updated_at, metadata} = :erlang.binary_to_term(meta_binary, [:safe])
      entries = decode_entries(entries_binary)
      {:ok, rev, entries, created_at, metadata}
    else
      {:error, :enoent} -> :not_found
      {:error, reason} -> {:error, reason}
    end
  end

  # Appends `binary` to the file, creating it if needed.
  defp append_to_file(file_path, binary) do
    File.write(file_path, binary, [:append, :binary])
  end

  # Binary framing: <<size::32, term_binary::binary>> for each entry.
  # NOTE(review): the frame pattern was lost in the extracted source; a 32-bit
  # length prefix is assumed here - confirm against the upstream file before
  # reading logs written by another build.
  defp encode_entries(entries) do
    entries
    |> Enum.map(fn entry ->
      term_binary = :erlang.term_to_binary(entry)
      <<byte_size(term_binary)::32, term_binary::binary>>
    end)
    |> IO.iodata_to_binary()
  end

  # Decodes every frame in order; raises ArgumentError on a truncated/corrupt log
  # so callers can map it to `{:error, :invalid_term}`.
  defp decode_entries(binary), do: decode_entries(binary, [])

  defp decode_entries(<<>>, acc), do: Enum.reverse(acc)

  defp decode_entries(<<size::32, term_binary::binary-size(size), remaining::binary>>, acc) do
    entry = :erlang.binary_to_term(term_binary, [:safe])
    decode_entries(remaining, [entry | acc])
  end

  defp decode_entries(_truncated, _acc) do
    raise ArgumentError, "corrupt entries log: truncated frame"
  end

  # Keys are arbitrary terms, so the file name is a hash of the serialized key.
  defp checkpoint_path(base_path, key) do
    hash = :crypto.hash(:sha256, :erlang.term_to_binary(key)) |> Base.url_encode64(padding: false)
    Path.join([base_path, "checkpoints", "#{hash}.term"])
  end

  # NOTE(review): thread_id is joined into the path unvalidated. An id containing
  # path separators or ".." would resolve outside `threads/`, and delete_thread/2
  # passes the result to File.rm_rf/1. Validate or encode ids if they can come
  # from untrusted input.
  defp thread_path(base_path, thread_id) do
    Path.join([base_path, "threads", thread_id])
  end

  defp ensure_checkpoints_dir(base_path) do
    File.mkdir_p!(Path.join(base_path, "checkpoints"))
  end

  defp ensure_thread_dir(thread_dir) do
    File.mkdir_p!(thread_dir)

    # Ensure entries.log exists
    entries_file = Path.join(thread_dir, "entries.log")

    if not File.exists?(entries_file) do
      File.write!(entries_file, <<>>)
    end
  end

  # Runs `fun` while holding a cluster-wide lock for this thread.
  #
  # :global lock ids are `{resource_id, requester_id}`. The thread id must be part
  # of the resource, and the requester must be unique per caller; otherwise two
  # processes using the same requester id are both granted the lock at once.
  defp with_thread_lock(thread_id, fun) do
    lock_id = {{:jido_thread_lock, thread_id}, self()}
    :global.trans(lock_id, fun)
  end

  defp generate_entry_id do
    "entry_" <> Base.url_encode64(:crypto.strong_rand_bytes(12), padding: false)
  end
end
diff --git a/lib/jido/telemetry.ex b/lib/jido/telemetry.ex index 4efa71b..ec69229 100644 --- a/lib/jido/telemetry.ex +++ b/lib/jido/telemetry.ex @@ -1,10 +1,52 @@ defmodule Jido.Telemetry do @moduledoc """ - Handles telemetry events for Jido Agent and Strategy operations. + Production-ready telemetry for Jido Agent operations. - This module provides telemetry integration for agent command execution - and strategy lifecycle events. It tracks execution time, success/failure - rates, and provides debugging insights. + Provides structured, scannable logging with intelligent filtering to reduce noise + while preserving actionable debugging information. 
+ + ## Log Levels + + The telemetry system uses three effective log levels: + + - **INFO** - Developer narrative for user-facing interactions (request start/stop) + - **DEBUG** - Interesting events only (slow operations, signals with directives, errors) + - **TRACE** - Fine-grained internal churn (every signal/directive) - opt-in via config + + ## Configuration + + Configure via application environment: + + config :jido, :telemetry, + log_level: :debug, # :trace | :debug | :info + slow_signal_threshold_ms: 10, # Log signals slower than this + slow_directive_threshold_ms: 5, # Log directives slower than this + interesting_signal_types: [ # Always log these signal types + "jido.agent.user_request", + "jido.tool.result", + "jido.llm.done" + ], + log_prompts: false, # Privacy: don't log LLM prompts + log_tool_args: :keys_only # :keys_only | :full | :none + + ## "Interestingness" Filtering + + At DEBUG level, signals are only logged if they are "interesting": + - Duration exceeds `slow_signal_threshold_ms` + - Produced one or more directives + - Signal type is in `interesting_signal_types` + - An error occurred + + This reduces log spam from high-frequency internal signals while preserving + visibility into operations that matter. 
+ + ## Structured Output + + All log entries include structured metadata for filtering and correlation: + - `trace_id`, `span_id` - For distributed tracing + - `agent_id`, `agent_module` - Agent identification + - `signal_type`, `directive_count`, `directive_types` - What happened + - `duration` - Formatted timing (e.g., "12.3ms") ## Events @@ -32,20 +74,14 @@ defmodule Jido.Telemetry do - `[:jido, :agent, :strategy, :tick, :start]` - Strategy tick started - `[:jido, :agent, :strategy, :tick, :stop]` - Strategy tick completed - `[:jido, :agent, :strategy, :tick, :exception]` - Strategy tick failed - - ## Metadata - - All events include metadata about the agent, action, and strategy: - - `:agent_id` - The agent's unique identifier - - `:agent_module` - The agent module name - - `:strategy` - The strategy module name - - `:action` - The action being executed (for cmd events) - - `:directive_count` - Number of directives produced (for stop events) """ use GenServer require Logger + alias Jido.Telemetry.Config + alias Jido.Telemetry.Formatter + @typedoc """ Supported telemetry event names. """ @@ -212,53 +248,80 @@ defmodule Jido.Telemetry do @doc """ Handles telemetry events for agent and strategy operations. + + Uses intelligent filtering to reduce noise while preserving actionable information. + Events are logged based on "interestingness" criteria configured via `Jido.Telemetry.Config`. 
""" @spec handle_event(event_name(), measurements(), metadata(), config :: term()) :: :ok - def handle_event([:jido, :agent, :cmd, :start], _measurements, metadata, _config) do - Logger.debug("[Agent] Command started", - agent_id: metadata[:agent_id], - agent_module: metadata[:agent_module], - action: inspect(metadata[:action]) - ) + + # --------------------------------------------------------------------------- + # Agent Command Events + # --------------------------------------------------------------------------- + + def handle_event([:jido, :agent, :cmd, :start], _measurements, _metadata, _config) do + :ok end def handle_event([:jido, :agent, :cmd, :stop], measurements, metadata, _config) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) + duration_ms = Formatter.to_ms(duration) + directive_count = metadata[:directive_count] || 0 + + if interesting_agent_cmd?(duration_ms, directive_count, metadata) do + Logger.debug( + fn -> + "[agent.cmd] #{format_module(metadata[:agent_module])} " <> + "action=#{Formatter.format_action(metadata[:action])} " <> + "directives=#{directive_count} " <> + "duration=#{Formatter.format_duration(duration)}" + end, + agent_id: metadata[:agent_id], + trace_id: metadata[:trace_id], + span_id: metadata[:span_id] + ) + end - Logger.debug("[Agent] Command completed", - agent_id: metadata[:agent_id], - agent_module: metadata[:agent_module], - duration_μs: duration, - directive_count: metadata[:directive_count] - ) + :ok end def handle_event([:jido, :agent, :cmd, :exception], measurements, metadata, _config) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) - Logger.warning("[Agent] Command failed", + Logger.warning( + "[agent.cmd.error] #{format_module(metadata[:agent_module])} " <> + "action=#{Formatter.format_action(metadata[:action])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " 
<> + "duration=#{Formatter.format_duration(duration)}", agent_id: metadata[:agent_id], - agent_module: metadata[:agent_module], - duration_μs: duration, - error: inspect(metadata[:error]) + trace_id: metadata[:trace_id], + span_id: metadata[:span_id], + stacktrace: metadata[:stacktrace] ) end - def handle_event([:jido, :agent, :strategy, :init, :start], _measurements, metadata, _config) do - Logger.debug("[Strategy] Initialization started", - agent_id: metadata[:agent_id], - strategy: metadata[:strategy] - ) + # --------------------------------------------------------------------------- + # Strategy Events + # --------------------------------------------------------------------------- + + def handle_event([:jido, :agent, :strategy, :init, :start], _measurements, _metadata, _config) do + :ok end def handle_event([:jido, :agent, :strategy, :init, :stop], measurements, metadata, _config) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) + + if Config.trace_enabled?() do + Logger.debug( + fn -> + "[strategy.init] #{format_module(metadata[:strategy])} " <> + "duration=#{Formatter.format_duration(duration)}" + end, + agent_id: metadata[:agent_id], + trace_id: metadata[:trace_id] + ) + end - Logger.debug("[Strategy] Initialization completed", - agent_id: metadata[:agent_id], - strategy: metadata[:strategy], - duration_μs: duration - ) + :ok end def handle_event( @@ -267,33 +330,40 @@ defmodule Jido.Telemetry do metadata, _config ) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) - Logger.warning("[Strategy] Initialization failed", + Logger.warning( + "[strategy.init.error] #{format_module(metadata[:strategy])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}", agent_id: metadata[:agent_id], - strategy: metadata[:strategy], - duration_μs: duration, - error: 
inspect(metadata[:error]) + trace_id: metadata[:trace_id], + stacktrace: metadata[:stacktrace] ) end - def handle_event([:jido, :agent, :strategy, :cmd, :start], _measurements, metadata, _config) do - Logger.debug("[Strategy] Command execution started", - agent_id: metadata[:agent_id], - strategy: metadata[:strategy], - instruction_count: metadata[:instruction_count] - ) + def handle_event([:jido, :agent, :strategy, :cmd, :start], _measurements, _metadata, _config) do + :ok end def handle_event([:jido, :agent, :strategy, :cmd, :stop], measurements, metadata, _config) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) + duration_ms = Formatter.to_ms(duration) + directive_count = metadata[:directive_count] || 0 + + if interesting_strategy_cmd?(duration_ms, directive_count) do + Logger.debug( + fn -> + "[strategy.cmd] #{format_module(metadata[:strategy])} " <> + "directives=#{directive_count} " <> + "duration=#{Formatter.format_duration(duration)}" + end, + agent_id: metadata[:agent_id], + trace_id: metadata[:trace_id] + ) + end - Logger.debug("[Strategy] Command execution completed", - agent_id: metadata[:agent_id], - strategy: metadata[:strategy], - duration_μs: duration, - directive_count: metadata[:directive_count] - ) + :ok end def handle_event( @@ -302,31 +372,39 @@ defmodule Jido.Telemetry do metadata, _config ) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) - Logger.warning("[Strategy] Command execution failed", + Logger.warning( + "[strategy.cmd.error] #{format_module(metadata[:strategy])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}", agent_id: metadata[:agent_id], - strategy: metadata[:strategy], - duration_μs: duration, - error: inspect(metadata[:error]) + trace_id: metadata[:trace_id], + stacktrace: metadata[:stacktrace] ) end - def 
handle_event([:jido, :agent, :strategy, :tick, :start], _measurements, metadata, _config) do - Logger.debug("[Strategy] Tick started", - agent_id: metadata[:agent_id], - strategy: metadata[:strategy] - ) + def handle_event([:jido, :agent, :strategy, :tick, :start], _measurements, _metadata, _config) do + :ok end def handle_event([:jido, :agent, :strategy, :tick, :stop], measurements, metadata, _config) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) + duration_ms = Formatter.to_ms(duration) + + # Only log slow ticks - ticks are high frequency + if duration_ms > Config.slow_signal_threshold_ms() do + Logger.debug( + fn -> + "[strategy.tick] #{format_module(metadata[:strategy])} " <> + "duration=#{Formatter.format_duration(duration)} (slow)" + end, + agent_id: metadata[:agent_id], + trace_id: metadata[:trace_id] + ) + end - Logger.debug("[Strategy] Tick completed", - agent_id: metadata[:agent_id], - strategy: metadata[:strategy], - duration_μs: duration - ) + :ok end def handle_event( @@ -335,36 +413,48 @@ defmodule Jido.Telemetry do metadata, _config ) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) - Logger.warning("[Strategy] Tick failed", + Logger.warning( + "[strategy.tick.error] #{format_module(metadata[:strategy])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}", agent_id: metadata[:agent_id], - strategy: metadata[:strategy], - duration_μs: duration, - error: inspect(metadata[:error]) + trace_id: metadata[:trace_id], + stacktrace: metadata[:stacktrace] ) end # --------------------------------------------------------------------------- - # AgentServer Event Handlers + # AgentServer Signal Events - The main source of log noise # --------------------------------------------------------------------------- - def handle_event([:jido, :agent_server, 
:signal, :start], _measurements, metadata, _config) do - Logger.debug("[AgentServer] Signal processing started", - agent_id: metadata[:agent_id], - signal_type: metadata[:signal_type] - ) + def handle_event([:jido, :agent_server, :signal, :start], _measurements, _metadata, _config) do + :ok end def handle_event([:jido, :agent_server, :signal, :stop], measurements, metadata, _config) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) + duration_ms = Formatter.to_ms(duration) + directive_count = metadata[:directive_count] || 0 + signal_type = metadata[:signal_type] + + cond do + # At trace level, log everything + Config.trace_enabled?() -> + log_signal_stop(metadata, duration, directive_count) + + # At debug level, only log "interesting" signals + Config.debug_enabled?() and + interesting_signal?(signal_type, duration_ms, directive_count, metadata) -> + log_signal_stop(metadata, duration, directive_count) + + # Otherwise, stay silent + true -> + :ok + end - Logger.debug("[AgentServer] Signal processing completed", - agent_id: metadata[:agent_id], - signal_type: metadata[:signal_type], - duration_μs: duration, - directive_count: metadata[:directive_count] - ) + :ok end def handle_event( @@ -373,32 +463,47 @@ defmodule Jido.Telemetry do metadata, _config ) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) - Logger.warning("[AgentServer] Signal processing failed", + Logger.warning( + "[signal.error] type=#{Formatter.format_signal_type(metadata[:signal_type])} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}", agent_id: metadata[:agent_id], - signal_type: metadata[:signal_type], - duration_μs: duration, - error: inspect(metadata[:error]) + trace_id: metadata[:trace_id], + span_id: metadata[:span_id], + stacktrace: metadata[:stacktrace] ) end - def 
handle_event([:jido, :agent_server, :directive, :start], _measurements, metadata, _config) do - Logger.debug("[AgentServer] Directive execution started", - agent_id: metadata[:agent_id], - directive_type: metadata[:directive_type] - ) + # --------------------------------------------------------------------------- + # AgentServer Directive Events + # --------------------------------------------------------------------------- + + def handle_event([:jido, :agent_server, :directive, :start], _measurements, _metadata, _config) do + :ok end def handle_event([:jido, :agent_server, :directive, :stop], measurements, metadata, _config) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) + duration_ms = Formatter.to_ms(duration) + directive_type = metadata[:directive_type] + + cond do + # At trace level, log everything + Config.trace_enabled?() -> + log_directive_stop(metadata, duration) + + # At debug level, only log slow or interesting directives + Config.debug_enabled?() and interesting_directive?(directive_type, duration_ms, metadata) -> + log_directive_stop(metadata, duration) + + # Otherwise, stay silent + true -> + :ok + end - Logger.debug("[AgentServer] Directive execution completed", - agent_id: metadata[:agent_id], - directive_type: metadata[:directive_type], - duration_μs: duration, - result: metadata[:result] - ) + :ok end def handle_event( @@ -407,24 +512,109 @@ defmodule Jido.Telemetry do metadata, _config ) do - duration = Map.get(measurements, :duration, 0) |> to_microseconds() + duration = Map.get(measurements, :duration, 0) - Logger.warning("[AgentServer] Directive execution failed", + Logger.warning( + "[directive.error] type=#{metadata[:directive_type]} " <> + "error=#{Formatter.safe_inspect(metadata[:error], 200)} " <> + "duration=#{Formatter.format_duration(duration)}", agent_id: metadata[:agent_id], - directive_type: metadata[:directive_type], - duration_μs: duration, - error: 
inspect(metadata[:error]) + trace_id: metadata[:trace_id], + span_id: metadata[:span_id], + stacktrace: metadata[:stacktrace] ) end def handle_event([:jido, :agent_server, :queue, :overflow], measurements, metadata, _config) do - Logger.warning("[AgentServer] Queue overflow", + Logger.warning( + "[queue.overflow] signal_type=#{Formatter.format_signal_type(metadata[:signal_type])} " <> + "queue_size=#{measurements[:queue_size]}", + agent_id: metadata[:agent_id], + trace_id: metadata[:trace_id] + ) + end + + # --------------------------------------------------------------------------- + # Private: Logging Helpers + # --------------------------------------------------------------------------- + + defp log_signal_stop(metadata, duration, directive_count) do + Logger.debug( + fn -> + "[signal] type=#{Formatter.format_signal_type(metadata[:signal_type])} " <> + "directives=#{directive_count} " <> + "duration=#{Formatter.format_duration(duration)}" + end, + agent_id: metadata[:agent_id], + trace_id: metadata[:trace_id], + span_id: metadata[:span_id] + ) + end + + defp log_directive_stop(metadata, duration) do + Logger.debug( + fn -> + "[directive] type=#{metadata[:directive_type]} " <> + "result=#{Formatter.safe_inspect(metadata[:result])} " <> # result may be any term; inspect so interpolation cannot raise + "duration=#{Formatter.format_duration(duration)}" + end, agent_id: metadata[:agent_id], - signal_type: metadata[:signal_type], - queue_size: measurements[:queue_size] + trace_id: metadata[:trace_id], + span_id: metadata[:span_id] ) end + # --------------------------------------------------------------------------- + # Private: Interestingness Checks + # --------------------------------------------------------------------------- + + defp interesting_signal?(signal_type, duration_ms, directive_count, metadata) do + # A signal is interesting if any of these are true: + is_slow = duration_ms > Config.slow_signal_threshold_ms() + has_directives = directive_count > 0 + is_interesting_type = Config.interesting_signal_type?(to_string(signal_type)) + has_error = 
metadata[:error] != nil + + is_slow or has_directives or is_interesting_type or has_error + end + + defp interesting_directive?(directive_type, duration_ms, metadata) do + is_slow = duration_ms > Config.slow_directive_threshold_ms() + has_error = metadata[:error] != nil + + # Some directive types are always interesting + interesting_types = ["Tool", "LLM", "Await", "Spawn"] + is_interesting_type = directive_type in interesting_types + + is_slow or has_error or is_interesting_type + end + + defp interesting_agent_cmd?(duration_ms, directive_count, metadata) do + is_slow = duration_ms > Config.slow_signal_threshold_ms() + has_directives = directive_count > 0 + has_error = metadata[:error] != nil + + is_slow or has_directives or has_error + end + + defp interesting_strategy_cmd?(duration_ms, directive_count) do + is_slow = duration_ms > Config.slow_signal_threshold_ms() + has_directives = directive_count > 0 + + is_slow or has_directives + end + + defp format_module(nil), do: "unknown" + + defp format_module(module) when is_atom(module) do + case to_string(module) do + "Elixir." <> rest -> rest + other -> other + end + end + + defp format_module(other), do: Formatter.safe_inspect(other, 50) + @doc """ Executes an agent command while emitting telemetry events. @@ -538,9 +728,4 @@ defmodule Jido.Telemetry do :erlang.raise(kind, reason, stack) end end - - # Convert native monotonic time to microseconds for logging - defp to_microseconds(native_duration) do - System.convert_time_unit(native_duration, :native, :microsecond) - end end diff --git a/lib/jido/telemetry/config.ex b/lib/jido/telemetry/config.ex new file mode 100644 index 0000000..d07c8ef --- /dev/null +++ b/lib/jido/telemetry/config.ex @@ -0,0 +1,219 @@ +defmodule Jido.Telemetry.Config do + @moduledoc """ + Configuration for Jido's telemetry logging system. + + This module provides functions to check log levels, interestingness thresholds, + and privacy settings for telemetry events. 
+ + ## Configuration + + All configuration is read from application environment under `:jido, :telemetry`: + + config :jido, :telemetry, + log_level: :debug, # :trace | :debug | :info | :warning | :error + slow_signal_threshold_ms: 10, + slow_directive_threshold_ms: 5, + interesting_signal_types: ["jido.strategy.init", "jido.strategy.complete"], + log_args: :keys_only # :keys_only | :full | :none + + ## Log Levels + + The telemetry system supports five log levels in order of verbosity: + + - `:trace` - Very verbose, logs every signal and directive + - `:debug` - Logs interesting events and slow operations + - `:info` - Logs significant lifecycle events + - `:warning` - Logs potential issues + - `:error` - Logs only errors + + ## Examples + + # Check if trace logging is enabled + if Jido.Telemetry.Config.trace_enabled?() do + # Log detailed per-signal information + end + + # Check if an operation is slow + if duration_ms > Jido.Telemetry.Config.slow_signal_threshold_ms() do + # Log as interesting even at higher log levels + end + + # Check argument logging settings + case Jido.Telemetry.Config.log_args?() do + :full -> log_full_args(args) + :keys_only -> log_keys(args) + :none -> :skip + end + """ + + @default_log_level :debug + @default_slow_signal_threshold_ms 10 + @default_slow_directive_threshold_ms 5 + @default_interesting_signal_types [ + "jido.strategy.init", + "jido.strategy.complete" + ] + @default_log_args :keys_only + + @log_level_priority %{ + trace: 0, + debug: 1, + info: 2, + warning: 3, + error: 4 + } + + # Compile-time defaults for efficiency + @compile_log_level Application.compile_env(:jido, [:telemetry, :log_level], @default_log_level) + + @doc """ + Returns the current log level. 
+ + ## Examples + + iex> Jido.Telemetry.Config.log_level() + :debug + """ + @spec log_level() :: :trace | :debug | :info | :warning | :error + def log_level do + get_config(:log_level, @compile_log_level) + end + + @doc """ + Returns true if trace-level logging is enabled. + + Trace level is the most verbose, logging every signal and directive. + + ## Examples + + iex> Jido.Telemetry.Config.trace_enabled?() + false + """ + @spec trace_enabled?() :: boolean() + def trace_enabled? do + level_enabled?(:trace) + end + + @doc """ + Returns true if debug-level logging is enabled. + + ## Examples + + iex> Jido.Telemetry.Config.debug_enabled?() + true + """ + @spec debug_enabled?() :: boolean() + def debug_enabled? do + level_enabled?(:debug) + end + + @doc """ + Returns true if the given log level is enabled based on current configuration. + + ## Examples + + iex> Jido.Telemetry.Config.level_enabled?(:debug) + true + + iex> Jido.Telemetry.Config.level_enabled?(:trace) + false + """ + @spec level_enabled?(:trace | :debug | :info | :warning | :error) :: boolean() + def level_enabled?(level) do + current = log_level() + Map.get(@log_level_priority, level, 5) >= Map.get(@log_level_priority, current, 1) + end + + @doc """ + Returns the slow signal threshold in milliseconds. + + Signals taking longer than this are considered "interesting" and logged at debug level. + + Default: #{@default_slow_signal_threshold_ms}ms + + ## Examples + + iex> Jido.Telemetry.Config.slow_signal_threshold_ms() + 10 + """ + @spec slow_signal_threshold_ms() :: non_neg_integer() + def slow_signal_threshold_ms do + get_config(:slow_signal_threshold_ms, @default_slow_signal_threshold_ms) + end + + @doc """ + Returns the slow directive threshold in milliseconds. + + Directives taking longer than this are considered "interesting" and logged at debug level. 
+ + Default: #{@default_slow_directive_threshold_ms}ms + + ## Examples + + iex> Jido.Telemetry.Config.slow_directive_threshold_ms() + 5 + """ + @spec slow_directive_threshold_ms() :: non_neg_integer() + def slow_directive_threshold_ms do + get_config(:slow_directive_threshold_ms, @default_slow_directive_threshold_ms) + end + + @doc """ + Returns the list of signal types that are always considered "interesting". + + These signals are logged at debug level regardless of duration. + + Default: #{inspect(@default_interesting_signal_types)} + + ## Examples + + iex> "jido.strategy.init" in Jido.Telemetry.Config.interesting_signal_types() + true + """ + @spec interesting_signal_types() :: [String.t()] + def interesting_signal_types do + get_config(:interesting_signal_types, @default_interesting_signal_types) + end + + @doc """ + Returns true if the given signal type is considered "interesting". + + ## Examples + + iex> Jido.Telemetry.Config.interesting_signal_type?("jido.strategy.init") + true + + iex> Jido.Telemetry.Config.interesting_signal_type?("jido.some.random.signal") + false + """ + @spec interesting_signal_type?(String.t()) :: boolean() + def interesting_signal_type?(signal_type) do + signal_type in interesting_signal_types() + end + + @doc """ + Returns the action/directive arguments logging mode. + + - `:full` - Log complete arguments + - `:keys_only` - Log only the keys of arguments (default) + - `:none` - Do not log arguments + + Default: #{inspect(@default_log_args)} + + ## Examples + + iex> Jido.Telemetry.Config.log_args?() + :keys_only + """ + @spec log_args?() :: :keys_only | :full | :none + def log_args? 
do + get_config(:log_args, @default_log_args) + end + + # Private helpers + + defp get_config(key, default) do + Application.get_env(:jido, :telemetry, []) + |> Keyword.get(key, default) + end +end diff --git a/lib/jido/telemetry/formatter.ex b/lib/jido/telemetry/formatter.ex new file mode 100644 index 0000000..570f6e9 --- /dev/null +++ b/lib/jido/telemetry/formatter.ex @@ -0,0 +1,351 @@ +defmodule Jido.Telemetry.Formatter do + @moduledoc """ + Structured log formatting utilities for the Jido telemetry system. + + Provides functions to format durations, metadata, directives, and other + telemetry data into human-readable, scannable log output. + + ## Examples + + iex> Jido.Telemetry.Formatter.format_duration(1_500_000) + "1.5ms" + + iex> Jido.Telemetry.Formatter.format_metadata(%{agent_id: "abc123", trace_id: "xyz"}) + "agent_id=abc123 trace_id=xyz" + """ + + @default_max_length 100 + @default_value_max_length 50 + + @doc """ + Convert native time to human-readable format. + + Returns a string like "1.2ms", "342μs", or "2.1s" depending on the magnitude. + + ## Examples + + iex> Formatter.format_duration(1_000) + "1μs" + + iex> Formatter.format_duration(1_500_000) + "1.5ms" + + iex> Formatter.format_duration(2_500_000_000) + "2.5s" + """ + @spec format_duration(integer() | nil) :: String.t() + def format_duration(nil), do: "0μs" + + def format_duration(native_time) when is_integer(native_time) do + microseconds = System.convert_time_unit(native_time, :native, :microsecond) + + cond do + microseconds >= 1_000_000 -> + seconds = microseconds / 1_000_000 + "#{Float.round(seconds, 2)}s" + + microseconds >= 1_000 -> + ms = microseconds / 1_000 + "#{Float.round(ms, 2)}ms" + + true -> + "#{microseconds}μs" + end + end + + def format_duration(_), do: "??" + + @doc """ + Convert native time to milliseconds. + + Returns an integer when the result is a whole number of milliseconds, otherwise a float rounded to 3 decimal places. 
+ + ## Examples + + iex> Formatter.to_ms(1_500_000) + 1.5 + + iex> Formatter.to_ms(5_000_000) + 5 + """ + @spec to_ms(integer() | nil) :: number() + def to_ms(nil), do: 0 + + def to_ms(native_time) when is_integer(native_time) do + microseconds = System.convert_time_unit(native_time, :native, :microsecond) + ms = microseconds / 1_000 + + if ms == trunc(ms) do + trunc(ms) + else + Float.round(ms, 3) + end + end + + def to_ms(_), do: 0 + + @doc """ + Format metadata map into a scannable key=value string for logs. + + Handles nil values gracefully and truncates long values. + + ## Options + + * `:max_value_length` - Maximum length for individual values (default: 50) + + ## Examples + + iex> Formatter.format_metadata(%{agent_id: "abc123", trace_id: "xyz"}) + "agent_id=abc123 trace_id=xyz" + + iex> Formatter.format_metadata(%{key: nil, other: "value"}) + "other=value" + + iex> Formatter.format_metadata(nil) + "" + """ + @spec format_metadata(map() | nil, keyword()) :: String.t() + def format_metadata(metadata, opts \\ []) + def format_metadata(nil, _opts), do: "" + + def format_metadata(metadata, opts) when is_map(metadata) do + max_value_length = Keyword.get(opts, :max_value_length, @default_value_max_length) + + metadata + |> Enum.reject(fn {_k, v} -> is_nil(v) end) + |> Enum.sort_by(fn {k, _v} -> to_string(k) end) + |> Enum.map(fn {k, v} -> + formatted_value = format_value(v, max_value_length) + "#{k}=#{formatted_value}" + end) + |> Enum.join(" ") + end + + def format_metadata(_, _opts), do: "" + + defp format_value(value, max_length) when is_binary(value) do + truncate_string(value, max_length) + end + + defp format_value(value, _max_length) when is_atom(value) do + to_string(value) + end + + defp format_value(value, _max_length) when is_number(value) do + to_string(value) + end + + defp format_value(value, max_length) do + value + |> inspect(limit: 10, printable_limit: max_length) + |> truncate_string(max_length) + end + + defp truncate_string(str, max_length) when 
byte_size(str) <= max_length, do: str + + defp truncate_string(str, max_length) do + String.slice(str, 0, max_length - 3) <> "..." + end + + @doc """ + Returns a map of directive_type => count from a list of directives. + + ## Examples + + iex> directives = [%{type: :emit}, %{type: :emit}, %{type: :tool}] + iex> Formatter.summarize_directives(directives) + %{emit: 2, tool: 1} + + iex> Formatter.summarize_directives([]) + %{} + """ + @spec summarize_directives(list()) :: map() + def summarize_directives(nil), do: %{} + + def summarize_directives(directives) when is_list(directives) do + directives + |> Enum.map(&extract_directive_type/1) + |> Enum.reject(&is_nil/1) + |> Enum.frequencies() + end + + def summarize_directives(_), do: %{} + + defp extract_directive_type(%{type: type}), do: type + defp extract_directive_type({type, _}) when is_atom(type), do: type + defp extract_directive_type(type) when is_atom(type), do: type + defp extract_directive_type(_), do: nil + + @doc """ + Format directive summary map for logging. + + ## Examples + + iex> Formatter.format_directive_types(%{emit: 2, tool: 1, await: 1}) + "Await=1 Emit=2 Tool=1" + + iex> Formatter.format_directive_types(%{}) + "" + """ + @spec format_directive_types(map()) :: String.t() + def format_directive_types(nil), do: "" + def format_directive_types(summary) when map_size(summary) == 0, do: "" + + def format_directive_types(summary) when is_map(summary) do + summary + |> Enum.sort_by(fn {k, _v} -> to_string(k) end) + |> Enum.map(fn {type, count} -> + formatted_type = type |> to_string() |> String.capitalize() + "#{formatted_type}=#{count}" + end) + |> Enum.join(" ") + end + + def format_directive_types(_), do: "" + + @doc """ + Format signal type for logging. + + Handles atoms and strings, converting to a readable format. 
+ + ## Examples + + iex> Formatter.format_signal_type(:user_request) + "user_request" + + iex> Formatter.format_signal_type("api_call") + "api_call" + + iex> Formatter.format_signal_type(nil) + "unknown" + """ + @spec format_signal_type(atom() | String.t() | nil) :: String.t() + def format_signal_type(nil), do: "unknown" + def format_signal_type(type) when is_atom(type), do: to_string(type) + def format_signal_type(type) when is_binary(type), do: type + def format_signal_type(_), do: "unknown" + + @doc """ + Summarize action for logging without exposing full data. + + Shows the action/module name and parameter keys but not values. + + ## Examples + + iex> Formatter.format_action({:analyze, %{query: "secret", context: %{}}}) + "{:analyze, keys: [:context, :query]}" + + iex> Formatter.format_action(MyApp.Actions.ProcessData) + "MyApp.Actions.ProcessData" + + iex> Formatter.format_action(%{action: :run, params: %{x: 1}}) + "{:run, keys: [:x]}" + """ + @spec format_action(term()) :: String.t() + def format_action(nil), do: "nil" + + def format_action({action, params}) when is_atom(action) and is_map(params) do + keys = params |> Map.keys() |> Enum.sort() + "{#{inspect(action)}, keys: #{inspect(keys)}}" + end + + def format_action({action, params}) when is_atom(action) and is_list(params) do + "{#{inspect(action)}, arity: #{length(params)}}" + end + + def format_action(%{action: action, params: params}) when is_map(params) do + keys = params |> Map.keys() |> Enum.sort() + "{#{inspect(action)}, keys: #{inspect(keys)}}" + end + + def format_action(%{action: action}) do + inspect(action) + end + + def format_action(module) when is_atom(module) do + case to_string(module) do + "Elixir." <> rest -> rest + other -> other + end + end + + def format_action(action) do + safe_inspect(action, 60) + end + + @doc """ + Inspect a term with a length limit. + + Safely inspects any term and truncates if necessary. 
+ + ## Examples + + iex> Formatter.safe_inspect(%{a: 1, b: 2}) + "%{a: 1, b: 2}" + + iex> Formatter.safe_inspect(String.duplicate("x", 200), 50) + "\\\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx..." + """ + @spec safe_inspect(term(), pos_integer()) :: String.t() + def safe_inspect(term, max_length \\ @default_max_length) do + term + |> inspect(limit: 10, printable_limit: max_length, width: max_length) + |> truncate_string(max_length) + rescue + _ -> "" + end + + @doc """ + Extract just the keys from a map for safe logging. + + Useful when you want to log what fields are present without exposing values. + + ## Examples + + iex> Formatter.extract_keys(%{user_id: 123, secret: "password", data: %{}}) + [:data, :secret, :user_id] + + iex> Formatter.extract_keys(nil) + [] + + iex> Formatter.extract_keys("not a map") + [] + """ + @spec extract_keys(map() | nil) :: list(atom() | String.t()) + def extract_keys(nil), do: [] + + def extract_keys(map) when is_map(map) do + map |> Map.keys() |> Enum.sort() + end + + def extract_keys(_), do: [] + + @doc """ + Format a complete log line with standard telemetry fields. + + Combines multiple formatting functions into a single log-ready string. 
+ + ## Examples + + iex> Formatter.format_log_line(%{ + ...> agent_id: "agent-1", + ...> trace_id: "trace-abc", + ...> signal_type: :user_request, + ...> duration: 1_500_000 + ...> }) + "agent_id=agent-1 duration=1.5ms signal_type=user_request trace_id=trace-abc" + """ + @spec format_log_line(map()) :: String.t() + def format_log_line(fields) when is_map(fields) do + fields + |> Enum.map(fn + {:duration, v} -> {:duration, format_duration(v)} + {:signal_type, v} -> {:signal_type, format_signal_type(v)} + {k, v} -> {k, v} + end) + |> Map.new() + |> format_metadata() + end + + def format_log_line(_), do: "" +end diff --git a/lib/jido/thread.ex b/lib/jido/thread.ex new file mode 100644 index 0000000..3db724b --- /dev/null +++ b/lib/jido/thread.ex @@ -0,0 +1,147 @@ +defmodule Jido.Thread do + @moduledoc """ + An append-only log of interaction entries. + + Thread is the canonical record of "what happened" in a conversation + or workflow. It is provider-agnostic and never modified destructively. + + LLM context is derived from Thread via projection functions, not + stored directly in Thread. 
+ + ## Example + + thread = Thread.new(metadata: %{user_id: "u1"}) + + thread = Thread.append(thread, %{ + kind: :message, + payload: %{role: "user", content: "Hello"} + }) + + Thread.entry_count(thread) # => 1 + Thread.last(thread).kind # => :message + """ + + alias Jido.Thread.Entry + + @schema Zoi.struct( + __MODULE__, + %{ + id: Zoi.string(description: "Unique thread identifier"), + rev: + Zoi.integer(description: "Monotonic revision, increments on append") + |> Zoi.default(0), + entries: + Zoi.list(Zoi.any(), description: "Ordered list of Entry structs") + |> Zoi.default([]), + created_at: Zoi.integer(description: "Creation timestamp (ms)"), + updated_at: Zoi.integer(description: "Last update timestamp (ms)"), + metadata: Zoi.map(description: "Arbitrary metadata") |> Zoi.default(%{}), + stats: Zoi.map(description: "Cached aggregates") |> Zoi.default(%{entry_count: 0}) + }, + coerce: true + ) + + @type t :: unquote(Zoi.type_spec(@schema)) + @enforce_keys Zoi.Struct.enforce_keys(@schema) + defstruct Zoi.Struct.struct_fields(@schema) + + @doc "Create a new empty thread" + @spec new(keyword()) :: t() + def new(opts \\ []) do + now = opts[:now] || System.system_time(:millisecond) + + %__MODULE__{ + id: opts[:id] || generate_id(), + rev: 0, + entries: [], + created_at: now, + updated_at: now, + metadata: opts[:metadata] || %{}, + stats: %{entry_count: 0} + } + end + + @doc "Append entries to thread (returns new thread)" + @spec append(t(), Entry.t() | map() | [Entry.t() | map()]) :: t() + def append(%__MODULE__{} = thread, entries) do + entries = List.wrap(entries) + now = System.system_time(:millisecond) + base_seq = length(thread.entries) + + prepared_entries = + entries + |> Enum.with_index() + |> Enum.map(fn {entry, idx} -> + prepare_entry(entry, base_seq + idx, now) + end) + + %{ + thread + | entries: thread.entries ++ prepared_entries, + rev: thread.rev + length(prepared_entries), + updated_at: now, + stats: %{thread.stats | entry_count: 
thread.stats.entry_count + length(prepared_entries)} + } + end + + @doc "Get entry count" + @spec entry_count(t()) :: non_neg_integer() + def entry_count(%__MODULE__{stats: %{entry_count: count}}), do: count + + @doc "Get last entry" + @spec last(t()) :: Entry.t() | nil + def last(%__MODULE__{entries: []}), do: nil + def last(%__MODULE__{entries: entries}), do: List.last(entries) + + @doc "Get entry by seq" + @spec get_entry(t(), non_neg_integer()) :: Entry.t() | nil + def get_entry(%__MODULE__{entries: entries}, seq) do + Enum.find(entries, &(&1.seq == seq)) + end + + @doc "Get all entries as list" + @spec to_list(t()) :: [Entry.t()] + def to_list(%__MODULE__{entries: entries}), do: entries + + @doc "Filter entries by kind" + @spec filter_by_kind(t(), atom() | [atom()]) :: [Entry.t()] + def filter_by_kind(%__MODULE__{entries: entries}, kinds) when is_list(kinds) do + Enum.filter(entries, &(&1.kind in kinds)) + end + + def filter_by_kind(thread, kind), do: filter_by_kind(thread, [kind]) + + @doc "Get entries in seq range (inclusive)" + @spec slice(t(), non_neg_integer(), non_neg_integer()) :: [Entry.t()] + def slice(%__MODULE__{entries: entries}, from_seq, to_seq) do + Enum.filter(entries, fn e -> e.seq >= from_seq and e.seq <= to_seq end) + end + + defp prepare_entry(%Entry{} = entry, seq, now) do + %{ + entry + | id: entry.id || generate_entry_id(), + seq: seq, + at: entry.at || now + } + end + + defp prepare_entry(attrs, seq, now) when is_map(attrs) do + %Entry{ + id: attrs[:id] || attrs["id"] || generate_entry_id(), + seq: seq, + at: attrs[:at] || attrs["at"] || now, + kind: attrs[:kind] || attrs["kind"] || :note, + payload: attrs[:payload] || attrs["payload"] || %{}, + refs: attrs[:refs] || attrs["refs"] || %{} + } + end + + defp generate_id do + "thread_" <> Jido.Util.generate_id() + end + + defp generate_entry_id do + "entry_" <> Jido.Util.generate_id() + end +end diff --git a/lib/jido/thread/agent.ex b/lib/jido/thread/agent.ex new file mode 100644 index 
0000000..43c9d2f --- /dev/null +++ b/lib/jido/thread/agent.ex @@ -0,0 +1,70 @@ +defmodule Jido.Thread.Agent do + @moduledoc """ + Helper for managing Thread in agent state. + + Thread is stored at the reserved key `:__thread__` in `agent.state`. + This follows the same pattern as `:__strategy__` for strategy state. + + ## Example + + alias Jido.Thread.Agent, as: ThreadAgent + + # Ensure agent has a thread + agent = ThreadAgent.ensure(agent, metadata: %{user_id: "u1"}) + + # Append an entry + agent = ThreadAgent.append(agent, %{kind: :message, payload: %{text: "hi"}}) + + # Get the thread + thread = ThreadAgent.get(agent) + """ + + alias Jido.Agent + alias Jido.Thread + + @key :__thread__ + + @doc "Returns the reserved key for thread storage" + @spec key() :: atom() + def key, do: @key + + @doc "Get thread from agent state" + @spec get(Agent.t(), Thread.t() | nil) :: Thread.t() | nil + def get(%Agent{state: state}, default \\ nil) do + Map.get(state, @key, default) + end + + @doc "Put thread into agent state" + @spec put(Agent.t(), Thread.t()) :: Agent.t() + def put(%Agent{} = agent, %Thread{} = thread) do + %{agent | state: Map.put(agent.state, @key, thread)} + end + + @doc "Update thread using a function" + @spec update(Agent.t(), (Thread.t() | nil -> Thread.t())) :: Agent.t() + def update(%Agent{} = agent, fun) when is_function(fun, 1) do + current = get(agent) + put(agent, fun.(current)) + end + + @doc "Ensure agent has a thread (initialize if missing)" + @spec ensure(Agent.t(), keyword()) :: Agent.t() + def ensure(%Agent{} = agent, opts \\ []) do + case get(agent) do + nil -> put(agent, Thread.new(opts)) + _thread -> agent + end + end + + @doc "Append entry to agent's thread (ensures thread exists)" + @spec append(Agent.t(), term(), keyword()) :: Agent.t() + def append(%Agent{} = agent, entry_or_entries, opts \\ []) do + agent = ensure(agent, opts) + thread = get(agent) + put(agent, Thread.append(thread, entry_or_entries)) + end + + @doc "Check if agent has a 
thread" + @spec has_thread?(Agent.t()) :: boolean() + def has_thread?(%Agent{} = agent), do: get(agent) != nil +end diff --git a/lib/jido/thread/entry.ex b/lib/jido/thread/entry.ex new file mode 100644 index 0000000..2e8cf10 --- /dev/null +++ b/lib/jido/thread/entry.ex @@ -0,0 +1,62 @@ +defmodule Jido.Thread.Entry do + @moduledoc """ + A single entry in a Thread. Typed by `kind` with kind-specific payload. + + Entries are immutable once appended. The `refs` map provides cross-links + to other Jido primitives (signals, instructions, actions). + + ## Entry Kinds + + Kinds are open - any atom is accepted. Recommended kinds include: + - `:message` - User/assistant/system message + - `:tool_call` - Tool execution request + - `:tool_result` - Tool execution result + - `:signal_in` / `:signal_out` - Signal events + - `:instruction_start` / `:instruction_end` - Instruction execution + - `:note` - Human annotation + - `:error` - Error occurred + - `:checkpoint` - State snapshot marker + + ## Refs Conventions + + Common ref keys (not enforced): + - `signal_id` - Associated signal ID + - `instruction_id` - Associated instruction ID + - `action` - Action module name + - `agent_id` - Agent ID + - `parent_thread_id` / `child_thread_id` - Thread relationships + """ + + @schema Zoi.struct( + __MODULE__, + %{ + id: Zoi.string(description: "Unique entry identifier"), + seq: Zoi.integer(description: "Monotonic sequence within thread"), + at: Zoi.integer(description: "Timestamp (ms)"), + kind: Zoi.atom(description: "Entry type - open, any atom accepted"), + payload: Zoi.map(description: "Kind-specific data") |> Zoi.default(%{}), + refs: + Zoi.map(description: "Cross-references to other primitives") |> Zoi.default(%{}) + }, + coerce: true + ) + + @type t :: unquote(Zoi.type_spec(@schema)) + @enforce_keys Zoi.Struct.enforce_keys(@schema) + defstruct Zoi.Struct.struct_fields(@schema) + + @doc "Create a new entry from attributes" + @spec new(map() | keyword()) :: t() + def new(attrs) when 
is_list(attrs), do: new(Map.new(attrs)) + + def new(attrs) when is_map(attrs) do + %__MODULE__{ + id: attrs[:id] || Map.get(attrs, "id"), + seq: attrs[:seq] || Map.get(attrs, "seq") || 0, + at: attrs[:at] || Map.get(attrs, "at") || System.system_time(:millisecond), + kind: attrs[:kind] || Map.get(attrs, "kind") || :note, + payload: attrs[:payload] || Map.get(attrs, "payload") || %{}, + refs: attrs[:refs] || Map.get(attrs, "refs") || %{} + } + end +end diff --git a/lib/jido/thread/store.ex b/lib/jido/thread/store.ex new file mode 100644 index 0000000..13c575c --- /dev/null +++ b/lib/jido/thread/store.ex @@ -0,0 +1,105 @@ +defmodule Jido.Thread.Store do + @moduledoc """ + Persistence behavior for Thread storage. + + Store operations return updated store state to preserve purity + for adapters that don't use external processes. + + ## Example + + {:ok, store} = Thread.Store.new() + + thread = Thread.new(id: "t1") + {:ok, store} = Thread.Store.save(store, thread) + + {:ok, store, loaded} = Thread.Store.load(store, "t1") + """ + + alias Jido.Thread + alias Jido.Thread.Entry + + @type adapter_state :: term() + @type t :: %__MODULE__{adapter: module(), adapter_state: adapter_state()} + + defstruct [:adapter, :adapter_state] + + @doc "Initialize adapter state" + @callback init(opts :: keyword()) :: {:ok, adapter_state()} | {:error, term()} + + @doc "Load thread by ID" + @callback load(adapter_state(), thread_id :: String.t()) :: + {:ok, adapter_state(), Thread.t()} | {:error, adapter_state(), :not_found | term()} + + @doc "Save thread" + @callback save(adapter_state(), Thread.t()) :: + {:ok, adapter_state()} | {:error, adapter_state(), term()} + + @doc "Append entries to thread" + @callback append(adapter_state(), thread_id :: String.t(), [Entry.t()]) :: + {:ok, adapter_state(), Thread.t()} | {:error, adapter_state(), term()} + + @doc "Create new store with adapter" + @spec new(module(), keyword()) :: {:ok, t()} | {:error, term()} + def new(adapter \\ 
@doc """
Load thread from store.

Delegates to the adapter's `load/2` and writes the adapter state it returns
back into the store, on both the success and the error path.
"""
@spec load(t(), String.t()) :: {:ok, t(), Thread.t()} | {:error, t(), term()}
def load(%__MODULE__{adapter: adapter, adapter_state: state} = store, thread_id) do
  case adapter.load(state, thread_id) do
    {:ok, new_state, thread} -> {:ok, put_adapter_state(store, new_state), thread}
    {:error, new_state, reason} -> {:error, put_adapter_state(store, new_state), reason}
  end
end

@doc """
Save thread to store.

Delegates to the adapter's `save/2` and returns the store carrying the
adapter state produced by that call.
"""
@spec save(t(), Thread.t()) :: {:ok, t()} | {:error, t(), term()}
def save(%__MODULE__{adapter: adapter, adapter_state: state} = store, thread) do
  case adapter.save(state, thread) do
    {:ok, new_state} -> {:ok, put_adapter_state(store, new_state)}
    {:error, new_state, reason} -> {:error, put_adapter_state(store, new_state), reason}
  end
end

@doc """
Append entries to thread in store.

Accepts a single entry or a list; a single entry is wrapped into a list
before it is handed to the adapter's `append/3`.
"""
@spec append(t(), String.t(), Entry.t() | [Entry.t()]) ::
        {:ok, t(), Thread.t()} | {:error, t(), term()}
def append(%__MODULE__{adapter: adapter, adapter_state: state} = store, thread_id, entries) do
  case adapter.append(state, thread_id, List.wrap(entries)) do
    {:ok, new_state, thread} -> {:ok, put_adapter_state(store, new_state), thread}
    {:error, new_state, reason} -> {:error, put_adapter_state(store, new_state), reason}
  end
end

@doc """
Delete thread from store.

`delete/2` is optional for adapters. When the adapter does not export it,
`{:error, store, :not_implemented}` is returned with the store unchanged.
"""
@spec delete(t(), String.t()) :: {:ok, t()} | {:error, t(), term()}
def delete(%__MODULE__{adapter: adapter, adapter_state: state} = store, thread_id) do
  if adapter_exports?(adapter, :delete, 2) do
    case adapter.delete(state, thread_id) do
      {:ok, new_state} -> {:ok, put_adapter_state(store, new_state)}
      {:error, new_state, reason} -> {:error, put_adapter_state(store, new_state), reason}
    end
  else
    {:error, store, :not_implemented}
  end
end

@doc """
List all thread IDs in store.

`list/1` is optional for adapters. When the adapter does not export it,
`{:error, store, :not_implemented}` is returned with the store unchanged.
"""
@spec list(t()) :: {:ok, t(), [String.t()]} | {:error, t(), term()}
def list(%__MODULE__{adapter: adapter, adapter_state: state} = store) do
  if adapter_exports?(adapter, :list, 1) do
    case adapter.list(state) do
      {:ok, new_state, ids} -> {:ok, put_adapter_state(store, new_state), ids}
      {:error, new_state, reason} -> {:error, put_adapter_state(store, new_state), reason}
    end
  else
    {:error, store, :not_implemented}
  end
end

# Returns the store with its adapter state replaced.
defp put_adapter_state(store, new_state), do: %{store | adapter_state: new_state}

# `function_exported?/3` does not load the module, so an adapter that has not
# been loaded yet would be reported as lacking the function. Load it first.
defp adapter_exports?(adapter, fun, arity) do
  Code.ensure_loaded?(adapter) and function_exported?(adapter, fun, arity)
end
defmodule Jido.Thread.Store.Adapters.JournalBacked do
  @moduledoc """
  Thread persistence adapter backed by Jido.Signal.Journal.

  Stores thread entries as signals with type "jido.thread.entry".
  Thread ordering is by entry.seq (authoritative), not signal time.

  ## Mapping

  - `thread_id` → `signal.subject`
  - Each entry → Signal with type "jido.thread.entry"
  - `entry.seq` stored in signal.data for ordering (authoritative)

  ## Usage

      {:ok, store} = Jido.Thread.Store.new(Jido.Thread.Store.Adapters.JournalBacked)

      # With custom journal adapter
      {:ok, store} = Jido.Thread.Store.new(
        Jido.Thread.Store.Adapters.JournalBacked,
        journal_adapter: Jido.Signal.Journal.Adapters.ETS
      )
  """

  @behaviour Jido.Thread.Store

  alias Jido.Signal
  alias Jido.Signal.Journal
  alias Jido.Thread
  alias Jido.Thread.Entry

  @signal_type "jido.thread.entry"

  # Builds the adapter state: a journal created with the `:journal_adapter`
  # option, defaulting to the in-memory journal adapter.
  @impl true
  def init(opts) do
    adapter = Keyword.get(opts, :journal_adapter, Journal.Adapters.InMemory)
    {:ok, %{journal: Journal.new(adapter)}}
  end

  # Rebuilds a thread from the journal conversation for `thread_id`.
  # Only signals of type "jido.thread.entry" that decode to an entry are
  # used, ordered by `seq`. No usable entries means `:not_found`.
  @impl true
  def load(%{journal: journal} = state, thread_id) do
    entries =
      journal
      |> Journal.get_conversation(thread_id)
      |> Enum.filter(&(&1.type == @signal_type))
      |> Enum.map(&decode_entry/1)
      |> Enum.reject(&is_nil/1)
      |> Enum.sort_by(& &1.seq)

    case entries do
      [] -> {:error, state, :not_found}
      _ -> {:ok, state, reconstruct_thread(thread_id, entries)}
    end
  end

  # Records every entry of `thread` as a signal, stopping at the first
  # failure. On failure the adapter state passed in is returned unchanged.
  @impl true
  def save(%{journal: journal} = state, %Thread{} = thread) do
    case record_entries(journal, thread.id, thread.entries) do
      {:ok, journal} -> {:ok, %{state | journal: journal}}
      {:error, reason} -> {:error, state, reason}
    end
  end

  # Appends `entries` after the entries already stored for `thread_id`.
  # New entries are numbered from the count of existing entries; any `seq`
  # carried by the input is ignored. Missing id/at/kind/payload/refs get
  # defaults (generated id, current time, `:note`, `%{}`, `%{}`).
  @impl true
  def append(%{journal: journal} = state, thread_id, entries) do
    existing_entries =
      case load(state, thread_id) do
        {:ok, _state, thread} -> thread.entries
        {:error, _state, :not_found} -> []
      end

    now = System.system_time(:millisecond)

    prepared =
      entries
      |> Enum.with_index(length(existing_entries))
      |> Enum.map(fn {entry, seq} -> prepare_entry(entry, seq, now) end)

    case record_entries(journal, thread_id, prepared) do
      {:ok, journal} ->
        thread = reconstruct_thread(thread_id, existing_entries ++ prepared)
        {:ok, %{state | journal: journal}, thread}

      {:error, reason} ->
        {:error, state, reason}
    end
  end

  # Records entries one by one, threading the journal through each call.
  # Halts on the first non-`{:ok, journal}` result and returns it as-is.
  defp record_entries(journal, thread_id, entries) do
    Enum.reduce_while(entries, {:ok, journal}, fn entry, {:ok, acc} ->
      case Journal.record(acc, encode_entry(thread_id, entry)) do
        {:ok, acc} -> {:cont, {:ok, acc}}
        error -> {:halt, error}
      end
    end)
  end

  # Normalizes caller input (an `%Entry{}`, a plain map, or anything else)
  # into a fully populated `%Entry{}` at position `seq`.
  defp prepare_entry(entry, seq, now) do
    %Entry{
      id: entry_field(entry, :id) || generate_id(),
      seq: seq,
      at: entry_field(entry, :at) || now,
      kind: entry_field(entry, :kind) || :note,
      payload: entry_field(entry, :payload) || %{},
      refs: entry_field(entry, :refs) || %{}
    }
  end

  # Atom-keyed field lookup that tolerates non-map input (returns nil).
  defp entry_field(%{} = entry, key), do: Map.get(entry, key)
  defp entry_field(_entry, _key), do: nil

  defp encode_entry(thread_id, %Entry{} = entry) do
    Signal.new!(%{
      id: "sig_#{entry.id}",
      type: @signal_type,
      source: "jido.thread",
      subject: thread_id,
      time: DateTime.to_iso8601(DateTime.utc_now()),
      data: %{
        entry_id: entry.id,
        seq: entry.seq,
        at: entry.at,
        kind: entry.kind,
        payload: entry.payload,
        refs: entry.refs
      }
    })
  end

  # Decodes signal data with either string or atom keys (string keys win).
  # Struct payloads are excluded by the guard because structs do not
  # implement Access; they fall through to the catch-all clause and yield nil.
  defp decode_entry(%Signal{data: data}) when is_map(data) and not is_struct(data) do
    %Entry{
      id: data["entry_id"] || data[:entry_id],
      seq: data["seq"] || data[:seq],
      at: data["at"] || data[:at],
      kind: to_atom(data["kind"] || data[:kind]),
      payload: data["payload"] || data[:payload] || %{},
      refs: data["refs"] || data[:refs] || %{}
    }
  rescue
    # A kind string that cannot become an atom makes the entry undecodable;
    # load/2 drops nil results.
    ArgumentError -> nil
  end

  defp decode_entry(_), do: nil

  defp to_atom(atom) when is_atom(atom), do: atom

  # NOTE(review): the fallback creates atoms from persisted data. Kinds are
  # documented as open, so the fallback is kept, but atoms are never garbage
  # collected - journals holding untrusted kind strings can exhaust the atom
  # table.
  defp to_atom(string) when is_binary(string) do
    String.to_existing_atom(string)
  rescue
    ArgumentError -> String.to_atom(string)
  end

  defp to_atom(_), do: :unknown

  # Builds the Thread struct for `entries`; created/updated timestamps are
  # the min/max entry `at`, or the current time when there are no entries.
  defp reconstruct_thread(thread_id, entries) do
    count = length(entries)
    timestamps = Enum.map(entries, & &1.at)
    now = System.system_time(:millisecond)

    %Thread{
      id: thread_id,
      rev: count,
      entries: entries,
      created_at: Enum.min(timestamps, fn -> now end),
      updated_at: Enum.max(timestamps, fn -> now end),
      metadata: %{},
      stats: %{entry_count: count}
    }
  end

  defp generate_id, do: "entry_" <> random_string(12)

  # URL-safe random string of `length` characters.
  defp random_string(length) do
    length
    |> :crypto.strong_rand_bytes()
    |> Base.url_encode64()
    |> binary_part(0, length)
  end
end
Jido.MixProject do {"guides/observability.md", title: "Observability"}, {"guides/testing.md", title: "Testing"}, {"guides/configuration.md", title: "Configuration"}, - {"guides/persistence.md", title: "Persistence"}, + {"guides/storage.md", title: "Persistence & Storage"}, {"guides/worker-pools.md", title: "Worker Pools"}, {"guides/scheduling.md", title: "Scheduling"}, @@ -191,10 +191,19 @@ defmodule Jido.MixProject do Jido.Agent.Strategy, Jido.Agent.Strategy.Direct, Jido.Agent.Strategy.FSM, - Jido.Agent.Strategy.State + Jido.Agent.Strategy.FSM.Machine, + Jido.Agent.Strategy.State, + Jido.Agent.Strategy.Snapshot ], Skills: [ - Jido.Skill + Jido.Skill, + Jido.Skill.Config, + Jido.Skill.Instance, + Jido.Skill.Manifest, + Jido.Skill.Requirements, + Jido.Skill.Routes, + Jido.Skill.Schedules, + Jido.Skill.Spec ], Directives: [ Jido.Agent.Directive, @@ -208,22 +217,85 @@ defmodule Jido.MixProject do Jido.Agent.Directive.Cron, Jido.Agent.Directive.CronCancel ], - "Agent Components": [ + "State Operations": [ + Jido.Agent.StateOp, + Jido.Agent.StateOp.SetState, + Jido.Agent.StateOp.ReplaceState, + Jido.Agent.StateOp.DeleteKeys, + Jido.Agent.StateOp.SetPath, + Jido.Agent.StateOp.DeletePath, + Jido.Agent.StateOps + ], + "Agent Internals": [ Jido.Agent.State, Jido.Agent.Schema, - Jido.Agent.StateOps, - Jido.Agent.StateOp, Jido.AgentServer.State, + Jido.AgentServer.State.Lifecycle, Jido.AgentServer.Status, Jido.AgentServer.Options, Jido.AgentServer.ErrorPolicy, - Jido.AgentServer.SignalRouter + Jido.AgentServer.SignalRouter, + Jido.AgentServer.ChildInfo, + Jido.AgentServer.DirectiveExec, + Jido.AgentServer.Lifecycle, + Jido.AgentServer.Lifecycle.Keyed, + Jido.AgentServer.Lifecycle.Noop, + Jido.AgentServer.Signal.ChildStarted, + Jido.AgentServer.Signal.ChildExit, + Jido.AgentServer.Signal.CronTick, + Jido.AgentServer.Signal.Orphaned, + Jido.AgentServer.Signal.Scheduled ], "Built-in Actions": [ Jido.Actions.Control, + Jido.Actions.Control.Broadcast, + 
Jido.Actions.Control.Cancel, + Jido.Actions.Control.Forward, + Jido.Actions.Control.Noop, + Jido.Actions.Control.Reply, Jido.Actions.Lifecycle, + Jido.Actions.Lifecycle.NotifyParent, + Jido.Actions.Lifecycle.NotifyPid, + Jido.Actions.Lifecycle.SpawnChild, + Jido.Actions.Lifecycle.StopChild, + Jido.Actions.Lifecycle.StopSelf, Jido.Actions.Scheduling, - Jido.Actions.Status + Jido.Actions.Scheduling.CancelCron, + Jido.Actions.Scheduling.ScheduleCron, + Jido.Actions.Scheduling.ScheduleSignal, + Jido.Actions.Scheduling.ScheduleTimeout, + Jido.Actions.Status, + Jido.Actions.Status.MarkCompleted, + Jido.Actions.Status.MarkFailed, + Jido.Actions.Status.MarkIdle, + Jido.Actions.Status.MarkWorking, + Jido.Actions.Status.SetStatus + ], + Sensors: [ + Jido.Sensor, + Jido.Sensor.Runtime, + Jido.Sensor.Spec, + Jido.Sensors.Heartbeat, + Jido.Sensors.Bus + ], + Thread: [ + Jido.Thread, + Jido.Thread.Agent, + Jido.Thread.Entry, + Jido.Thread.Store, + Jido.Thread.Store.Adapters.InMemory, + Jido.Thread.Store.Adapters.JournalBacked + ], + Storage: [ + Jido.Storage, + Jido.Storage.ETS, + Jido.Storage.File, + Jido.Persist, + Jido.Agent.InstanceManager, + Jido.Agent.Persistence, + Jido.Agent.Store, + Jido.Agent.Store.ETS, + Jido.Agent.Store.File ], Observability: [ Jido.Observe, @@ -231,7 +303,11 @@ defmodule Jido.MixProject do Jido.Observe.Tracer, Jido.Observe.NoopTracer, Jido.Observe.SpanCtx, - Jido.Telemetry + Jido.Telemetry, + Jido.Telemetry.Config, + Jido.Telemetry.Formatter, + Jido.Tracing.Context, + Jido.Tracing.Trace ], Utilities: [ Jido.Discovery, @@ -239,6 +315,14 @@ defmodule Jido.MixProject do Jido.Scheduler, Jido.Util, Jido.Agent.WorkerPool + ], + Exceptions: [ + Jido.Error.CompensationError, + Jido.Error.ExecutionError, + Jido.Error.InternalError, + Jido.Error.RoutingError, + Jido.Error.TimeoutError, + Jido.Error.ValidationError ] ] ] diff --git a/test/jido/integration/hibernate_thaw_test.exs b/test/jido/integration/hibernate_thaw_test.exs new file mode 100644 index 
defmodule JidoTest.Integration.HibernateThawTest do
  use ExUnit.Case, async: true

  alias Jido.Thread
  alias Jido.Thread.Agent, as: ThreadAgent

  # Minimal agent with three schema fields, used by every test below.
  defmodule WorkflowAgent do
    use Jido.Agent,
      name: "workflow_agent",
      schema: [
        step: [type: :integer, default: 0],
        status: [type: :atom, default: :pending],
        data: [type: :map, default: %{}]
      ]

    @impl true
    def signal_routes, do: []
  end

  # Each test gets its own ETS table name so async tests do not collide.
  defp fresh_table do
    :"hibernate_thaw_test_#{System.unique_integer([:positive])}"
  end

  defp jido_for(table), do: %{storage: {Jido.Storage.ETS, table: table}}

  defp fresh_jido, do: jido_for(fresh_table())

  # Hibernates `agent`, then thaws the checkpoint stored under `id`.
  defp roundtrip(jido, agent, id) do
    :ok = Jido.Persist.hibernate(jido, agent)
    {:ok, thawed} = Jido.Persist.thaw(jido, Jido.Agent, id)
    thawed
  end

  # Overwrites existing state keys of `agent` with `changes`.
  defp with_state(agent, changes) do
    new_state =
      Enum.reduce(changes, agent.state, fn {key, value}, state ->
        %{state | key => value}
      end)

    %{agent | state: new_state}
  end

  # Builds a thread by appending `entries` one call at a time.
  defp thread_of(id, entries) do
    Enum.reduce(entries, Thread.new(id: id), fn entry, thread ->
      Thread.append(thread, entry)
    end)
  end

  defp thread_in(agent), do: agent.state[:__thread__]

  describe "basic round-trip: create agent → hibernate → thaw → verify" do
    test "agent id is preserved after hibernate/thaw" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "basic-roundtrip-1")

      thawed = roundtrip(jido, agent, "basic-roundtrip-1")

      assert thawed.id == "basic-roundtrip-1"
    end

    test "agent struct type is preserved" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "basic-roundtrip-2")

      thawed = roundtrip(jido, agent, "basic-roundtrip-2")

      assert thawed.__struct__ == Jido.Agent
    end

    test "default state values are preserved" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "basic-roundtrip-3")

      thawed = roundtrip(jido, agent, "basic-roundtrip-3")

      assert thawed.state.step == 0
      assert thawed.state.status == :pending
      assert thawed.state.data == %{}
    end
  end

  describe "with thread: create agent → attach thread → add entries → hibernate → thaw → verify" do
    test "thread is restored with correct id" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "thread-test-1")

      with_thread = ThreadAgent.ensure(agent, id: "thread-for-agent-1")
      agent = ThreadAgent.append(with_thread, %{kind: :message, payload: %{content: "hello"}})

      thawed = roundtrip(jido, agent, "thread-test-1")

      assert thawed.state[:__thread__] != nil
      assert thawed.state[:__thread__].id == "thread-for-agent-1"
    end

    test "thread rev matches entry count" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "thread-test-2")

      thread =
        thread_of("thread-rev-test", [
          %{kind: :message, payload: %{content: "one"}},
          %{kind: :message, payload: %{content: "two"}},
          %{kind: :message, payload: %{content: "three"}}
        ])

      agent = ThreadAgent.put(agent, thread)

      rehydrated = jido |> roundtrip(agent, "thread-test-2") |> thread_in()

      assert rehydrated.rev == 3
      assert Thread.entry_count(rehydrated) == 3
    end

    test "entry seq numbers are correct and ordered" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "thread-test-3")

      entries = [
        %{kind: :user_message, payload: %{role: "user", content: "query"}},
        %{kind: :tool_call, payload: %{name: "search", args: %{q: "test"}}},
        %{kind: :tool_result, payload: %{result: "found 5 results"}},
        %{kind: :assistant_message, payload: %{role: "assistant", content: "response"}}
      ]

      # The whole list goes through a single append call here.
      thread = Thread.append(Thread.new(id: "seq-test-thread"), entries)
      agent = ThreadAgent.put(agent, thread)

      rehydrated = jido |> roundtrip(agent, "thread-test-3") |> thread_in()
      entry_list = Thread.to_list(rehydrated)

      assert length(entry_list) == 4
      assert Enum.map(entry_list, & &1.seq) == [0, 1, 2, 3]
    end

    test "entry payloads are preserved" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "thread-test-4")

      thread =
        thread_of("payload-test-thread", [
          %{kind: :message, payload: %{complex: %{nested: "data"}, list: [1, 2, 3]}}
        ])

      agent = ThreadAgent.put(agent, thread)

      rehydrated = jido |> roundtrip(agent, "thread-test-4") |> thread_in()
      [entry] = Thread.to_list(rehydrated)

      assert entry.payload.complex == %{nested: "data"}
      assert entry.payload.list == [1, 2, 3]
    end
  end

  describe "state mutations: create → modify state → hibernate → thaw → verify state" do
    test "modified step value is preserved" do
      jido = fresh_jido()
      agent = with_state(WorkflowAgent.new(id: "state-mut-1"), step: 5)

      thawed = roundtrip(jido, agent, "state-mut-1")

      assert thawed.state.step == 5
    end

    test "modified status value is preserved" do
      jido = fresh_jido()
      agent = with_state(WorkflowAgent.new(id: "state-mut-2"), status: :completed)

      thawed = roundtrip(jido, agent, "state-mut-2")

      assert thawed.state.status == :completed
    end

    test "modified data map is preserved" do
      jido = fresh_jido()

      agent =
        with_state(WorkflowAgent.new(id: "state-mut-3"),
          data: %{user_id: "u123", items: ["a", "b", "c"]}
        )

      thawed = roundtrip(jido, agent, "state-mut-3")

      assert thawed.state.data == %{user_id: "u123", items: ["a", "b", "c"]}
    end

    test "all state fields are preserved together" do
      jido = fresh_jido()

      agent =
        with_state(WorkflowAgent.new(id: "state-mut-4"),
          step: 10,
          status: :in_progress,
          data: %{key: "value", count: 42}
        )

      thawed = roundtrip(jido, agent, "state-mut-4")

      assert thawed.state.step == 10
      assert thawed.state.status == :in_progress
      assert thawed.state.data == %{key: "value", count: 42}
    end
  end

  describe "multiple agents: hibernate/thaw multiple agents with different IDs" do
    test "multiple agents can be hibernated and thawed independently" do
      jido = fresh_jido()

      agent1 = with_state(WorkflowAgent.new(id: "multi-agent-1"), step: 1, status: :first)
      agent2 = with_state(WorkflowAgent.new(id: "multi-agent-2"), step: 2, status: :second)
      agent3 = with_state(WorkflowAgent.new(id: "multi-agent-3"), step: 3, status: :third)

      # All checkpoints are written before any is read back.
      :ok = Jido.Persist.hibernate(jido, agent1)
      :ok = Jido.Persist.hibernate(jido, agent2)
      :ok = Jido.Persist.hibernate(jido, agent3)

      {:ok, thawed1} = Jido.Persist.thaw(jido, Jido.Agent, "multi-agent-1")
      {:ok, thawed2} = Jido.Persist.thaw(jido, Jido.Agent, "multi-agent-2")
      {:ok, thawed3} = Jido.Persist.thaw(jido, Jido.Agent, "multi-agent-3")

      assert thawed1.id == "multi-agent-1"
      assert thawed1.state.step == 1
      assert thawed1.state.status == :first

      assert thawed2.id == "multi-agent-2"
      assert thawed2.state.step == 2
      assert thawed2.state.status == :second

      assert thawed3.id == "multi-agent-3"
      assert thawed3.state.step == 3
      assert thawed3.state.status == :third
    end

    test "multiple agents with threads can be hibernated and thawed independently" do
      jido = fresh_jido()

      thread1 = thread_of("thread-multi-1", [%{kind: :note, payload: %{text: "agent1"}}])
      agent1 = ThreadAgent.put(WorkflowAgent.new(id: "multi-thread-1"), thread1)

      thread2 = thread_of("thread-multi-2", [%{kind: :note, payload: %{text: "agent2"}}])
      agent2 = ThreadAgent.put(WorkflowAgent.new(id: "multi-thread-2"), thread2)

      :ok = Jido.Persist.hibernate(jido, agent1)
      :ok = Jido.Persist.hibernate(jido, agent2)

      {:ok, thawed1} = Jido.Persist.thaw(jido, Jido.Agent, "multi-thread-1")
      {:ok, thawed2} = Jido.Persist.thaw(jido, Jido.Agent, "multi-thread-2")

      assert thawed1.state[:__thread__].id == "thread-multi-1"
      assert thawed2.state[:__thread__].id == "thread-multi-2"

      [entry1] = Thread.to_list(thawed1.state[:__thread__])
      [entry2] = Thread.to_list(thawed2.state[:__thread__])

      assert entry1.payload.text == "agent1"
      assert entry2.payload.text == "agent2"
    end

    test "thawing nonexistent agent returns :not_found" do
      jido = fresh_jido()

      # Another agent is stored first, so the storage is in use when the
      # unknown id is looked up.
      :ok = Jido.Persist.hibernate(jido, WorkflowAgent.new(id: "exists"))

      assert :not_found = Jido.Persist.thaw(jido, Jido.Agent, "does-not-exist")
    end
  end

  describe "overwrite checkpoint: hibernate → modify → hibernate again → thaw → verify latest state" do
    test "second hibernate overwrites first checkpoint" do
      jido = fresh_jido()
      agent = WorkflowAgent.new(id: "overwrite-1")

      :ok = Jido.Persist.hibernate(jido, with_state(agent, step: 1, status: :version1))
      :ok = Jido.Persist.hibernate(jido, with_state(agent, step: 2, status: :version2))

      {:ok, thawed} = Jido.Persist.thaw(jido, Jido.Agent, "overwrite-1")

      assert thawed.state.step == 2
      assert thawed.state.status == :version2
    end

    test "thaw → modify → hibernate → thaw preserves modifications" do
      jido = fresh_jido()
      agent = with_state(WorkflowAgent.new(id: "overwrite-2"), step: 1)

      thawed1 = roundtrip(jido, agent, "overwrite-2")

      assert thawed1.state.step == 1

      updated = with_state(thawed1, step: 99, status: :final)
      thawed2 = roundtrip(jido, updated, "overwrite-2")

      assert thawed2.state.step == 99
      assert thawed2.state.status == :final
    end

    test "thread updates are preserved on re-hibernate" do
      jido = fresh_jido()

      thread = thread_of("overwrite-thread", [%{kind: :message, payload: %{content: "first"}}])
      agent = ThreadAgent.put(WorkflowAgent.new(id: "overwrite-3"), thread)

      thawed1 = roundtrip(jido, agent, "overwrite-3")
      assert Thread.entry_count(thawed1.state[:__thread__]) == 1

      updated_thread =
        thread_of("overwrite-thread-v2", [
          %{kind: :message, payload: %{content: "new first"}},
          %{kind: :message, payload: %{content: "new second"}}
        ])

      updated = ThreadAgent.put(thawed1, updated_thread)
      thawed2 = roundtrip(jido, updated, "overwrite-3")

      assert thawed2.state[:__thread__].id == "overwrite-thread-v2"
      assert Thread.entry_count(thawed2.state[:__thread__]) == 2
      assert thawed2.state[:__thread__].rev == 2
    end
  end

  describe "integration invariants" do
    test "checkpoint never contains full Thread struct, only pointer" do
      table = fresh_table()
      jido = jido_for(table)

      thread = thread_of("invariant-thread", [%{kind: :message, payload: %{content: "test"}}])
      agent = ThreadAgent.put(WorkflowAgent.new(id: "invariant-1"), thread)

      :ok = Jido.Persist.hibernate(jido, agent)

      # Read the raw checkpoint straight from storage, bypassing thaw.
      {:ok, checkpoint} =
        Jido.Storage.ETS.get_checkpoint({Jido.Agent, "invariant-1"}, table: table)

      refute is_struct(checkpoint.thread, Thread)
      assert checkpoint.thread == %{id: "invariant-thread", rev: 1}
      refute Map.has_key?(checkpoint.state, :__thread__)
    end

    test "agent.id is preserved exactly" do
      jido = fresh_jido()
      original_id = "exact-id-preservation-test-#{System.unique_integer([:positive])}"
      agent = WorkflowAgent.new(id: original_id)

      thawed = roundtrip(jido, agent, original_id)

      assert thawed.id == original_id
    end

    test "thread entries preserve kind field" do
      jido = fresh_jido()

      thread =
        thread_of("kind-test-thread", [
          %{kind: :user_input, payload: %{}},
          %{kind: :system_response, payload: %{}},
          %{kind: :tool_call, payload: %{}}
        ])

      agent = ThreadAgent.put(WorkflowAgent.new(id: "kind-test"), thread)

      thawed = roundtrip(jido, agent, "kind-test")

      entries = Thread.to_list(thawed.state[:__thread__])
      assert Enum.at(entries, 0).kind == :user_input
      assert Enum.at(entries, 1).kind == :system_response
      assert Enum.at(entries, 2).kind == :tool_call
    end
  end
end
+ + defmodule CustomAgent do + @moduledoc """ + Agent with a different schema for testing state variations. + + Note: Custom checkpoint/restore callbacks cannot be tested because + agent.__struct__ always returns Jido.Agent, so Persist cannot identify + the specific agent module to call its callbacks. + """ + use Jido.Agent, + name: "custom_agent", + schema: [value: [type: :string, default: ""]] + + @impl true + def signal_routes, do: [] + end + + defp unique_table do + :"persist_test_#{System.unique_integer([:positive])}" + end + + defp storage(table) do + {ETS, table: table} + end + + describe "hibernate/2" do + test "hibernates agent without thread" do + table = unique_table() + agent = TestAgent.new(id: "agent-1") + agent = %{agent | state: %{agent.state | counter: 42, status: :active}} + + assert :ok = Persist.hibernate(storage(table), agent) + + {:ok, checkpoint} = ETS.get_checkpoint({Agent, "agent-1"}, table: table) + assert checkpoint.id == "agent-1" + assert checkpoint.state.counter == 42 + assert checkpoint.state.status == :active + assert checkpoint.thread == nil + end + + test "hibernates agent with thread (thread is flushed first)" do + table = unique_table() + agent = TestAgent.new(id: "agent-2") + + thread = + Thread.new(id: "thread-2") + |> Thread.append(%{kind: :message, payload: %{content: "hello"}}) + |> Thread.append(%{kind: :message, payload: %{content: "world"}}) + + agent = %{agent | state: Map.put(agent.state, :__thread__, thread)} + + assert :ok = Persist.hibernate(storage(table), agent) + + {:ok, loaded_thread} = ETS.load_thread("thread-2", table: table) + assert Thread.entry_count(loaded_thread) == 2 + + {:ok, checkpoint} = ETS.get_checkpoint({Agent, "agent-2"}, table: table) + assert checkpoint.thread == %{id: "thread-2", rev: 2} + end + + test "uses default checkpoint format (agent.__struct__ is always Jido.Agent)" do + table = unique_table() + agent = CustomAgent.new(id: "custom-1") + agent = %{agent | state: %{agent.state | value: 
"custom_value"}} + + assert :ok = Persist.hibernate(storage(table), agent) + + {:ok, checkpoint} = ETS.get_checkpoint({Agent, "custom-1"}, table: table) + assert checkpoint.id == "custom-1" + assert checkpoint.state.value == "custom_value" + assert checkpoint.thread == nil + assert checkpoint.agent_module == Agent + end + + test "checkpoint never contains full Thread struct" do + table = unique_table() + agent = TestAgent.new(id: "agent-3") + + thread = + Thread.new(id: "thread-3") + |> Thread.append(%{kind: :message, payload: %{content: "test"}}) + + agent = %{agent | state: Map.put(agent.state, :__thread__, thread)} + + assert :ok = Persist.hibernate(storage(table), agent) + + {:ok, checkpoint} = ETS.get_checkpoint({Agent, "agent-3"}, table: table) + + refute is_struct(checkpoint.thread, Thread) + refute Map.has_key?(checkpoint.state, :__thread__) + end + + test "checkpoint contains thread pointer %{id, rev}" do + table = unique_table() + agent = TestAgent.new(id: "agent-4") + + thread = + Thread.new(id: "thread-4") + |> Thread.append(%{kind: :message, payload: %{content: "one"}}) + |> Thread.append(%{kind: :message, payload: %{content: "two"}}) + |> Thread.append(%{kind: :message, payload: %{content: "three"}}) + + agent = %{agent | state: Map.put(agent.state, :__thread__, thread)} + + assert :ok = Persist.hibernate(storage(table), agent) + + {:ok, checkpoint} = ETS.get_checkpoint({Agent, "agent-4"}, table: table) + + assert checkpoint.thread == %{id: "thread-4", rev: 3} + assert Map.keys(checkpoint.thread) |> Enum.sort() == [:id, :rev] + end + end + + describe "thaw/3" do + test "returns :not_found for missing agent" do + table = unique_table() + ETS.put_checkpoint({Agent, "nonexistent"}, %{}, table: table) + ETS.delete_checkpoint({Agent, "nonexistent"}, table: table) + + assert :not_found = Persist.thaw(storage(table), Agent, "nonexistent") + end + + test "thaws agent without thread" do + table = unique_table() + agent = TestAgent.new(id: "thaw-1") + agent = 
%{agent | state: %{agent.state | counter: 100, status: :completed}} + + :ok = Persist.hibernate(storage(table), agent) + + assert {:ok, thawed} = Persist.thaw(storage(table), Agent, "thaw-1") + assert thawed.id == "thaw-1" + assert thawed.state.counter == 100 + assert thawed.state.status == :completed + refute Map.has_key?(thawed.state, :__thread__) + end + + test "thaws agent with thread (thread is rehydrated)" do + table = unique_table() + agent = TestAgent.new(id: "thaw-2") + + thread = + Thread.new(id: "thaw-thread-2") + |> Thread.append(%{kind: :message, payload: %{role: "user", content: "hello"}}) + |> Thread.append(%{kind: :message, payload: %{role: "assistant", content: "hi"}}) + + agent = %{agent | state: Map.put(agent.state, :__thread__, thread)} + + :ok = Persist.hibernate(storage(table), agent) + + assert {:ok, thawed} = Persist.thaw(storage(table), Agent, "thaw-2") + assert thawed.state[:__thread__] != nil + + rehydrated_thread = thawed.state[:__thread__] + assert rehydrated_thread.id == "thaw-thread-2" + assert Thread.entry_count(rehydrated_thread) == 2 + end + + test "uses default restore (Agent.new + state merge)" do + table = unique_table() + agent = CustomAgent.new(id: "thaw-custom") + agent = %{agent | state: %{agent.state | value: "restored_value"}} + + :ok = Persist.hibernate(storage(table), agent) + + assert {:ok, thawed} = Persist.thaw(storage(table), Agent, "thaw-custom") + assert thawed.id == "thaw-custom" + assert thawed.state.value == "restored_value" + end + + test "returns {:error, :missing_thread} if thread pointer exists but thread not in storage" do + table = unique_table() + + checkpoint = %{ + version: 1, + agent_module: Agent, + id: "orphan-1", + state: %{counter: 0, status: :idle}, + thread: %{id: "missing-thread", rev: 5} + } + + :ok = ETS.put_checkpoint({Agent, "orphan-1"}, checkpoint, table: table) + + assert {:error, :missing_thread} = Persist.thaw(storage(table), Agent, "orphan-1") + end + + test "returns {:error, 
:thread_mismatch} if thread rev doesn't match checkpoint" do + table = unique_table() + + {:ok, _thread} = + ETS.append_thread( + "mismatch-thread", + [ + %{kind: :message, payload: %{content: "one"}}, + %{kind: :message, payload: %{content: "two"}} + ], + table: table + ) + + checkpoint = %{ + version: 1, + agent_module: Agent, + id: "mismatch-1", + state: %{counter: 0, status: :idle}, + thread: %{id: "mismatch-thread", rev: 10} + } + + :ok = ETS.put_checkpoint({Agent, "mismatch-1"}, checkpoint, table: table) + + assert {:error, :thread_mismatch} = Persist.thaw(storage(table), Agent, "mismatch-1") + end + end + + describe "round-trip" do + test "hibernate then thaw returns equivalent agent" do + table = unique_table() + agent = TestAgent.new(id: "roundtrip-1") + agent = %{agent | state: %{agent.state | counter: 999, status: :processing}} + + :ok = Persist.hibernate(storage(table), agent) + {:ok, thawed} = Persist.thaw(storage(table), Agent, "roundtrip-1") + + assert thawed.id == agent.id + assert thawed.__struct__ == agent.__struct__ + end + + test "state is preserved correctly" do + table = unique_table() + agent = TestAgent.new(id: "roundtrip-2") + agent = %{agent | state: %{agent.state | counter: 12345, status: :hibernated}} + + :ok = Persist.hibernate(storage(table), agent) + {:ok, thawed} = Persist.thaw(storage(table), Agent, "roundtrip-2") + + assert thawed.state.counter == 12345 + assert thawed.state.status == :hibernated + end + + test "thread is preserved correctly with all entries" do + table = unique_table() + agent = TestAgent.new(id: "roundtrip-3") + + entries = [ + %{kind: :message, payload: %{role: "user", content: "first"}}, + %{kind: :tool_call, payload: %{name: "search", args: %{q: "test"}}}, + %{kind: :tool_result, payload: %{result: "found"}}, + %{kind: :message, payload: %{role: "assistant", content: "done"}} + ] + + thread = + Thread.new(id: "roundtrip-thread-3") + |> Thread.append(entries) + + agent = %{agent | state: Map.put(agent.state, 
:__thread__, thread)} + + :ok = Persist.hibernate(storage(table), agent) + {:ok, thawed} = Persist.thaw(storage(table), Agent, "roundtrip-3") + + rehydrated = thawed.state[:__thread__] + assert rehydrated.id == "roundtrip-thread-3" + assert Thread.entry_count(rehydrated) == 4 + + entry_list = Thread.to_list(rehydrated) + assert Enum.at(entry_list, 0).kind == :message + assert Enum.at(entry_list, 0).payload.role == "user" + assert Enum.at(entry_list, 1).kind == :tool_call + assert Enum.at(entry_list, 2).kind == :tool_result + assert Enum.at(entry_list, 3).kind == :message + assert Enum.at(entry_list, 3).payload.role == "assistant" + end + + test "multiple hibernate cycles update correctly" do + table = unique_table() + agent = TestAgent.new(id: "multi-1") + + agent = %{agent | state: %{agent.state | counter: 1}} + :ok = Persist.hibernate(storage(table), agent) + + {:ok, thawed} = Persist.thaw(storage(table), Agent, "multi-1") + assert thawed.state.counter == 1 + + updated = %{thawed | state: %{thawed.state | counter: 2}} + :ok = Persist.hibernate(storage(table), updated) + + {:ok, thawed2} = Persist.thaw(storage(table), Agent, "multi-1") + assert thawed2.state.counter == 2 + end + end + + describe "integration with Jido instance" do + test "hibernate and thaw with Jido instance struct" do + table = unique_table() + jido_instance = %{storage: {ETS, table: table}} + + agent = TestAgent.new(id: "jido-instance-1") + agent = %{agent | state: %{agent.state | counter: 777}} + + assert :ok = Persist.hibernate(jido_instance, agent) + assert {:ok, thawed} = Persist.thaw(jido_instance, Agent, "jido-instance-1") + + assert thawed.state.counter == 777 + end + + test "hibernate and thaw with thread using Jido instance" do + table = unique_table() + jido_instance = %{storage: {ETS, table: table}} + + agent = TestAgent.new(id: "jido-instance-2") + + thread = + Thread.new(id: "jido-instance-thread") + |> Thread.append(%{kind: :message, payload: %{content: "via jido"}}) + + agent = 
%{agent | state: Map.put(agent.state, :__thread__, thread)} + + :ok = Persist.hibernate(jido_instance, agent) + {:ok, thawed} = Persist.thaw(jido_instance, Agent, "jido-instance-2") + + assert thawed.state[:__thread__].id == "jido-instance-thread" + assert Thread.entry_count(thawed.state[:__thread__]) == 1 + end + end + + describe "edge cases" do + test "empty thread (no entries) does not create thread in storage" do + table = unique_table() + agent = TestAgent.new(id: "empty-thread-1") + + thread = Thread.new(id: "empty-thread") + agent = %{agent | state: Map.put(agent.state, :__thread__, thread)} + + :ok = Persist.hibernate(storage(table), agent) + + assert :not_found = ETS.load_thread("empty-thread", table: table) + end + + test "thread with entries but no expected_rev conflict" do + table = unique_table() + agent = TestAgent.new(id: "conflict-1") + + thread = + Thread.new(id: "conflict-thread") + |> Thread.append(%{kind: :message, payload: %{content: "test"}}) + + agent = %{agent | state: Map.put(agent.state, :__thread__, thread)} + + :ok = Persist.hibernate(storage(table), agent) + :ok = Persist.hibernate(storage(table), agent) + + {:ok, loaded} = ETS.load_thread("conflict-thread", table: table) + assert Thread.entry_count(loaded) == 1 + end + + test "checkpoint with thread enforces invariants (no __thread__ in state)" do + table = unique_table() + agent = CustomAgent.new(id: "custom-thread-1") + agent = %{agent | state: %{agent.state | value: "with_thread"}} + + thread = + Thread.new(id: "custom-thread") + |> Thread.append(%{kind: :note, payload: %{text: "custom note"}}) + + agent = %{agent | state: Map.put(agent.state, :__thread__, thread)} + + :ok = Persist.hibernate(storage(table), agent) + + {:ok, checkpoint} = ETS.get_checkpoint({Agent, "custom-thread-1"}, table: table) + + assert checkpoint.thread == %{id: "custom-thread", rev: 1} + refute Map.has_key?(checkpoint.state || %{}, :__thread__) + end + end +end diff --git a/test/jido/storage/ets_test.exs 
b/test/jido/storage/ets_test.exs new file mode 100644 index 0000000..1ec55e1 --- /dev/null +++ b/test/jido/storage/ets_test.exs @@ -0,0 +1,341 @@ +defmodule JidoTest.Storage.ETSTest do + use ExUnit.Case, async: true + + alias Jido.Storage + alias Jido.Storage.ETS + alias Jido.Thread + alias Jido.Thread.Entry + + defp unique_table(test_name) do + :"test_storage_#{test_name}_#{System.unique_integer([:positive])}" + end + + describe "normalize_storage/1" do + test "module atom normalizes to {Module, []}" do + assert {Jido.Storage.ETS, []} = Storage.normalize_storage(Jido.Storage.ETS) + end + + test "tuple passes through unchanged" do + assert {Jido.Storage.ETS, [table: :custom]} = + Storage.normalize_storage({Jido.Storage.ETS, table: :custom}) + end + end + + describe "checkpoint operations" do + test "get_checkpoint/2 returns :not_found for missing key" do + opts = [table: unique_table(:get_missing)] + + assert :not_found = ETS.get_checkpoint(:nonexistent_key, opts) + end + + test "put_checkpoint/3 stores and get_checkpoint/2 retrieves data" do + opts = [table: unique_table(:put_get)] + + assert :ok = ETS.put_checkpoint(:my_key, %{state: "saved"}, opts) + assert {:ok, %{state: "saved"}} = ETS.get_checkpoint(:my_key, opts) + end + + test "put_checkpoint/3 overwrites existing data" do + opts = [table: unique_table(:overwrite)] + + assert :ok = ETS.put_checkpoint(:key, %{version: 1}, opts) + assert {:ok, %{version: 1}} = ETS.get_checkpoint(:key, opts) + + assert :ok = ETS.put_checkpoint(:key, %{version: 2}, opts) + assert {:ok, %{version: 2}} = ETS.get_checkpoint(:key, opts) + end + + test "delete_checkpoint/2 removes data" do + opts = [table: unique_table(:delete)] + + assert :ok = ETS.put_checkpoint(:to_delete, %{data: "exists"}, opts) + assert {:ok, _} = ETS.get_checkpoint(:to_delete, opts) + + assert :ok = ETS.delete_checkpoint(:to_delete, opts) + assert :not_found = ETS.get_checkpoint(:to_delete, opts) + end + + test "delete_checkpoint/2 succeeds even if key 
doesn't exist" do + opts = [table: unique_table(:delete_missing)] + + assert :ok = ETS.delete_checkpoint(:never_existed, opts) + end + + test "supports various key types" do + opts = [table: unique_table(:key_types)] + + assert :ok = ETS.put_checkpoint("string_key", :data1, opts) + assert :ok = ETS.put_checkpoint({:tuple, :key}, :data2, opts) + assert :ok = ETS.put_checkpoint(123, :data3, opts) + + assert {:ok, :data1} = ETS.get_checkpoint("string_key", opts) + assert {:ok, :data2} = ETS.get_checkpoint({:tuple, :key}, opts) + assert {:ok, :data3} = ETS.get_checkpoint(123, opts) + end + end + + describe "thread operations" do + test "load_thread/2 returns :not_found for missing thread" do + opts = [table: unique_table(:load_missing)] + + assert :not_found = ETS.load_thread("nonexistent_thread", opts) + end + + test "append_thread/3 creates thread with entries" do + opts = [table: unique_table(:create_thread)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + entries = [ + %{kind: :message, payload: %{role: "user", content: "Hello"}} + ] + + assert {:ok, %Thread{} = thread} = ETS.append_thread(thread_id, entries, opts) + assert thread.id == thread_id + assert thread.rev == 1 + assert length(thread.entries) == 1 + assert hd(thread.entries).kind == :message + end + + test "append_thread/3 appends to existing thread" do + opts = [table: unique_table(:append_thread)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + entry1 = %{kind: :message, payload: %{content: "First"}} + entry2 = %{kind: :message, payload: %{content: "Second"}} + + {:ok, thread1} = ETS.append_thread(thread_id, [entry1], opts) + assert thread1.rev == 1 + assert length(thread1.entries) == 1 + + {:ok, thread2} = ETS.append_thread(thread_id, [entry2], opts) + assert thread2.rev == 2 + assert length(thread2.entries) == 2 + assert Enum.at(thread2.entries, 0).payload.content == "First" + assert Enum.at(thread2.entries, 1).payload.content == "Second" + end + + test 
"append_thread/3 with expected_rev: succeeds when rev matches" do + opts = [table: unique_table(:expected_rev_match)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + {:ok, thread1} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + assert thread1.rev == 1 + + {:ok, thread2} = + ETS.append_thread(thread_id, [%{kind: :note}], Keyword.put(opts, :expected_rev, 1)) + + assert thread2.rev == 2 + end + + test "append_thread/3 with expected_rev: returns {:error, :conflict} when rev doesn't match" do + opts = [table: unique_table(:expected_rev_conflict)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + {:ok, _} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + + assert {:error, :conflict} = + ETS.append_thread(thread_id, [%{kind: :note}], Keyword.put(opts, :expected_rev, 0)) + + assert {:error, :conflict} = + ETS.append_thread(thread_id, [%{kind: :note}], Keyword.put(opts, :expected_rev, 5)) + end + + test "load_thread/2 returns correct %Jido.Thread{} with all entries" do + opts = [table: unique_table(:load_thread)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + entries = [ + %{kind: :message, payload: %{role: "user", content: "Hello"}}, + %{kind: :message, payload: %{role: "assistant", content: "Hi there"}}, + %{kind: :tool_call, payload: %{name: "search", args: %{}}} + ] + + {:ok, _} = ETS.append_thread(thread_id, entries, opts) + + assert {:ok, %Thread{} = thread} = ETS.load_thread(thread_id, opts) + assert thread.id == thread_id + assert thread.rev == 3 + assert length(thread.entries) == 3 + + assert Enum.all?(thread.entries, fn e -> %Entry{} = e end) + + [e0, e1, e2] = thread.entries + assert e0.kind == :message + assert e1.kind == :message + assert e2.kind == :tool_call + end + + test "delete_thread/2 removes thread and all entries" do + opts = [table: unique_table(:delete_thread)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + entries = [ + %{kind: :message, payload: %{content: "Entry 
1"}}, + %{kind: :message, payload: %{content: "Entry 2"}} + ] + + {:ok, _} = ETS.append_thread(thread_id, entries, opts) + assert {:ok, _} = ETS.load_thread(thread_id, opts) + + assert :ok = ETS.delete_thread(thread_id, opts) + assert :not_found = ETS.load_thread(thread_id, opts) + end + + test "delete_thread/2 succeeds even if thread doesn't exist" do + opts = [table: unique_table(:delete_missing_thread)] + + assert :ok = ETS.delete_thread("never_existed_thread", opts) + end + + test "thread entries have correct seq numbers assigned" do + opts = [table: unique_table(:seq_numbers)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + entries = [ + %{kind: :note, payload: %{text: "First"}}, + %{kind: :note, payload: %{text: "Second"}}, + %{kind: :note, payload: %{text: "Third"}} + ] + + {:ok, thread} = ETS.append_thread(thread_id, entries, opts) + + assert Enum.at(thread.entries, 0).seq == 0 + assert Enum.at(thread.entries, 1).seq == 1 + assert Enum.at(thread.entries, 2).seq == 2 + + more_entries = [ + %{kind: :note, payload: %{text: "Fourth"}} + ] + + {:ok, updated_thread} = ETS.append_thread(thread_id, more_entries, opts) + + assert Enum.at(updated_thread.entries, 3).seq == 3 + end + + test "thread rev increments correctly" do + opts = [table: unique_table(:rev_increment)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + {:ok, t1} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + assert t1.rev == 1 + + {:ok, t2} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + assert t2.rev == 2 + + {:ok, t3} = ETS.append_thread(thread_id, [%{kind: :note}, %{kind: :note}], opts) + assert t3.rev == 4 + + {:ok, loaded} = ETS.load_thread(thread_id, opts) + assert loaded.rev == 4 + end + + test "entries get unique IDs assigned" do + opts = [table: unique_table(:entry_ids)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + entries = [ + %{kind: :message, payload: %{content: "One"}}, + %{kind: :message, payload: %{content: 
"Two"}} + ] + + {:ok, thread} = ETS.append_thread(thread_id, entries, opts) + + [e1, e2] = thread.entries + assert is_binary(e1.id) + assert is_binary(e2.id) + assert String.starts_with?(e1.id, "entry_") + assert String.starts_with?(e2.id, "entry_") + refute e1.id == e2.id + end + + test "entries get timestamps assigned" do + opts = [table: unique_table(:entry_timestamps)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + before = System.system_time(:millisecond) + {:ok, thread} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + after_append = System.system_time(:millisecond) + + entry = hd(thread.entries) + assert is_integer(entry.at) + assert entry.at >= before + assert entry.at <= after_append + end + + test "thread metadata is preserved" do + opts = [table: unique_table(:metadata), metadata: %{user_id: "u123", session: "s456"}] + thread_id = "thread_#{System.unique_integer([:positive])}" + + {:ok, thread} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + assert thread.metadata == %{user_id: "u123", session: "s456"} + + {:ok, loaded} = ETS.load_thread(thread_id, opts) + assert loaded.metadata == %{user_id: "u123", session: "s456"} + end + + test "thread has created_at and updated_at timestamps" do + opts = [table: unique_table(:thread_timestamps)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + before = System.system_time(:millisecond) + {:ok, thread} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + after_create = System.system_time(:millisecond) + + assert is_integer(thread.created_at) + assert is_integer(thread.updated_at) + assert thread.created_at >= before + assert thread.created_at <= after_create + assert thread.updated_at >= thread.created_at + + Process.sleep(2) + + {:ok, updated} = ETS.append_thread(thread_id, [%{kind: :note}], opts) + assert updated.created_at == thread.created_at + assert updated.updated_at >= thread.updated_at + end + + test "accepts Entry structs directly" do + opts = [table: 
unique_table(:entry_structs)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + entry = Entry.new(kind: :message, payload: %{role: "user", content: "Hello"}) + + {:ok, thread} = ETS.append_thread(thread_id, [entry], opts) + assert length(thread.entries) == 1 + assert hd(thread.entries).kind == :message + end + + test "stats include entry_count" do + opts = [table: unique_table(:stats)] + thread_id = "thread_#{System.unique_integer([:positive])}" + + {:ok, thread} = + ETS.append_thread(thread_id, [%{kind: :note}, %{kind: :note}, %{kind: :note}], opts) + + assert thread.stats.entry_count == 3 + end + end + + describe "table isolation" do + test "different table names are isolated" do + opts1 = [table: unique_table(:isolation1)] + opts2 = [table: unique_table(:isolation2)] + + assert :ok = ETS.put_checkpoint(:shared_key, :value1, opts1) + assert :ok = ETS.put_checkpoint(:shared_key, :value2, opts2) + + assert {:ok, :value1} = ETS.get_checkpoint(:shared_key, opts1) + assert {:ok, :value2} = ETS.get_checkpoint(:shared_key, opts2) + end + + test "checkpoints and threads use separate tables" do + opts = [table: unique_table(:separate_tables)] + + assert :ok = ETS.put_checkpoint("key1", :checkpoint_data, opts) + {:ok, _} = ETS.append_thread("key1", [%{kind: :note}], opts) + + assert {:ok, :checkpoint_data} = ETS.get_checkpoint("key1", opts) + assert {:ok, %Thread{}} = ETS.load_thread("key1", opts) + end + end +end diff --git a/test/jido/storage/file_test.exs b/test/jido/storage/file_test.exs new file mode 100644 index 0000000..2c58a60 --- /dev/null +++ b/test/jido/storage/file_test.exs @@ -0,0 +1,428 @@ +defmodule JidoTest.Storage.FileTest do + use ExUnit.Case, async: false + + alias Jido.Storage.File, as: FileStorage + alias Jido.Thread + alias Jido.Thread.Entry + + @moduletag :storage + + setup do + base_dir = + Path.join( + System.tmp_dir!(), + "jido_file_storage_test_#{:erlang.unique_integer([:positive])}" + ) + + File.mkdir_p!(base_dir) + on_exit(fn -> 
File.rm_rf!(base_dir) end) + {:ok, path: base_dir, opts: [path: base_dir]} + end + + describe "checkpoint operations" do + test "get_checkpoint/2 returns :not_found for missing key", %{opts: opts} do + assert :not_found = FileStorage.get_checkpoint(:missing_key, opts) + end + + test "put_checkpoint/3 stores and get_checkpoint/2 retrieves data", %{opts: opts} do + key = :my_checkpoint + data = %{counter: 42, name: "test"} + + assert :ok = FileStorage.put_checkpoint(key, data, opts) + assert {:ok, ^data} = FileStorage.get_checkpoint(key, opts) + end + + test "put_checkpoint/3 overwrites existing data atomically", %{opts: opts} do + key = :overwrite_test + + assert :ok = FileStorage.put_checkpoint(key, %{version: 1}, opts) + assert {:ok, %{version: 1}} = FileStorage.get_checkpoint(key, opts) + + assert :ok = FileStorage.put_checkpoint(key, %{version: 2}, opts) + assert {:ok, %{version: 2}} = FileStorage.get_checkpoint(key, opts) + end + + test "delete_checkpoint/2 removes data", %{opts: opts} do + key = :to_delete + + assert :ok = FileStorage.put_checkpoint(key, %{data: "exists"}, opts) + assert {:ok, _} = FileStorage.get_checkpoint(key, opts) + + assert :ok = FileStorage.delete_checkpoint(key, opts) + assert :not_found = FileStorage.get_checkpoint(key, opts) + end + + test "delete_checkpoint/2 succeeds even if key doesn't exist", %{opts: opts} do + assert :ok = FileStorage.delete_checkpoint(:nonexistent_key, opts) + end + + test "data survives serialization/deserialization correctly", %{opts: opts} do + key = :complex_data + + complex_data = %{ + string: "hello", + integer: 123, + float: 3.14, + atom: :some_atom, + list: [1, 2, 3], + tuple: {:ok, "value"}, + nested: %{ + deep: %{value: true} + }, + binary: <<1, 2, 3, 4, 5>> + } + + assert :ok = FileStorage.put_checkpoint(key, complex_data, opts) + assert {:ok, retrieved} = FileStorage.get_checkpoint(key, opts) + + assert retrieved.string == "hello" + assert retrieved.integer == 123 + assert retrieved.float == 3.14 + 
assert retrieved.atom == :some_atom + assert retrieved.list == [1, 2, 3] + assert retrieved.tuple == {:ok, "value"} + assert retrieved.nested == %{deep: %{value: true}} + assert retrieved.binary == <<1, 2, 3, 4, 5>> + end + end + + describe "thread operations" do + test "load_thread/2 returns :not_found for missing thread", %{opts: opts} do + assert :not_found = FileStorage.load_thread("nonexistent_thread", opts) + end + + test "append_thread/3 creates thread with entries", %{opts: opts} do + thread_id = "new_thread_#{:erlang.unique_integer([:positive])}" + + entry = %Entry{ + id: "entry_1", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{role: "user", content: "Hello"}, + refs: %{} + } + + assert {:ok, thread} = FileStorage.append_thread(thread_id, [entry], opts) + assert thread.id == thread_id + assert thread.rev == 1 + assert length(thread.entries) == 1 + assert hd(thread.entries).kind == :message + end + + test "append_thread/3 appends to existing thread", %{opts: opts} do + thread_id = "append_test_#{:erlang.unique_integer([:positive])}" + + entry1 = %Entry{ + id: "entry_1", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{role: "user", content: "First"}, + refs: %{} + } + + assert {:ok, thread1} = FileStorage.append_thread(thread_id, [entry1], opts) + assert thread1.rev == 1 + + entry2 = %Entry{ + id: "entry_2", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{role: "assistant", content: "Second"}, + refs: %{} + } + + assert {:ok, thread2} = FileStorage.append_thread(thread_id, [entry2], opts) + assert thread2.rev == 2 + assert length(thread2.entries) == 2 + assert Enum.at(thread2.entries, 0).payload.content == "First" + assert Enum.at(thread2.entries, 1).payload.content == "Second" + end + + test "append_thread/3 with expected_rev: succeeds when rev matches", %{opts: opts} do + thread_id = "expected_rev_success_#{:erlang.unique_integer([:positive])}" + + entry1 = 
%Entry{ + id: "entry_1", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{content: "First"}, + refs: %{} + } + + {:ok, _thread1} = FileStorage.append_thread(thread_id, [entry1], opts) + + entry2 = %Entry{ + id: "entry_2", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{content: "Second"}, + refs: %{} + } + + opts_with_rev = Keyword.put(opts, :expected_rev, 1) + assert {:ok, thread2} = FileStorage.append_thread(thread_id, [entry2], opts_with_rev) + assert thread2.rev == 2 + end + + test "append_thread/3 with expected_rev: returns {:error, :conflict} when rev doesn't match", + %{opts: opts} do + thread_id = "expected_rev_conflict_#{:erlang.unique_integer([:positive])}" + + entry1 = %Entry{ + id: "entry_1", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{content: "First"}, + refs: %{} + } + + {:ok, _thread1} = FileStorage.append_thread(thread_id, [entry1], opts) + + entry2 = %Entry{ + id: "entry_2", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{content: "Second"}, + refs: %{} + } + + opts_with_wrong_rev = Keyword.put(opts, :expected_rev, 0) + + assert {:error, :conflict} = + FileStorage.append_thread(thread_id, [entry2], opts_with_wrong_rev) + end + + test "load_thread/2 returns correct %Jido.Thread{} with all entries", %{opts: opts} do + thread_id = "load_test_#{:erlang.unique_integer([:positive])}" + + entries = + for i <- 1..3 do + %Entry{ + id: "entry_#{i}", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{index: i}, + refs: %{} + } + end + + {:ok, _thread} = FileStorage.append_thread(thread_id, entries, opts) + {:ok, loaded} = FileStorage.load_thread(thread_id, opts) + + assert %Thread{} = loaded + assert loaded.id == thread_id + assert loaded.rev == 3 + assert length(loaded.entries) == 3 + assert loaded.stats.entry_count == 3 + + for {entry, idx} <- Enum.with_index(loaded.entries) do + assert 
entry.seq == idx + assert entry.payload.index == idx + 1 + end + end + + test "delete_thread/2 removes thread directory and all files", %{opts: opts} do + thread_id = "delete_test_#{:erlang.unique_integer([:positive])}" + path = Keyword.fetch!(opts, :path) + + entry = %Entry{ + id: "entry_1", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{content: "test"}, + refs: %{} + } + + {:ok, _thread} = FileStorage.append_thread(thread_id, [entry], opts) + + thread_dir = Path.join([path, "threads", thread_id]) + assert File.exists?(thread_dir) + + assert :ok = FileStorage.delete_thread(thread_id, opts) + refute File.exists?(thread_dir) + assert :not_found = FileStorage.load_thread(thread_id, opts) + end + + test "thread entries have correct seq numbers", %{opts: opts} do + thread_id = "seq_test_#{:erlang.unique_integer([:positive])}" + + entry1 = %Entry{id: "e1", seq: 0, at: 0, kind: :message, payload: %{}, refs: %{}} + {:ok, _} = FileStorage.append_thread(thread_id, [entry1], opts) + + entry2 = %Entry{id: "e2", seq: 0, at: 0, kind: :message, payload: %{}, refs: %{}} + entry3 = %Entry{id: "e3", seq: 0, at: 0, kind: :message, payload: %{}, refs: %{}} + {:ok, thread} = FileStorage.append_thread(thread_id, [entry2, entry3], opts) + + assert Enum.at(thread.entries, 0).seq == 0 + assert Enum.at(thread.entries, 1).seq == 1 + assert Enum.at(thread.entries, 2).seq == 2 + end + + test "binary framing handles various entry sizes correctly", %{opts: opts} do + thread_id = "framing_test_#{:erlang.unique_integer([:positive])}" + + small_entry = %Entry{ + id: "small", + seq: 0, + at: 0, + kind: :message, + payload: %{data: "x"}, + refs: %{} + } + + medium_entry = %Entry{ + id: "medium", + seq: 0, + at: 0, + kind: :message, + payload: %{data: String.duplicate("y", 1000)}, + refs: %{} + } + + large_entry = %Entry{ + id: "large", + seq: 0, + at: 0, + kind: :message, + payload: %{data: String.duplicate("z", 100_000)}, + refs: %{} + } + + {:ok, _} = 
FileStorage.append_thread(thread_id, [small_entry], opts) + {:ok, _} = FileStorage.append_thread(thread_id, [medium_entry], opts) + {:ok, thread} = FileStorage.append_thread(thread_id, [large_entry], opts) + + assert length(thread.entries) == 3 + assert Enum.at(thread.entries, 0).payload.data == "x" + assert Enum.at(thread.entries, 1).payload.data == String.duplicate("y", 1000) + assert Enum.at(thread.entries, 2).payload.data == String.duplicate("z", 100_000) + + {:ok, loaded} = FileStorage.load_thread(thread_id, opts) + assert length(loaded.entries) == 3 + assert Enum.at(loaded.entries, 2).payload.data == String.duplicate("z", 100_000) + end + end + + describe "edge cases" do + test "handles special characters in keys", %{opts: opts} do + special_keys = [ + "key with spaces", + "key/with/slashes", + "key:with:colons", + "key\twith\ttabs", + "key🎉with🎉emoji", + {:tuple, "key"}, + ["list", "key"] + ] + + for key <- special_keys do + data = %{key: inspect(key)} + assert :ok = FileStorage.put_checkpoint(key, data, opts) + assert {:ok, ^data} = FileStorage.get_checkpoint(key, opts) + end + end + + test "handles empty entries list", %{opts: opts} do + thread_id = "empty_entries_#{:erlang.unique_integer([:positive])}" + + entry = %Entry{id: "e1", seq: 0, at: 0, kind: :message, payload: %{}, refs: %{}} + {:ok, _} = FileStorage.append_thread(thread_id, [entry], opts) + + {:ok, thread} = FileStorage.append_thread(thread_id, [], opts) + assert length(thread.entries) == 1 + assert thread.rev == 1 + end + + test "handles large payloads", %{opts: opts} do + large_data = %{ + blob: :crypto.strong_rand_bytes(1_000_000), + list: Enum.to_list(1..10_000) + } + + assert :ok = FileStorage.put_checkpoint(:large_checkpoint, large_data, opts) + assert {:ok, retrieved} = FileStorage.get_checkpoint(:large_checkpoint, opts) + assert byte_size(retrieved.blob) == 1_000_000 + assert length(retrieved.list) == 10_000 + end + + test "handles thread with many entries", %{opts: opts} do + thread_id 
= "many_entries_#{:erlang.unique_integer([:positive])}" + + entries = + for i <- 1..100 do + %Entry{ + id: "entry_#{i}", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{index: i, data: String.duplicate("x", 100)}, + refs: %{} + } + end + + {:ok, thread} = FileStorage.append_thread(thread_id, entries, opts) + assert thread.rev == 100 + assert length(thread.entries) == 100 + + {:ok, loaded} = FileStorage.load_thread(thread_id, opts) + assert length(loaded.entries) == 100 + + for {entry, idx} <- Enum.with_index(loaded.entries) do + assert entry.seq == idx + end + end + + test "handles concurrent appends to different threads", %{opts: opts} do + tasks = + for i <- 1..5 do + Task.async(fn -> + thread_id = "concurrent_#{i}" + + entry = %Entry{ + id: "entry_#{i}", + seq: 0, + at: System.system_time(:millisecond), + kind: :message, + payload: %{thread: i}, + refs: %{} + } + + {:ok, thread} = FileStorage.append_thread(thread_id, [entry], opts) + {thread_id, thread} + end) + end + + results = Task.await_many(tasks, 5000) + + for {thread_id, thread} <- results do + assert thread.id == thread_id + assert thread.rev == 1 + end + end + + test "handles nil values in payload", %{opts: opts} do + key = :nil_payload + + data = %{ + value: nil, + nested: %{inner: nil}, + list: [nil, 1, nil] + } + + assert :ok = FileStorage.put_checkpoint(key, data, opts) + assert {:ok, retrieved} = FileStorage.get_checkpoint(key, opts) + assert retrieved.value == nil + assert retrieved.nested.inner == nil + assert retrieved.list == [nil, 1, nil] + end + end +end diff --git a/test/jido/thread/agent_test.exs b/test/jido/thread/agent_test.exs new file mode 100644 index 0000000..383be27 --- /dev/null +++ b/test/jido/thread/agent_test.exs @@ -0,0 +1,185 @@ +defmodule JidoTest.Thread.AgentTest do + use ExUnit.Case, async: true + + alias Jido.Agent + alias Jido.Thread + alias Jido.Thread.Agent, as: ThreadAgent + + defp create_agent do + %Agent{ + id: "test-agent-1", + state: 
%{} + } + end + + describe "key/0" do + test "returns :__thread__" do + assert ThreadAgent.key() == :__thread__ + end + end + + describe "get/2" do + test "returns nil when no thread present" do + agent = create_agent() + assert ThreadAgent.get(agent) == nil + end + + test "returns default when no thread present" do + agent = create_agent() + default = Thread.new() + assert ThreadAgent.get(agent, default) == default + end + + test "returns thread when present" do + thread = Thread.new(id: "test-thread") + agent = %{create_agent() | state: %{__thread__: thread}} + assert ThreadAgent.get(agent) == thread + end + end + + describe "put/2" do + test "stores thread in agent state" do + agent = create_agent() + thread = Thread.new(id: "test-thread") + + updated = ThreadAgent.put(agent, thread) + + assert updated.state[:__thread__] == thread + assert ThreadAgent.get(updated) == thread + end + + test "preserves other state keys" do + agent = %{create_agent() | state: %{foo: :bar}} + thread = Thread.new() + + updated = ThreadAgent.put(agent, thread) + + assert updated.state[:foo] == :bar + assert updated.state[:__thread__] == thread + end + end + + describe "update/2" do + test "updates thread using function" do + thread = Thread.new(id: "test-thread") + agent = ThreadAgent.put(create_agent(), thread) + + updated = + ThreadAgent.update(agent, fn t -> + Thread.append(t, %{kind: :message, payload: %{text: "hello"}}) + end) + + result_thread = ThreadAgent.get(updated) + assert Thread.entry_count(result_thread) == 1 + end + + test "passes nil to function when no thread" do + agent = create_agent() + + updated = + ThreadAgent.update(agent, fn t -> + assert t == nil + Thread.new(id: "created-in-update") + end) + + assert ThreadAgent.get(updated).id == "created-in-update" + end + end + + describe "ensure/2" do + test "creates thread if missing" do + agent = create_agent() + assert ThreadAgent.has_thread?(agent) == false + + updated = ThreadAgent.ensure(agent) + + assert 
ThreadAgent.has_thread?(updated) == true + assert %Thread{} = ThreadAgent.get(updated) + end + + test "passes options to Thread.new" do + agent = create_agent() + + updated = ThreadAgent.ensure(agent, metadata: %{user_id: "u1"}) + + thread = ThreadAgent.get(updated) + assert thread.metadata == %{user_id: "u1"} + end + + test "does NOT overwrite existing thread" do + thread = Thread.new(id: "original-thread", metadata: %{keep: :this}) + agent = ThreadAgent.put(create_agent(), thread) + + updated = ThreadAgent.ensure(agent, metadata: %{new: :metadata}) + + result = ThreadAgent.get(updated) + assert result.id == "original-thread" + assert result.metadata == %{keep: :this} + end + end + + describe "append/3" do + test "initializes thread if missing and appends entry" do + agent = create_agent() + assert ThreadAgent.has_thread?(agent) == false + + updated = ThreadAgent.append(agent, %{kind: :message, payload: %{text: "hi"}}) + + assert ThreadAgent.has_thread?(updated) == true + thread = ThreadAgent.get(updated) + assert Thread.entry_count(thread) == 1 + assert Thread.last(thread).kind == :message + end + + test "appends to existing thread" do + thread = Thread.new() |> Thread.append(%{kind: :note, payload: %{text: "first"}}) + agent = ThreadAgent.put(create_agent(), thread) + + updated = ThreadAgent.append(agent, %{kind: :message, payload: %{text: "second"}}) + + result = ThreadAgent.get(updated) + assert Thread.entry_count(result) == 2 + assert Thread.last(result).kind == :message + end + + test "appends multiple entries" do + agent = create_agent() + + entries = [ + %{kind: :message, payload: %{role: "user"}}, + %{kind: :message, payload: %{role: "assistant"}} + ] + + updated = ThreadAgent.append(agent, entries) + + thread = ThreadAgent.get(updated) + assert Thread.entry_count(thread) == 2 + end + + test "passes options to ensure" do + agent = create_agent() + + updated = + ThreadAgent.append( + agent, + %{kind: :message, payload: %{}}, + metadata: %{channel: "web"} + 
) + + thread = ThreadAgent.get(updated) + assert thread.metadata == %{channel: "web"} + end + end + + describe "has_thread?/1" do + test "returns false when no thread" do + agent = create_agent() + assert ThreadAgent.has_thread?(agent) == false + end + + test "returns true when thread present" do + agent = ThreadAgent.put(create_agent(), Thread.new()) + assert ThreadAgent.has_thread?(agent) == true + end + end +end diff --git a/test/jido/thread/journal_backed_adapter_test.exs b/test/jido/thread/journal_backed_adapter_test.exs new file mode 100644 index 0000000..4b9401d --- /dev/null +++ b/test/jido/thread/journal_backed_adapter_test.exs @@ -0,0 +1,189 @@ +defmodule Jido.Thread.Store.Adapters.JournalBackedTest do + use ExUnit.Case, async: true + + alias Jido.Thread + alias Jido.Thread.Store + alias Jido.Thread.Store.Adapters.JournalBacked + alias Jido.Thread.Entry + + describe "init/1" do + test "initializes with Journal and default InMemory adapter" do + assert {:ok, state} = JournalBacked.init([]) + assert %{journal: journal} = state + assert journal.adapter == Jido.Signal.Journal.Adapters.InMemory + end + + test "accepts custom journal adapter" do + assert {:ok, state} = JournalBacked.init(journal_adapter: Jido.Signal.Journal.Adapters.ETS) + assert %{journal: journal} = state + assert journal.adapter == Jido.Signal.Journal.Adapters.ETS + end + end + + describe "save/2 and load/2 roundtrip" do + test "preserves entries in correct seq order" do + {:ok, store} = Store.new(JournalBacked) + + thread = + Thread.new(id: "t1") + |> Thread.append(%{kind: :message, payload: %{role: "user", content: "First"}}) + |> Thread.append(%{kind: :message, payload: %{role: "assistant", content: "Second"}}) + |> Thread.append(%{kind: :tool_call, payload: %{name: "search", args: %{}}}) + + {:ok, store} = Store.save(store, thread) + {:ok, _store, loaded} = Store.load(store, "t1") + + assert loaded.id == "t1" + assert Thread.entry_count(loaded) == 3 + + entries = Thread.to_list(loaded) + 
assert Enum.at(entries, 0).seq == 0 + assert Enum.at(entries, 1).seq == 1 + assert Enum.at(entries, 2).seq == 2 + assert Enum.at(entries, 0).payload.content == "First" + assert Enum.at(entries, 1).payload.content == "Second" + assert Enum.at(entries, 2).payload.name == "search" + end + + test "preserves entry metadata" do + {:ok, store} = Store.new(JournalBacked) + + thread = + Thread.new(id: "meta-test") + |> Thread.append(%{ + kind: :message, + payload: %{role: "user", content: "Hello"}, + refs: %{signal_id: "sig_123", agent_id: "agent_456"} + }) + + {:ok, store} = Store.save(store, thread) + {:ok, _store, loaded} = Store.load(store, "meta-test") + + entry = Thread.last(loaded) + assert entry.refs == %{signal_id: "sig_123", agent_id: "agent_456"} + end + end + + describe "load/2" do + test "returns :not_found for missing thread" do + {:ok, store} = Store.new(JournalBacked) + + assert {:error, _store, :not_found} = Store.load(store, "nonexistent") + end + end + + describe "append/3" do + test "creates thread if missing" do + {:ok, store} = Store.new(JournalBacked) + + entry = %{kind: :message, payload: %{role: "user", content: "Hello"}} + {:ok, store, thread} = Store.append(store, "new-thread", entry) + + assert thread.id == "new-thread" + assert Thread.entry_count(thread) == 1 + assert Thread.last(thread).kind == :message + + {:ok, _store, loaded} = Store.load(store, "new-thread") + assert Thread.entry_count(loaded) == 1 + end + + test "appends to existing thread with correct seq" do + {:ok, store} = Store.new(JournalBacked) + + entry1 = %{kind: :message, payload: %{role: "user", content: "First"}} + {:ok, store, _thread} = Store.append(store, "t2", entry1) + + entry2 = %{kind: :message, payload: %{role: "assistant", content: "Second"}} + {:ok, store, thread} = Store.append(store, "t2", entry2) + + assert Thread.entry_count(thread) == 2 + assert Thread.get_entry(thread, 0).seq == 0 + assert Thread.get_entry(thread, 1).seq == 1 + assert Thread.get_entry(thread, 
0).payload.content == "First" + assert Thread.get_entry(thread, 1).payload.content == "Second" + + {:ok, _store, loaded} = Store.load(store, "t2") + assert Thread.entry_count(loaded) == 2 + end + + test "handles multiple entries in single append" do + {:ok, store} = Store.new(JournalBacked) + + entries = [ + %{kind: :message, payload: %{role: "user", content: "One"}}, + %{kind: :message, payload: %{role: "assistant", content: "Two"}}, + %{kind: :message, payload: %{role: "user", content: "Three"}} + ] + + {:ok, _store, thread} = Store.append(store, "batch", entries) + + assert Thread.entry_count(thread) == 3 + assert Thread.get_entry(thread, 0).payload.content == "One" + assert Thread.get_entry(thread, 1).payload.content == "Two" + assert Thread.get_entry(thread, 2).payload.content == "Three" + end + end + + describe "entry kinds" do + test "survive encoding/decoding as atoms" do + {:ok, store} = Store.new(JournalBacked) + + kinds = [ + :message, + :tool_call, + :tool_result, + :signal_in, + :signal_out, + :note, + :error, + :checkpoint + ] + + thread = + Enum.reduce(kinds, Thread.new(id: "kinds-test"), fn kind, t -> + Thread.append(t, %{kind: kind, payload: %{test: true}}) + end) + + {:ok, store} = Store.save(store, thread) + {:ok, _store, loaded} = Store.load(store, "kinds-test") + + loaded_kinds = loaded |> Thread.to_list() |> Enum.map(& &1.kind) + assert loaded_kinds == kinds + end + end + + describe "multiple threads" do + test "stores and retrieves multiple threads independently" do + {:ok, store} = Store.new(JournalBacked) + + {:ok, store, _} = + Store.append(store, "thread-a", %{kind: :message, payload: %{content: "A"}}) + + {:ok, store, _} = + Store.append(store, "thread-b", %{kind: :message, payload: %{content: "B"}}) + + {:ok, store, _} = + Store.append(store, "thread-a", %{kind: :message, payload: %{content: "A2"}}) + + {:ok, store, thread_a} = Store.load(store, "thread-a") + {:ok, _store, thread_b} = Store.load(store, "thread-b") + + assert 
Thread.entry_count(thread_a) == 2 + assert Thread.entry_count(thread_b) == 1 + assert Thread.last(thread_a).payload.content == "A2" + assert Thread.last(thread_b).payload.content == "B" + end + end + + describe "Entry struct handling" do + test "handles Entry structs directly" do + {:ok, store} = Store.new(JournalBacked) + + entry = Entry.new(kind: :message, payload: %{role: "user", content: "Test"}) + {:ok, _store, thread} = Store.append(store, "entry-struct", [entry]) + + assert Thread.entry_count(thread) == 1 + assert Thread.last(thread).kind == :message + end + end +end diff --git a/test/jido/thread/store_test.exs b/test/jido/thread/store_test.exs new file mode 100644 index 0000000..d4836f9 --- /dev/null +++ b/test/jido/thread/store_test.exs @@ -0,0 +1,97 @@ +defmodule Jido.Thread.StoreTest do + use ExUnit.Case, async: true + + alias Jido.Thread + alias Jido.Thread.Store + + describe "Store.new/0" do + test "creates store with InMemory adapter" do + assert {:ok, %Store{adapter: Store.Adapters.InMemory}} = Store.new() + end + end + + describe "Store.save/2 and Store.load/2" do + test "roundtrip correctly" do + {:ok, store} = Store.new() + thread = Thread.new(id: "t1", metadata: %{user: "alice"}) + + {:ok, store} = Store.save(store, thread) + {:ok, _store, loaded} = Store.load(store, "t1") + + assert loaded.id == "t1" + assert loaded.metadata == %{user: "alice"} + end + end + + describe "Store.load/2" do + test "returns {:error, _, :not_found} for missing thread" do + {:ok, store} = Store.new() + + assert {:error, _store, :not_found} = Store.load(store, "nonexistent") + end + end + + describe "Store.append/3" do + test "creates thread if missing and appends entries" do + {:ok, store} = Store.new() + + entry = %{kind: :message, payload: %{role: "user", content: "Hello"}} + {:ok, store, thread} = Store.append(store, "t2", entry) + + assert thread.id == "t2" + assert Thread.entry_count(thread) == 1 + assert Thread.last(thread).kind == :message + + {:ok, _store, 
loaded} = Store.load(store, "t2") + assert loaded.id == "t2" + assert Thread.entry_count(loaded) == 1 + end + + test "appends to existing thread with correct seq" do + {:ok, store} = Store.new() + + entry1 = %{kind: :message, payload: %{role: "user", content: "First"}} + {:ok, store, _thread} = Store.append(store, "t3", entry1) + + entry2 = %{kind: :message, payload: %{role: "assistant", content: "Second"}} + {:ok, store, thread} = Store.append(store, "t3", entry2) + + assert Thread.entry_count(thread) == 2 + assert Thread.last(thread).seq == 1 + assert Thread.get_entry(thread, 0).payload.content == "First" + assert Thread.get_entry(thread, 1).payload.content == "Second" + + {:ok, _store, loaded} = Store.load(store, "t3") + assert Thread.entry_count(loaded) == 2 + end + end + + describe "Store.delete/2" do + test "removes thread" do + {:ok, store} = Store.new() + thread = Thread.new(id: "t4") + + {:ok, store} = Store.save(store, thread) + {:ok, store, _loaded} = Store.load(store, "t4") + + {:ok, store} = Store.delete(store, "t4") + assert {:error, _store, :not_found} = Store.load(store, "t4") + end + end + + describe "Store.list/1" do + test "returns all thread IDs" do + {:ok, store} = Store.new() + + {:ok, store, ids} = Store.list(store) + assert ids == [] + + {:ok, store} = Store.save(store, Thread.new(id: "t5")) + {:ok, store} = Store.save(store, Thread.new(id: "t6")) + {:ok, store} = Store.save(store, Thread.new(id: "t7")) + + {:ok, _store, ids} = Store.list(store) + assert Enum.sort(ids) == ["t5", "t6", "t7"] + end + end +end diff --git a/test/jido/thread/strategy_integration_test.exs b/test/jido/thread/strategy_integration_test.exs new file mode 100644 index 0000000..c90692c --- /dev/null +++ b/test/jido/thread/strategy_integration_test.exs @@ -0,0 +1,323 @@ +defmodule JidoTest.Thread.StrategyIntegrationTest do + use ExUnit.Case, async: true + + alias Jido.Agent + alias Jido.Thread + alias Jido.Thread.Agent, as: ThreadAgent + + defmodule SimpleAction do + 
@moduledoc false + use Jido.Action, + name: "simple_action", + schema: [] + + def run(_params, _context), do: {:ok, %{executed: true}} + end + + defmodule ValueAction do + @moduledoc false + use Jido.Action, + name: "value_action", + schema: [value: [type: :integer, required: true]] + + def run(%{value: value}, _context), do: {:ok, %{value: value}} + end + + defmodule FailingAction do + @moduledoc false + use Jido.Action, + name: "failing_action", + schema: [] + + def run(_params, _context), do: {:error, "intentional failure"} + end + + defmodule DirectTestAgent do + @moduledoc false + use Jido.Agent, + name: "direct_test_agent", + strategy: Jido.Agent.Strategy.Direct, + schema: [value: [type: :integer, default: 0]] + + def signal_routes, do: [] + end + + defmodule DirectThreadAgent do + @moduledoc false + use Jido.Agent, + name: "direct_thread_agent", + strategy: {Jido.Agent.Strategy.Direct, thread?: true}, + schema: [value: [type: :integer, default: 0]] + + def signal_routes, do: [] + end + + defmodule FSMTestAgent do + @moduledoc false + use Jido.Agent, + name: "fsm_test_agent", + strategy: Jido.Agent.Strategy.FSM, + schema: [value: [type: :integer, default: 0]] + + def signal_routes, do: [] + end + + defmodule FSMThreadAgent do + @moduledoc false + use Jido.Agent, + name: "fsm_thread_agent", + strategy: {Jido.Agent.Strategy.FSM, thread?: true}, + schema: [value: [type: :integer, default: 0]] + + def signal_routes, do: [] + end + + describe "Direct strategy without thread?" 
do + test "behavior unchanged, no thread created" do + agent = DirectTestAgent.new() + {updated, directives} = DirectTestAgent.cmd(agent, SimpleAction) + + assert updated.state.executed == true + assert directives == [] + refute ThreadAgent.has_thread?(updated) + end + + test "multiple actions execute without thread" do + agent = DirectTestAgent.new() + + {updated, _} = + DirectTestAgent.cmd(agent, [ + SimpleAction, + {ValueAction, %{value: 42}} + ]) + + assert updated.state.executed == true + assert updated.state.value == 42 + refute ThreadAgent.has_thread?(updated) + end + end + + describe "Direct strategy with thread?: true" do + test "creates thread and appends instruction_start/end entries" do + agent = DirectThreadAgent.new() + {updated, directives} = DirectThreadAgent.cmd(agent, SimpleAction) + + assert updated.state.executed == true + assert directives == [] + assert ThreadAgent.has_thread?(updated) + + thread = ThreadAgent.get(updated) + assert Thread.entry_count(thread) == 2 + + entries = Thread.to_list(thread) + [start_entry, end_entry] = entries + + assert start_entry.kind == :instruction_start + assert start_entry.payload.action == SimpleAction + + assert end_entry.kind == :instruction_end + assert end_entry.payload.action == SimpleAction + assert end_entry.payload.status == :ok + end + + test "tracks param keys but not values" do + agent = DirectThreadAgent.new() + {updated, _} = DirectThreadAgent.cmd(agent, {ValueAction, %{value: 42}}) + + thread = ThreadAgent.get(updated) + [start_entry, _end_entry] = Thread.to_list(thread) + + assert start_entry.payload.action == ValueAction + assert :value in start_entry.payload.param_keys + refute Map.has_key?(start_entry.payload, :value) + end + + test "records :error status on failing action" do + agent = DirectThreadAgent.new() + {updated, directives} = DirectThreadAgent.cmd(agent, FailingAction) + + assert [%Jido.Agent.Directive.Error{}] = directives + + thread = ThreadAgent.get(updated) + entries = 
Thread.to_list(thread) + end_entry = List.last(entries) + + assert end_entry.kind == :instruction_end + assert end_entry.payload.status == :error + end + + test "tracks multiple instructions in sequence" do + agent = DirectThreadAgent.new() + + {updated, _} = + DirectThreadAgent.cmd(agent, [SimpleAction, {ValueAction, %{value: 100}}]) + + thread = ThreadAgent.get(updated) + assert Thread.entry_count(thread) == 4 + + entries = Thread.to_list(thread) + kinds = Enum.map(entries, & &1.kind) + assert kinds == [:instruction_start, :instruction_end, :instruction_start, :instruction_end] + end + + test "continues tracking when thread already exists" do + agent = DirectTestAgent.new() + agent = ThreadAgent.ensure(agent) + agent = ThreadAgent.append(agent, %{kind: :note, payload: %{text: "existing"}}) + + {updated, _} = DirectTestAgent.cmd(agent, SimpleAction) + + thread = ThreadAgent.get(updated) + assert Thread.entry_count(thread) == 3 + + entries = Thread.to_list(thread) + assert hd(entries).kind == :note + end + end + + describe "FSM strategy without thread?" 
do + test "behavior unchanged, no thread created" do + agent = FSMTestAgent.new() + {updated, directives} = FSMTestAgent.cmd(agent, SimpleAction) + + assert updated.state.executed == true + assert directives == [] + refute ThreadAgent.has_thread?(updated) + end + + test "multiple actions execute without thread" do + agent = FSMTestAgent.new() + + {updated, _} = + FSMTestAgent.cmd(agent, [ + SimpleAction, + {ValueAction, %{value: 42}} + ]) + + assert updated.state.executed == true + assert updated.state.value == 42 + refute ThreadAgent.has_thread?(updated) + end + end + + describe "FSM strategy with thread?: true" do + test "creates thread and appends checkpoint entries for transitions" do + agent = FSMThreadAgent.new() + {updated, directives} = FSMThreadAgent.cmd(agent, SimpleAction) + + assert updated.state.executed == true + assert directives == [] + assert ThreadAgent.has_thread?(updated) + + thread = ThreadAgent.get(updated) + entries = Thread.to_list(thread) + + checkpoint_entries = Enum.filter(entries, &(&1.kind == :checkpoint)) + + # init checkpoint + 2 transition checkpoints + assert length(checkpoint_entries) == 3 + + [init_checkpoint | transition_checkpoints] = checkpoint_entries + assert init_checkpoint.payload.event == :init + assert init_checkpoint.payload.fsm_state == "idle" + + [processing_checkpoint, idle_checkpoint] = transition_checkpoints + assert processing_checkpoint.payload.event == :transition + assert processing_checkpoint.payload.fsm_state == "processing" + + assert idle_checkpoint.payload.event == :transition + assert idle_checkpoint.payload.fsm_state == "idle" + end + + test "tracks instruction_start/end entries alongside checkpoints" do + agent = FSMThreadAgent.new() + {updated, _} = FSMThreadAgent.cmd(agent, SimpleAction) + + thread = ThreadAgent.get(updated) + entries = Thread.to_list(thread) + + kinds = Enum.map(entries, & &1.kind) + assert :checkpoint in kinds + assert :instruction_start in kinds + assert :instruction_end in kinds + 
end + + test "records :error status on failing action" do + agent = FSMThreadAgent.new() + {updated, directives} = FSMThreadAgent.cmd(agent, FailingAction) + + assert [%Jido.Agent.Directive.Error{}] = directives + + thread = ThreadAgent.get(updated) + entries = Thread.to_list(thread) + + instruction_end = Enum.find(entries, &(&1.kind == :instruction_end)) + assert instruction_end.payload.status == :error + end + + test "tracks multiple instructions with checkpoints" do + agent = FSMThreadAgent.new() + + {updated, _} = + FSMThreadAgent.cmd(agent, [SimpleAction, {ValueAction, %{value: 100}}]) + + thread = ThreadAgent.get(updated) + entries = Thread.to_list(thread) + + checkpoint_count = Enum.count(entries, &(&1.kind == :checkpoint)) + instruction_start_count = Enum.count(entries, &(&1.kind == :instruction_start)) + instruction_end_count = Enum.count(entries, &(&1.kind == :instruction_end)) + + # 1 init + 2 transitions + assert checkpoint_count == 3 + assert instruction_start_count == 2 + assert instruction_end_count == 2 + end + + test "continues tracking when thread already exists" do + agent = FSMTestAgent.new() + agent = ThreadAgent.ensure(agent) + agent = ThreadAgent.append(agent, %{kind: :note, payload: %{text: "existing"}}) + + {updated, _} = FSMTestAgent.cmd(agent, SimpleAction) + + thread = ThreadAgent.get(updated) + entries = Thread.to_list(thread) + + assert hd(entries).kind == :note + assert Thread.entry_count(thread) > 1 + end + end + + describe "FSM init with thread?" do + test "appends checkpoint entry on init when thread? 
enabled" do + {:ok, agent} = Agent.new(%{id: "test"}) + + ctx = %{ + agent_module: FSMTestAgent, + strategy_opts: [thread?: true, initial_state: "idle"] + } + + {agent, _directives} = Jido.Agent.Strategy.FSM.init(agent, ctx) + + assert ThreadAgent.has_thread?(agent) + thread = ThreadAgent.get(agent) + entries = Thread.to_list(thread) + + assert length(entries) == 1 + [checkpoint] = entries + assert checkpoint.kind == :checkpoint + assert checkpoint.payload.event == :init + assert checkpoint.payload.fsm_state == "idle" + end + + test "no checkpoint when thread? not enabled" do + {:ok, agent} = Agent.new(%{id: "test"}) + ctx = %{agent_module: FSMTestAgent, strategy_opts: []} + + {agent, _directives} = Jido.Agent.Strategy.FSM.init(agent, ctx) + + refute ThreadAgent.has_thread?(agent) + end + end +end diff --git a/test/jido/thread_test.exs b/test/jido/thread_test.exs new file mode 100644 index 0000000..9f65e37 --- /dev/null +++ b/test/jido/thread_test.exs @@ -0,0 +1,348 @@ +defmodule JidoTest.ThreadTest do + use JidoTest.Case, async: true + + alias Jido.Thread + alias Jido.Thread.Entry + + describe "Entry.new/1" do + test "creates entry with defaults" do + entry = Entry.new(%{}) + + assert entry.seq == 0 + assert entry.kind == :note + assert entry.payload == %{} + assert entry.refs == %{} + assert is_integer(entry.at) + end + + test "creates entry from keyword list" do + entry = Entry.new(kind: :message, payload: %{role: "user"}) + + assert entry.kind == :message + assert entry.payload == %{role: "user"} + end + + test "creates entry with all attributes" do + now = System.system_time(:millisecond) + + entry = + Entry.new(%{ + id: "entry_123", + seq: 5, + at: now, + kind: :tool_call, + payload: %{name: "search"}, + refs: %{signal_id: "sig_1"} + }) + + assert entry.id == "entry_123" + assert entry.seq == 5 + assert entry.at == now + assert entry.kind == :tool_call + assert entry.payload == %{name: "search"} + assert entry.refs == %{signal_id: "sig_1"} + end + + test 
"accepts string keys in map" do + entry = Entry.new(%{"kind" => :error, "payload" => %{"msg" => "failed"}}) + + assert entry.kind == :error + assert entry.payload == %{"msg" => "failed"} + end + end + + describe "Thread.new/1" do + test "creates empty thread with defaults" do + thread = Thread.new() + + assert String.starts_with?(thread.id, "thread_") + assert thread.rev == 0 + assert thread.entries == [] + assert is_integer(thread.created_at) + assert is_integer(thread.updated_at) + assert thread.metadata == %{} + assert thread.stats == %{entry_count: 0} + end + + test "accepts custom id" do + thread = Thread.new(id: "my_thread") + + assert thread.id == "my_thread" + end + + test "accepts custom metadata" do + thread = Thread.new(metadata: %{user_id: "u1", session: "s1"}) + + assert thread.metadata == %{user_id: "u1", session: "s1"} + end + + test "accepts custom timestamp via now option" do + fixed_time = 1_700_000_000_000 + thread = Thread.new(now: fixed_time) + + assert thread.created_at == fixed_time + assert thread.updated_at == fixed_time + end + end + + describe "Thread.append/2" do + test "appends single entry as map" do + thread = + Thread.new() + |> Thread.append(%{kind: :message, payload: %{content: "hello"}}) + + assert Thread.entry_count(thread) == 1 + assert thread.rev == 1 + + entry = Thread.last(thread) + assert entry.seq == 0 + assert entry.kind == :message + assert entry.payload == %{content: "hello"} + end + + test "appends Entry struct" do + entry = Entry.new(kind: :note, payload: %{text: "annotation"}) + + thread = + Thread.new() + |> Thread.append(entry) + + assert Thread.entry_count(thread) == 1 + + appended = Thread.last(thread) + assert appended.kind == :note + assert appended.seq == 0 + end + + test "appends multiple entries as list" do + entries = [ + %{kind: :message, payload: %{role: "user"}}, + %{kind: :message, payload: %{role: "assistant"}} + ] + + thread = + Thread.new() + |> Thread.append(entries) + + assert 
Thread.entry_count(thread) == 2 + assert thread.rev == 2 + + [first, second] = Thread.to_list(thread) + assert first.seq == 0 + assert second.seq == 1 + end + + test "assigns monotonically increasing seq" do + thread = + Thread.new() + |> Thread.append(%{kind: :message}) + |> Thread.append(%{kind: :message}) + |> Thread.append(%{kind: :message}) + + seqs = thread |> Thread.to_list() |> Enum.map(& &1.seq) + assert seqs == [0, 1, 2] + end + + test "increments rev on each append" do + thread = Thread.new() + assert thread.rev == 0 + + thread = Thread.append(thread, %{kind: :message}) + assert thread.rev == 1 + + thread = Thread.append(thread, [%{kind: :message}, %{kind: :message}]) + assert thread.rev == 3 + end + + test "updates updated_at timestamp" do + old_time = 1_700_000_000_000 + thread = Thread.new(now: old_time) + + Process.sleep(1) + thread = Thread.append(thread, %{kind: :message}) + + assert thread.updated_at > old_time + end + + test "generates entry id if not provided" do + thread = + Thread.new() + |> Thread.append(%{kind: :message}) + + entry = Thread.last(thread) + assert String.starts_with?(entry.id, "entry_") + end + + test "preserves provided entry id" do + thread = + Thread.new() + |> Thread.append(%{id: "custom_id", kind: :message}) + + entry = Thread.last(thread) + assert entry.id == "custom_id" + end + end + + describe "Thread.entry_count/1" do + test "returns 0 for empty thread" do + thread = Thread.new() + assert Thread.entry_count(thread) == 0 + end + + test "returns correct count after appends" do + thread = + Thread.new() + |> Thread.append(%{kind: :message}) + |> Thread.append([%{kind: :message}, %{kind: :message}]) + + assert Thread.entry_count(thread) == 3 + end + end + + describe "Thread.last/1" do + test "returns nil for empty thread" do + thread = Thread.new() + assert Thread.last(thread) == nil + end + + test "returns last appended entry" do + thread = + Thread.new() + |> Thread.append(%{kind: :message, payload: %{n: 1}}) + |> 
Thread.append(%{kind: :message, payload: %{n: 2}}) + + last = Thread.last(thread) + assert last.payload == %{n: 2} + assert last.seq == 1 + end + end + + describe "Thread.get_entry/2" do + test "returns nil for non-existent seq" do + thread = Thread.new() + assert Thread.get_entry(thread, 0) == nil + end + + test "returns entry by seq" do + thread = + Thread.new() + |> Thread.append(%{kind: :message, payload: %{n: 0}}) + |> Thread.append(%{kind: :message, payload: %{n: 1}}) + |> Thread.append(%{kind: :message, payload: %{n: 2}}) + + entry = Thread.get_entry(thread, 1) + assert entry.payload == %{n: 1} + assert entry.seq == 1 + end + end + + describe "Thread.to_list/1" do + test "returns empty list for empty thread" do + thread = Thread.new() + assert Thread.to_list(thread) == [] + end + + test "returns all entries in order" do + thread = + Thread.new() + |> Thread.append(%{kind: :a}) + |> Thread.append(%{kind: :b}) + |> Thread.append(%{kind: :c}) + + kinds = thread |> Thread.to_list() |> Enum.map(& &1.kind) + assert kinds == [:a, :b, :c] + end + end + + describe "Thread.filter_by_kind/2" do + test "returns empty list when no matches" do + thread = + Thread.new() + |> Thread.append(%{kind: :message}) + + assert Thread.filter_by_kind(thread, :tool_call) == [] + end + + test "filters by single kind" do + thread = + Thread.new() + |> Thread.append(%{kind: :message}) + |> Thread.append(%{kind: :tool_call}) + |> Thread.append(%{kind: :message}) + + messages = Thread.filter_by_kind(thread, :message) + assert length(messages) == 2 + assert Enum.all?(messages, &(&1.kind == :message)) + end + + test "filters by multiple kinds" do + thread = + Thread.new() + |> Thread.append(%{kind: :message}) + |> Thread.append(%{kind: :tool_call}) + |> Thread.append(%{kind: :tool_result}) + |> Thread.append(%{kind: :note}) + + tools = Thread.filter_by_kind(thread, [:tool_call, :tool_result]) + assert length(tools) == 2 + assert Enum.all?(tools, &(&1.kind in [:tool_call, :tool_result])) + 
end + end + + describe "Thread.slice/3" do + test "returns empty list for empty thread" do + thread = Thread.new() + assert Thread.slice(thread, 0, 10) == [] + end + + test "returns entries in seq range inclusive" do + thread = + Thread.new() + |> Thread.append(%{kind: :a}) + |> Thread.append(%{kind: :b}) + |> Thread.append(%{kind: :c}) + |> Thread.append(%{kind: :d}) + |> Thread.append(%{kind: :e}) + + sliced = Thread.slice(thread, 1, 3) + assert length(sliced) == 3 + + kinds = Enum.map(sliced, & &1.kind) + assert kinds == [:b, :c, :d] + end + + test "handles out of bounds gracefully" do + thread = + Thread.new() + |> Thread.append(%{kind: :a}) + |> Thread.append(%{kind: :b}) + + sliced = Thread.slice(thread, 5, 10) + assert sliced == [] + end + + test "handles partial overlap" do + thread = + Thread.new() + |> Thread.append(%{kind: :a}) + |> Thread.append(%{kind: :b}) + |> Thread.append(%{kind: :c}) + + sliced = Thread.slice(thread, 1, 100) + assert length(sliced) == 2 + + kinds = Enum.map(sliced, & &1.kind) + assert kinds == [:b, :c] + end + end + + describe "immutability" do + test "append returns new thread without modifying original" do + original = Thread.new() + updated = Thread.append(original, %{kind: :message}) + + assert Thread.entry_count(original) == 0 + assert Thread.entry_count(updated) == 1 + end + end +end