35 changes: 35 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,41 @@

All notable changes to this project will be documented in this file.

## [0.15.5] - 2026-05-01

### Fixed

- Both Req-based HTTP backends (`Nous.HTTP.Backend.Req` and
`Nous.HTTP.StreamBackend.Req`) now actually use the configured
`Nous.Finch` pool. Previously they ignored the `:finch_name` opt
built by `Nous.Provider` and let Req spin up its own default Finch
instance, leaving the supervised `Nous.Finch` pool (started by
`Nous.Application` with `size: 10, count: 1`) idle. Both backends
now read `:finch_name` from per-call opts, falling back to
`Application.get_env(:nous, :finch, Nous.Finch)`. Net effect:
`Nous.Finch` becomes the live default for both streaming and
non-streaming on Req, so pool tuning via app config actually takes
effect. (Note: Req disallows passing `:finch` together with
`:connect_options`; connect timeouts are now pool-level — configure
on the `Nous.Finch` pool itself if a non-default is needed.)
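
  A minimal sketch of the resulting configuration surface (the pool name
  `MyApp.Finch` and the transport timeout value are illustrative
  assumptions, not shipped defaults):

  ```elixir
  # config/config.exs — override the pool name the Req backends fall back to
  config :nous, :finch, MyApp.Finch

  # In your supervision tree: connect timeouts now live on the pool,
  # since Req won't accept :finch together with :connect_options.
  children = [
    {Finch,
     name: MyApp.Finch,
     pools: %{
       default: [size: 10, count: 1, conn_opts: [transport_opts: [timeout: 30_000]]]
     }}
  ]
  ```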

### Changed

- **Default timeouts increased to 3 minutes (180_000 ms) across the
board.** The previous 60s default routinely tripped on reasoning
models and longer completions. Affected:
- `Nous.Model` `receive_timeout` default → 180_000
- `Nous.Model.default_receive_timeout/1` per-provider:
cloud/custom → 180_000, llamacpp → 300_000 (up from 120_000)
- Provider `@default_timeout` (OpenAI, Anthropic, Mistral, VertexAI,
OpenAICompatible) → 180_000
- Provider `@streaming_timeout` (Anthropic, Mistral, VertexAI,
OpenAICompatible) → 300_000 (up from 120_000)
- HTTP backend defaults (Req + Hackney, both streaming and
non-streaming) → 180_000

Per-call `:timeout` / `:receive_timeout` opts continue to override.
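
A sketch of the override precedence (`Nous.HTTP.Backend.Req.post/4` and
`Nous.Model.new/3` match the signatures in this diff; the argument values
are illustrative):

```elixir
# Per-call opts still win over the new 180_000 ms defaults:
{:ok, body} = Nous.HTTP.Backend.Req.post(url, payload, headers, timeout: 60_000)

# Or pin a timeout on the model itself via :receive_timeout:
model = Nous.Model.new(:openai, "gpt-4", receive_timeout: 60_000)
```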

## [0.15.4] - 2026-05-01

Pluggable streaming HTTP backends + hackney 4 pull-mode bug fix.
4 changes: 3 additions & 1 deletion lib/nous/http/backend/hackney.ex
@@ -23,7 +23,9 @@ defmodule Nous.HTTP.Backend.Hackney do

require Logger

@default_timeout 60_000
# 3 minutes — LLMs with reasoning/long completions routinely exceed
# the previous 60s default. Per-call `:timeout` opt overrides.
@default_timeout 180_000
@default_connect_timeout 30_000

@impl Nous.HTTP.Backend
13 changes: 9 additions & 4 deletions lib/nous/http/backend/req.ex
@@ -13,20 +13,25 @@ defmodule Nous.HTTP.Backend.Req do

require Logger

@default_timeout 60_000
@default_connect_timeout 30_000
# 3 minutes — LLMs with reasoning/long completions routinely exceed
# the previous 60s default. Per-call `:timeout` opt overrides.
@default_timeout 180_000

@impl Nous.HTTP.Backend
def post(url, body, headers, opts \\ [])
when is_binary(url) and is_map(body) and is_list(headers) do
timeout = Keyword.get(opts, :timeout, @default_timeout)
connect_timeout = Keyword.get(opts, :connect_timeout, @default_connect_timeout)
finch_name = Keyword.get(opts, :finch_name) || Application.get_env(:nous, :finch, Nous.Finch)

# Note: Req disallows both `:finch` and `:connect_options` — connect
# timeouts are pool-level when using a named Finch pool. If callers
# need a custom connect timeout, configure it on the Finch pool
# itself (`Nous.Application` starts `Nous.Finch`).
case Req.post(url,
json: body,
headers: headers,
receive_timeout: timeout,
connect_options: [timeout: connect_timeout]
finch: finch_name
) do
{:ok, %Req.Response{status: status, body: response_body}} when status in 200..299 ->
{:ok, response_body}
2 changes: 1 addition & 1 deletion lib/nous/http/stream_backend.ex
@@ -52,7 +52,7 @@ defmodule Nous.HTTP.StreamBackend do
parsed events.

## Options
* `:timeout` — receive timeout in milliseconds (default: `60_000`)
* `:timeout` — receive timeout in milliseconds (default: `180_000`)
* `:connect_timeout` — TCP connect timeout in milliseconds (default: `30_000`)
* `:stream_parser` — module implementing `parse_buffer/1` for non-SSE
formats (e.g. JSON-array streams). Defaults to SSE.
5 changes: 4 additions & 1 deletion lib/nous/http/stream_backend/hackney.ex
@@ -44,7 +44,10 @@ defmodule Nous.HTTP.StreamBackend.Hackney do

alias Nous.Providers.HTTP

@default_timeout 60_000
# 3 minutes — LLM streams (especially with reasoning) can sit silent
# between chunks long enough to trip a tighter timeout. Per-call
# `:timeout` opt overrides.
@default_timeout 180_000
@default_connect_timeout 30_000

@impl Nous.HTTP.StreamBackend
14 changes: 8 additions & 6 deletions lib/nous/http/stream_backend/req.ex
@@ -32,22 +32,24 @@ defmodule Nous.HTTP.StreamBackend.Req do

alias Nous.Providers.HTTP

@default_timeout 60_000
@default_connect_timeout 30_000
# 3 minutes — LLM streams (especially with reasoning) can sit silent
# between chunks long enough to trip a tighter timeout. Per-call
# `:timeout` opt overrides.
@default_timeout 180_000

@impl Nous.HTTP.StreamBackend
def stream(url, body, headers, opts \\ [])

def stream(url, body, headers, opts)
when is_binary(url) and is_map(body) and is_list(headers) do
timeout = Keyword.get(opts, :timeout, @default_timeout)
connect_timeout = Keyword.get(opts, :connect_timeout, @default_connect_timeout)
stream_parser = Keyword.get(opts, :stream_parser)
finch_name = Keyword.get(opts, :finch_name) || Application.get_env(:nous, :finch, Nous.Finch)

parent = self()
ref = make_ref()

task = start_request_task(url, body, headers, timeout, connect_timeout, parent, ref)
task = start_request_task(url, body, headers, timeout, finch_name, parent, ref)

state = %{
ref: ref,
@@ -69,14 +71,14 @@ defmodule Nous.HTTP.StreamBackend.Req do
{:ok, stream}
end

defp start_request_task(url, body, headers, timeout, connect_timeout, parent, ref) do
defp start_request_task(url, body, headers, timeout, finch_name, parent, ref) do
Task.async(fn ->
result =
Req.post(url,
json: body,
headers: headers,
receive_timeout: timeout,
connect_options: [timeout: connect_timeout],
finch: finch_name,
into: fn {:data, chunk}, {req, resp} ->
if resp.status in 200..299 do
send(parent, {ref, {:chunk, chunk}})
19 changes: 10 additions & 9 deletions lib/nous/model.ex
@@ -49,8 +49,9 @@ defmodule Nous.Model do
:api_key,
:organization,
:stream_normalizer,
# 60 seconds default (OpenaiEx default is 15s which is too short for local models)
receive_timeout: 60_000,
# 3 minutes default — LLMs with reasoning/long completions routinely
# exceed shorter timeouts; local models can be slower still.
receive_timeout: 180_000,
default_settings: %{}
]

@@ -216,7 +217,7 @@ defmodule Nous.Model do
* `:base_url` - Custom API base URL
* `:api_key` - API key (defaults to environment config)
* `:organization` - Organization ID (for OpenAI)
* `:receive_timeout` - HTTP receive timeout in milliseconds (default: 180_000).
* `:receive_timeout` - HTTP receive timeout in milliseconds (default: 180000).
Increase this for local models that may take longer to respond.
* `:default_settings` - Default model settings (temperature, max_tokens, etc.)
* `:stream_normalizer` - Custom stream normalizer module implementing `Nous.StreamNormalizer` behaviour
@@ -302,10 +303,10 @@ defmodule Nous.Model do
defp default_receive_timeout(:vllm), do: 120_000
# 2 minutes for local SGLang
defp default_receive_timeout(:sglang), do: 120_000
# 2 minutes for local LlamaCpp
defp default_receive_timeout(:llamacpp), do: 120_000
# 2 minutes for custom endpoints
defp default_receive_timeout(:custom), do: 120_000
# 60 seconds for cloud providers
defp default_receive_timeout(_provider), do: 60_000
# 5 minutes for local LlamaCpp (slow first-token on cold weights)
defp default_receive_timeout(:llamacpp), do: 300_000
# 3 minutes for custom endpoints
defp default_receive_timeout(:custom), do: 180_000
# 3 minutes for cloud providers (reasoning models can take a while)
defp default_receive_timeout(_provider), do: 180_000
end
4 changes: 2 additions & 2 deletions lib/nous/providers/anthropic.ex
@@ -48,8 +48,8 @@ defmodule Nous.Providers.Anthropic do

@api_version "2023-06-01"
@long_context_beta "context-1m-2025-08-07"
@default_timeout 60_000
@streaming_timeout 120_000
@default_timeout 180_000
@streaming_timeout 300_000

@impl Nous.Provider
def chat(params, opts \\ []) do
4 changes: 2 additions & 2 deletions lib/nous/providers/http.ex
@@ -82,7 +82,7 @@ defmodule Nous.Providers.HTTP do

## Options
* `:backend` - Backend module (overrides env / config / default)
* `:timeout` - Request timeout in ms (default: 60_000)
* `:timeout` - Request timeout in ms (default: 180_000)

## Error Reasons
* `%{status: integer(), body: term()}` - HTTP error response
@@ -158,7 +158,7 @@ defmodule Nous.Providers.HTTP do

## Options
* `:stream_backend` - Backend module (overrides env / config / default)
* `:timeout` - Receive timeout in ms (default: 60_000)
* `:timeout` - Receive timeout in ms (default: 180_000)
* `:connect_timeout` - TCP connect timeout in ms (default: 30_000)
* `:stream_parser` - Module for parsing the stream buffer (default: SSE).
Must implement `parse_buffer/1` returning `{events, remaining_buffer}`.
4 changes: 2 additions & 2 deletions lib/nous/providers/mistral.ex
@@ -45,8 +45,8 @@ defmodule Nous.Providers.Mistral do

alias Nous.Providers.HTTP

@default_timeout 60_000
@streaming_timeout 120_000
@default_timeout 180_000
@streaming_timeout 300_000

@impl Nous.Provider
def chat(params, opts \\ []) do
2 changes: 1 addition & 1 deletion lib/nous/providers/openai.ex
@@ -51,7 +51,7 @@ defmodule Nous.Providers.OpenAI do

alias Nous.Providers.HTTP

@default_timeout 60_000
@default_timeout 180_000
@streaming_timeout 120_000

# Reasoning models have different requirements.
4 changes: 2 additions & 2 deletions lib/nous/providers/openai_compatible.ex
@@ -132,8 +132,8 @@ defmodule Nous.Providers.OpenAICompatible do

alias Nous.Providers.HTTP

@default_timeout 60_000
@streaming_timeout 120_000
@default_timeout 180_000
@streaming_timeout 300_000

@impl Nous.Provider
def chat(params, opts \\ []) do
4 changes: 2 additions & 2 deletions lib/nous/providers/vertex_ai.ex
@@ -156,8 +156,8 @@ defmodule Nous.Providers.VertexAI do

require Logger

@default_timeout 60_000
@streaming_timeout 120_000
@default_timeout 180_000
@streaming_timeout 300_000

# Override to convert from generic format to Gemini's format (same API format)
defp build_request_params(model, messages, settings) do
2 changes: 1 addition & 1 deletion mix.exs
@@ -1,7 +1,7 @@
defmodule Nous.MixProject do
use Mix.Project

@version "0.15.4"
@version "0.15.5"
@source_url "https://github.com/nyo16/nous"

def project do
7 changes: 4 additions & 3 deletions test/nous/llm_test.exs
@@ -64,8 +64,8 @@ defmodule Nous.LLMTest do
{:ok, _text} = Nous.LLM.generate_text("openai:gpt-4", "hi")

[model] = CapturingDispatcher.get_models()
# OpenAI default is 60_000
assert model.receive_timeout == 60_000
# OpenAI default is 180_000 (3 minutes, bumped in 0.15.5)
assert model.receive_timeout == 180_000
end

test "with string model for local provider uses its default" do
@@ -102,7 +102,8 @@ defmodule Nous.LLMTest do
_chunks = Enum.to_list(stream)

[model] = CapturingDispatcher.get_models()
assert model.receive_timeout == 60_000
# OpenAI default is 180_000 (3 minutes, bumped in 0.15.5)
assert model.receive_timeout == 180_000
end

test "with %Model{} struct preserves receive_timeout" do
6 changes: 3 additions & 3 deletions test/nous/model_test.exs
@@ -89,11 +89,11 @@ defmodule Nous.ModelTest do
end

test "sets default receive_timeout based on provider" do
# Cloud providers get 60 seconds
# Cloud providers get 3 minutes (reasoning models can take a while)
openai = Model.new(:openai, "gpt-4")
assert openai.receive_timeout == 60_000
assert openai.receive_timeout == 180_000

# Local providers get 120 seconds
# Local OpenAI-compatible servers get 2 minutes
lmstudio = Model.new(:lmstudio, "qwen3")
assert lmstudio.receive_timeout == 120_000
