diff --git a/CHANGELOG.md b/CHANGELOG.md index 20d9a77..73a94b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,41 @@ All notable changes to this project will be documented in this file. +## [0.15.5] - 2026-05-01 + +### Fixed + +- Both Req-based HTTP backends (`Nous.HTTP.Backend.Req` and + `Nous.HTTP.StreamBackend.Req`) now actually use the configured + `Nous.Finch` pool. Previously they ignored the `:finch_name` opt + built by `Nous.Provider` and let Req spin up its own default Finch + instance, leaving the supervised `Nous.Finch` pool (started by + `Nous.Application` with `size: 10, count: 1`) idle. Both backends + now read `:finch_name` from per-call opts, falling back to + `Application.get_env(:nous, :finch, Nous.Finch)`. Net effect: + `Nous.Finch` becomes the live default for both streaming and + non-streaming on Req, so pool tuning via app config actually takes + effect. (Note: Req disallows passing `:finch` together with + `:connect_options`; connect timeouts are now pool-level — configure + on the `Nous.Finch` pool itself if a non-default is needed.) + +### Changed + +- **Default timeouts increased to 3 minutes (180_000 ms) across the + board.** The previous 60s default routinely tripped on reasoning + models and longer completions. Affected: + - `Nous.Model` `receive_timeout` default → 180_000 + - `Nous.Model.default_receive_timeout/1` per-provider: + cloud/custom → 180_000, llamacpp → 300_000 (up from 120_000) + - Provider `@default_timeout` (OpenAI, Anthropic, Mistral, VertexAI, + OpenAICompatible) → 180_000 + - Provider `@streaming_timeout` (Anthropic, Mistral, VertexAI, + OpenAICompatible) → 300_000 (up from 120_000) + - HTTP backend defaults (Req + Hackney, both streaming and + non-streaming) → 180_000 + + Per-call `:timeout` / `:receive_timeout` opts continue to override. + ## [0.15.4] - 2026-05-01 Pluggable streaming HTTP backends + hackney 4 pull-mode bug fix. 
diff --git a/lib/nous/http/backend/hackney.ex b/lib/nous/http/backend/hackney.ex index 18fb211..1380adb 100644 --- a/lib/nous/http/backend/hackney.ex +++ b/lib/nous/http/backend/hackney.ex @@ -23,7 +23,9 @@ defmodule Nous.HTTP.Backend.Hackney do require Logger - @default_timeout 60_000 + # 3 minutes — LLMs with reasoning/long completions routinely exceed + # the previous 60s default. Per-call `:timeout` opt overrides. + @default_timeout 180_000 @default_connect_timeout 30_000 @impl Nous.HTTP.Backend diff --git a/lib/nous/http/backend/req.ex b/lib/nous/http/backend/req.ex index 636aa42..653a9d4 100644 --- a/lib/nous/http/backend/req.ex +++ b/lib/nous/http/backend/req.ex @@ -13,20 +13,25 @@ defmodule Nous.HTTP.Backend.Req do require Logger - @default_timeout 60_000 - @default_connect_timeout 30_000 + # 3 minutes — LLMs with reasoning/long completions routinely exceed + # the previous 60s default. Per-call `:timeout` opt overrides. + @default_timeout 180_000 @impl Nous.HTTP.Backend def post(url, body, headers, opts \\ []) when is_binary(url) and is_map(body) and is_list(headers) do timeout = Keyword.get(opts, :timeout, @default_timeout) - connect_timeout = Keyword.get(opts, :connect_timeout, @default_connect_timeout) + finch_name = Keyword.get(opts, :finch_name) || Application.get_env(:nous, :finch, Nous.Finch) + # Note: Req disallows both `:finch` and `:connect_options` — connect + # timeouts are pool-level when using a named Finch pool. If callers + # need a custom connect timeout, configure it on the Finch pool + # itself (`Nous.Application` starts `Nous.Finch`). 
case Req.post(url, json: body, headers: headers, receive_timeout: timeout, - connect_options: [timeout: connect_timeout] + finch: finch_name ) do {:ok, %Req.Response{status: status, body: response_body}} when status in 200..299 -> {:ok, response_body} diff --git a/lib/nous/http/stream_backend.ex b/lib/nous/http/stream_backend.ex index cd4b8a6..cc899ab 100644 --- a/lib/nous/http/stream_backend.ex +++ b/lib/nous/http/stream_backend.ex @@ -52,7 +52,7 @@ defmodule Nous.HTTP.StreamBackend do parsed events. ## Options - * `:timeout` — receive timeout in milliseconds (default: `60_000`) + * `:timeout` — receive timeout in milliseconds (default: `180_000`) * `:connect_timeout` — TCP connect timeout in milliseconds (default: `30_000`) * `:stream_parser` — module implementing `parse_buffer/1` for non-SSE formats (e.g. JSON-array streams). Defaults to SSE. diff --git a/lib/nous/http/stream_backend/hackney.ex b/lib/nous/http/stream_backend/hackney.ex index 2568cec..1233e36 100644 --- a/lib/nous/http/stream_backend/hackney.ex +++ b/lib/nous/http/stream_backend/hackney.ex @@ -44,7 +44,10 @@ defmodule Nous.HTTP.StreamBackend.Hackney do alias Nous.Providers.HTTP - @default_timeout 60_000 + # 3 minutes — LLM streams (especially with reasoning) can sit silent + # between chunks long enough to trip a tighter timeout. Per-call + # `:timeout` opt overrides. + @default_timeout 180_000 @default_connect_timeout 30_000 @impl Nous.HTTP.StreamBackend diff --git a/lib/nous/http/stream_backend/req.ex b/lib/nous/http/stream_backend/req.ex index 96fe252..554e899 100644 --- a/lib/nous/http/stream_backend/req.ex +++ b/lib/nous/http/stream_backend/req.ex @@ -32,8 +32,10 @@ defmodule Nous.HTTP.StreamBackend.Req do alias Nous.Providers.HTTP - @default_timeout 60_000 - @default_connect_timeout 30_000 + # 3 minutes — LLM streams (especially with reasoning) can sit silent + # between chunks long enough to trip a tighter timeout. Per-call + # `:timeout` opt overrides. 
+ @default_timeout 180_000 @impl Nous.HTTP.StreamBackend def stream(url, body, headers, opts \\ []) @@ -41,13 +43,13 @@ defmodule Nous.HTTP.StreamBackend.Req do def stream(url, body, headers, opts) when is_binary(url) and is_map(body) and is_list(headers) do timeout = Keyword.get(opts, :timeout, @default_timeout) - connect_timeout = Keyword.get(opts, :connect_timeout, @default_connect_timeout) stream_parser = Keyword.get(opts, :stream_parser) + finch_name = Keyword.get(opts, :finch_name) || Application.get_env(:nous, :finch, Nous.Finch) parent = self() ref = make_ref() - task = start_request_task(url, body, headers, timeout, connect_timeout, parent, ref) + task = start_request_task(url, body, headers, timeout, finch_name, parent, ref) state = %{ ref: ref, @@ -69,14 +71,14 @@ defmodule Nous.HTTP.StreamBackend.Req do {:ok, stream} end - defp start_request_task(url, body, headers, timeout, connect_timeout, parent, ref) do + defp start_request_task(url, body, headers, timeout, finch_name, parent, ref) do Task.async(fn -> result = Req.post(url, json: body, headers: headers, receive_timeout: timeout, - connect_options: [timeout: connect_timeout], + finch: finch_name, into: fn {:data, chunk}, {req, resp} -> if resp.status in 200..299 do send(parent, {ref, {:chunk, chunk}}) diff --git a/lib/nous/model.ex b/lib/nous/model.ex index 2f77d5c..da8c89c 100644 --- a/lib/nous/model.ex +++ b/lib/nous/model.ex @@ -49,8 +49,9 @@ defmodule Nous.Model do :api_key, :organization, :stream_normalizer, - # 60 seconds default (OpenaiEx default is 15s which is too short for local models) - receive_timeout: 60_000, + # 3 minutes default — LLMs with reasoning/long completions routinely + # exceed shorter timeouts; local models can be slower still. 
+ receive_timeout: 180_000, default_settings: %{} ] @@ -216,7 +217,7 @@ defmodule Nous.Model do * `:base_url` - Custom API base URL * `:api_key` - API key (defaults to environment config) * `:organization` - Organization ID (for OpenAI) - * `:receive_timeout` - HTTP receive timeout in milliseconds (default: 60000). + * `:receive_timeout` - HTTP receive timeout in milliseconds (default: 180000). Increase this for local models that may take longer to respond. * `:default_settings` - Default model settings (temperature, max_tokens, etc.) * `:stream_normalizer` - Custom stream normalizer module implementing `Nous.StreamNormalizer` behaviour @@ -302,10 +303,10 @@ defmodule Nous.Model do defp default_receive_timeout(:vllm), do: 120_000 # 2 minutes for local SGLang defp default_receive_timeout(:sglang), do: 120_000 - # 2 minutes for local LlamaCpp - defp default_receive_timeout(:llamacpp), do: 120_000 - # 2 minutes for custom endpoints - defp default_receive_timeout(:custom), do: 120_000 - # 60 seconds for cloud providers - defp default_receive_timeout(_provider), do: 60_000 + # 5 minutes for local LlamaCpp (slow first-token on cold weights) + defp default_receive_timeout(:llamacpp), do: 300_000 + # 3 minutes for custom endpoints + defp default_receive_timeout(:custom), do: 180_000 + # 3 minutes for cloud providers (reasoning models can take a while) + defp default_receive_timeout(_provider), do: 180_000 end diff --git a/lib/nous/providers/anthropic.ex b/lib/nous/providers/anthropic.ex index cc6c4d3..85f8358 100644 --- a/lib/nous/providers/anthropic.ex +++ b/lib/nous/providers/anthropic.ex @@ -48,8 +48,8 @@ defmodule Nous.Providers.Anthropic do @api_version "2023-06-01" @long_context_beta "context-1m-2025-08-07" - @default_timeout 60_000 - @streaming_timeout 120_000 + @default_timeout 180_000 + @streaming_timeout 300_000 @impl Nous.Provider def chat(params, opts \\ []) do diff --git a/lib/nous/providers/http.ex b/lib/nous/providers/http.ex index bad54d5..da7991d 100644 
--- a/lib/nous/providers/http.ex +++ b/lib/nous/providers/http.ex @@ -82,7 +82,7 @@ defmodule Nous.Providers.HTTP do ## Options * `:backend` - Backend module (overrides env / config / default) - * `:timeout` - Request timeout in ms (default: 60_000) + * `:timeout` - Request timeout in ms (default: 180_000) ## Error Reasons * `%{status: integer(), body: term()}` - HTTP error response @@ -158,7 +158,7 @@ defmodule Nous.Providers.HTTP do ## Options * `:stream_backend` - Backend module (overrides env / config / default) - * `:timeout` - Receive timeout in ms (default: 60_000) + * `:timeout` - Receive timeout in ms (default: 180_000) * `:connect_timeout` - TCP connect timeout in ms (default: 30_000) * `:stream_parser` - Module for parsing the stream buffer (default: SSE). Must implement `parse_buffer/1` returning `{events, remaining_buffer}`. diff --git a/lib/nous/providers/mistral.ex b/lib/nous/providers/mistral.ex index b0c8abc..94d19b7 100644 --- a/lib/nous/providers/mistral.ex +++ b/lib/nous/providers/mistral.ex @@ -45,8 +45,8 @@ defmodule Nous.Providers.Mistral do alias Nous.Providers.HTTP - @default_timeout 60_000 - @streaming_timeout 120_000 + @default_timeout 180_000 + @streaming_timeout 300_000 @impl Nous.Provider def chat(params, opts \\ []) do diff --git a/lib/nous/providers/openai.ex b/lib/nous/providers/openai.ex index c7b294d..7f8ac84 100644 --- a/lib/nous/providers/openai.ex +++ b/lib/nous/providers/openai.ex @@ -51,7 +51,7 @@ defmodule Nous.Providers.OpenAI do alias Nous.Providers.HTTP - @default_timeout 60_000 + @default_timeout 180_000 @streaming_timeout 120_000 # Reasoning models have different requirements. 
diff --git a/lib/nous/providers/openai_compatible.ex b/lib/nous/providers/openai_compatible.ex index 6931874..6d25e2a 100644 --- a/lib/nous/providers/openai_compatible.ex +++ b/lib/nous/providers/openai_compatible.ex @@ -132,8 +132,8 @@ defmodule Nous.Providers.OpenAICompatible do alias Nous.Providers.HTTP - @default_timeout 60_000 - @streaming_timeout 120_000 + @default_timeout 180_000 + @streaming_timeout 300_000 @impl Nous.Provider def chat(params, opts \\ []) do diff --git a/lib/nous/providers/vertex_ai.ex b/lib/nous/providers/vertex_ai.ex index 70e1dcf..8fdc08e 100644 --- a/lib/nous/providers/vertex_ai.ex +++ b/lib/nous/providers/vertex_ai.ex @@ -156,8 +156,8 @@ defmodule Nous.Providers.VertexAI do require Logger - @default_timeout 60_000 - @streaming_timeout 120_000 + @default_timeout 180_000 + @streaming_timeout 300_000 # Override to convert from generic format to Gemini's format (same API format) defp build_request_params(model, messages, settings) do diff --git a/mix.exs b/mix.exs index 1e47f68..b0aca1e 100644 --- a/mix.exs +++ b/mix.exs @@ -1,7 +1,7 @@ defmodule Nous.MixProject do use Mix.Project - @version "0.15.4" + @version "0.15.5" @source_url "https://github.com/nyo16/nous" def project do diff --git a/test/nous/llm_test.exs b/test/nous/llm_test.exs index afa8df0..52a5967 100644 --- a/test/nous/llm_test.exs +++ b/test/nous/llm_test.exs @@ -64,8 +64,8 @@ defmodule Nous.LLMTest do {:ok, _text} = Nous.LLM.generate_text("openai:gpt-4", "hi") [model] = CapturingDispatcher.get_models() - # OpenAI default is 60_000 - assert model.receive_timeout == 60_000 + # OpenAI default is 180_000 (3 minutes, bumped in 0.15.5) + assert model.receive_timeout == 180_000 end test "with string model for local provider uses its default" do @@ -102,7 +102,8 @@ defmodule Nous.LLMTest do _chunks = Enum.to_list(stream) [model] = CapturingDispatcher.get_models() - assert model.receive_timeout == 60_000 + # OpenAI default is 180_000 (3 minutes, bumped in 0.15.5) + assert 
model.receive_timeout == 180_000 end test "with %Model{} struct preserves receive_timeout" do diff --git a/test/nous/model_test.exs b/test/nous/model_test.exs index 0db079f..39a0a93 100644 --- a/test/nous/model_test.exs +++ b/test/nous/model_test.exs @@ -89,11 +89,11 @@ defmodule Nous.ModelTest do end test "sets default receive_timeout based on provider" do - # Cloud providers get 60 seconds + # Cloud providers get 3 minutes (reasoning models can take a while) openai = Model.new(:openai, "gpt-4") - assert openai.receive_timeout == 60_000 + assert openai.receive_timeout == 180_000 - # Local providers get 120 seconds + # Local OpenAI-compatible servers (lmstudio/vllm/sglang) still get 2 minutes; llamacpp gets 5 lmstudio = Model.new(:lmstudio, "qwen3") assert lmstudio.receive_timeout == 120_000