nyo16 · nyo16 · May 6, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,85 @@
 
 All notable changes to this project will be documented in this file.
 
+## [0.15.8] - 2026-05-06
+
+### Fixed
+
+- **Vertex AI / Gemini whitespace text parts no longer crash the
+  request pipeline.** Gemini occasionally returns `text` parts whose
+  content is only newlines (e.g. `"\n\n\n"`) — typically between tool
+  calls or as filler when the model is blocked. Ecto's default
+  `:empty_values` for `cast/3` treats whitespace-only strings as
+  empty, so `Nous.Message.ContentPart`'s changeset dropped the
+  `content` field entirely and then raised
+  `%Ecto.InvalidChangesetError{errors: [content: {"content is required",
+  []}]}` from `ContentPart.new!/1`, taking down the whole
+  `Nous.LLM.run_with_tools/6` call. `ContentPart` now overrides
+  `:empty_values` to `[""]` so legitimate whitespace content is
+  preserved, and `Nous.Messages.Gemini.parse_content/1` defensively
+  skips whitespace-only text parts to avoid creating useless
+  `ContentPart`s. The streaming normalizer (`Nous.StreamNormalizer.Gemini`)
+  already had this guard; the non-streaming path is now consistent.
+- **`Nous.Messages.Gemini.parse_content/1` no longer silently drops
+  function calls without `args`.** Nullary tool calls
+  (`%{"functionCall" => %{"name" => "get_time"}}`) were falling into
+  the catch-all clause and disappearing. The pattern now requires only
+  `name` and falls back to `%{}` for `args`, matching the behavior of
+  the sibling `parse_parts/1` helper.
+
+### Added
+
+- **`Nous.Errors.RetryInfo`** parses server-suggested retry hints from
+  provider error responses. Checks `error.details[]` for
+  `google.rpc.RetryInfo` (Vertex AI / Gemini) first, then the
+  `Retry-After` HTTP header. Returns delay in milliseconds, or `nil`
+  when no hint is available — `nil` is itself meaningful for Google
+  APIs, since long-term/daily quota exhaustion deliberately omits
+  `RetryInfo` to discourage retry loops.
+- **`Nous.Errors.ProviderError` gains `:retry_after_ms`** alongside
+  the existing `:status_code`. `Nous.Provider.request/3` and
+  `request_stream/3` now populate both fields automatically when the
+  underlying HTTP layer returns an error tuple, so callers can branch
+  on rate-limit hints without parsing provider-specific bodies:
+
+  ```elixir
+  case Nous.LLM.run_with_tools(...) do
+    {:error, %Nous.Errors.ProviderError{retry_after_ms: ms}} when is_integer(ms) ->
+      {:snooze, ms}                     # use server-suggested delay
+    {:error, %Nous.Errors.ProviderError{status_code: 429}} ->
+      {:snooze, exp_backoff(attempt)}   # rate-limited, no hint
+    ...
+  end
+  ```
+
+- **Gemini/Vertex `finishReason` and `promptFeedback` are surfaced.**
+  `Nous.Messages.Gemini.from_response/1` now stores both in
+  `message.metadata` (when present) and emits a `Logger.warning` when
+  the candidate produced empty content for a non-STOP reason
+  (`SAFETY`, `RECITATION`, `MAX_TOKENS`, etc.) or when the prompt was
+  blocked. Previously these signals were discarded, so blocked
+  generations manifested as silent empty messages with no diagnostic.
+
+### Changed
+
+- **HTTP error tuples now carry response headers.**
+  `Nous.HTTP.Backend.Req`, `Nous.HTTP.Backend.Hackney`, and
+  `Nous.HTTP.StreamBackend.Req` previously returned
+  `{:error, %{status, body}}` and dropped headers entirely, which made
+  it impossible to read `Retry-After`. They now return
+  `{:error, %{status, body, headers}}` with `headers` as a list of
+  `{name, value}` string tuples (compare names case-insensitively).
+  Existing pattern matches on `%{status: _, body: _}` continue to work
+  since map matching is non-exhaustive.
+- **Gemini tool-call ID generation unified.**
+  `Nous.Messages.Gemini.parse_content/1` previously used
+  `"gemini_#{:rand.uniform(10_000)}"` (~50% birthday-paradox collision
+  at ~118 calls) while `parse_parts/1` used
+  `"call_#{:rand.uniform(1_000_000)}"` — two formats, two ranges. Both
+  now share a `generate_tool_call_id/0` helper using 64 bits of
+  `:crypto.strong_rand_bytes/1`, base64url-encoded with the
+  `gemini_` prefix preserved.
+
 ## [0.15.7] - 2026-05-05
 
 ### Changed

diff --git a/lib/nous/errors.ex b/lib/nous/errors.ex
@@ -85,21 +85,33 @@ defmodule Nous.Errors do
     Error from an LLM provider.
 
     Raised when a provider API call fails.
+
+    ## Fields
+
+      * `:provider` — provider id atom (e.g., `:vertex_ai`)
+      * `:status_code` — HTTP status when applicable
+      * `:retry_after_ms` — server-suggested backoff in milliseconds, parsed
+        from the response body (`google.rpc.RetryInfo`) or `Retry-After`
+        header. `nil` when the failure is not retry-hinted (e.g. daily quota
+        exhaustion deliberately omits `RetryInfo` per Google's spec).
+      * `:details` — raw error payload from the HTTP layer
     """
 
-    defexception [:message, :provider, :status_code, :details]
+    defexception [:message, :provider, :status_code, :retry_after_ms, :details]
 
     @type t :: %__MODULE__{
             message: String.t(),
             provider: atom() | nil,
             status_code: integer() | nil,
+            retry_after_ms: pos_integer() | nil,
             details: any()
           }
 
     @impl true
     def exception(opts) when is_list(opts) do
       provider = Keyword.get(opts, :provider)
       status_code = Keyword.get(opts, :status_code)
+      retry_after_ms = Keyword.get(opts, :retry_after_ms)
       details = Keyword.get(opts, :details)
 
       message =
@@ -112,6 +124,7 @@ defmodule Nous.Errors do
         message: message,
         provider: provider,
         status_code: status_code,
+        retry_after_ms: retry_after_ms,
         details: details
       }
     end

diff --git a/lib/nous/errors/retry_info.ex b/lib/nous/errors/retry_info.ex
@@ -0,0 +1,103 @@
+defmodule Nous.Errors.RetryInfo do
+  @moduledoc """
+  Parse server-suggested retry delays from provider error responses.
+
+  Two sources are checked, body first then headers:
+
+  1. **Body** — Google APIs (Vertex AI, Gemini) embed
+     `google.rpc.RetryInfo` inside `error.details[]` with a
+     `retryDelay` field as a `google.protobuf.Duration` string
+     (e.g. `"34s"`, `"1.500s"`).
+  2. **Headers** — Standard HTTP `Retry-After` (RFC 7231). Integer
+     seconds are supported; HTTP-date form is intentionally not handled
+     here as no LLM provider in production uses it for rate limits.
+
+  Returns the suggested delay in **milliseconds**, or `nil` when no
+  hint is available. A missing hint is itself meaningful for Google
+  APIs — daily/long-term quota exhaustion deliberately omits
+  `RetryInfo` to discourage retry loops, so callers should treat
+  `nil` as "do not auto-retry".
+  """
+
+  @retry_info_type "type.googleapis.com/google.rpc.RetryInfo"
+
+  @doc """
+  Extract a retry delay (ms) from an HTTP error tuple's payload.
+
+  Accepts the shape produced by `Nous.HTTP.Backend` implementations:
+  `%{status: integer, body: term, headers: list}`. Missing fields are
+  tolerated; positive sub-millisecond delays are reported as `1`.
+
+  ## Examples
+
+      iex> RetryInfo.parse(%{
+      ...>   status: 429,
+      ...>   body: %{"error" => %{"details" => [
+      ...>     %{"@type" => "type.googleapis.com/google.rpc.RetryInfo",
+      ...>       "retryDelay" => "34s"}
+      ...>   ]}}
+      ...> })
+      34_000
+
+      iex> RetryInfo.parse(%{status: 429, headers: [{"retry-after", "60"}]})
+      60_000
+
+      iex> RetryInfo.parse(%{status: 429, body: %{"error" => %{"message" => "rate limited"}}})
+      nil
+  """
+  @spec parse(any()) :: pos_integer() | nil
+  def parse(%{} = error) do
+    body = Map.get(error, :body)
+    headers = Map.get(error, :headers, [])
+
+    from_body(body) || from_headers(headers)
+  end
+
+  def parse(_), do: nil
+
+  # ---------------------------------------------------------------------------
+  # Body: google.rpc.RetryInfo inside error.details[]
+
+  defp from_body(%{"error" => %{"details" => details}}) when is_list(details) do
+    Enum.find_value(details, fn
+      %{"@type" => @retry_info_type, "retryDelay" => delay} -> parse_duration(delay)
+      _ -> nil
+    end)
+  end
+
+  defp from_body(_), do: nil
+
+  # google.protobuf.Duration "<seconds>s"; rounded to ms and clamped to >= 1.
+  defp parse_duration(s) when is_binary(s) do
+    case Float.parse(s) do
+      {seconds, "s"} when seconds > 0 -> max(round(seconds * 1000), 1)
+      _ -> nil
+    end
+  end
+
+  defp parse_duration(_), do: nil
+
+  # ---------------------------------------------------------------------------
+  # Headers: Retry-After (case-insensitive name, surrounding whitespace ignored)
+
+  defp from_headers(headers) when is_list(headers) do
+    Enum.find_value(headers, fn
+      {name, value} ->
+        if String.downcase(to_string(name)) == "retry-after" do
+          parse_retry_after(to_string(value))
+        end
+
+      _ ->
+        nil
+    end)
+  end
+
+  defp from_headers(_), do: nil
+
+  defp parse_retry_after(s) do
+    case Integer.parse(String.trim(s)) do
+      {seconds, ""} when seconds > 0 -> seconds * 1000
+      _ -> nil
+    end
+  end
+end
diff --git a/lib/nous/http/backend.ex b/lib/nous/http/backend.ex
@@ -10,8 +10,11 @@ defmodule Nous.HTTP.Backend do
   `docs/benchmarks/http_backend.md` for performance characteristics.
 
   Custom backends just need to implement `c:post/4` and return one of:
-  `{:ok, decoded_body}` for 2xx, `{:error, %{status: status, body: body}}`
-  for 4xx/5xx, or `{:error, term()}` for transport / decode failures.
+  `{:ok, decoded_body}` for 2xx,
+  `{:error, %{status: status, body: body, headers: headers}}` for 4xx/5xx
+  (headers as a list of `{name, value}` tuples — used by
+  `Nous.Errors.RetryInfo` to extract `Retry-After` hints), or
+  `{:error, term()}` for transport / decode failures.
 
   Streaming requests do NOT go through this behaviour — those always
   use hackney's `:async, :once` mode for backpressure (see

diff --git a/lib/nous/http/backend/hackney.ex b/lib/nous/http/backend/hackney.ex
@@ -63,19 +63,25 @@ defmodule Nous.HTTP.Backend.Hackney do
       {:ok, status, _resp_headers, body_bin} when status in 200..299 ->
         {:ok, decode_body(body_bin)}
 
-      {:ok, status, _resp_headers, body_bin} ->
+      {:ok, status, resp_headers, body_bin} ->
         decoded = decode_body(body_bin)
 
         Logger.warning("HTTP request failed with status #{status}: #{truncate_for_log(decoded)}")
 
-        {:error, %{status: status, body: decoded}}
+        # Headers surfaced for Nous.Errors.RetryInfo. Names and values are
+        # coerced to strings so downstream parsing sees a uniform shape.
+        {:error, %{status: status, body: decoded, headers: stringify_headers(resp_headers)}}
 
       {:error, reason} = err ->
         Logger.error("Hackney request error: #{inspect(reason)}")
         err
     end
   end
 
+  # Coerce to strings and lowercase names, matching the Req backend's shape.
+  defp stringify_headers(headers) when is_list(headers),
+    do: Enum.map(headers, fn {k, v} -> {String.downcase(to_string(k)), to_string(v)} end)
+
   defp decode_body(""), do: %{}
 
   defp decode_body(bin) when is_binary(bin) do

diff --git a/lib/nous/http/backend/req.ex b/lib/nous/http/backend/req.ex
@@ -36,12 +36,16 @@ defmodule Nous.HTTP.Backend.Req do
       {:ok, %Req.Response{status: status, body: response_body}} when status in 200..299 ->
         {:ok, response_body}
 
-      {:ok, %Req.Response{status: status, body: response_body}} ->
+      {:ok, %Req.Response{status: status, body: response_body, headers: resp_headers}} ->
         Logger.warning(
           "HTTP request failed with status #{status}: #{truncate_for_log(response_body)}"
         )
 
-        {:error, %{status: status, body: response_body}}
+        # Headers are surfaced (Retry-After etc.) so Nous.Errors.RetryInfo
+        # can extract server-suggested backoff. Req returns headers as a
+        # map of lowercased name => list of values; flatten to a list of
+        # {name, value} pairs to match the shape other layers expect.
+        {:error, %{status: status, body: response_body, headers: normalize_headers(resp_headers)}}
 
       {:error, %Mint.TransportError{reason: reason} = error} ->
         Logger.error("Transport error: #{inspect(reason)}")
@@ -53,6 +57,12 @@ defmodule Nous.HTTP.Backend.Req do
     end
   end
 
+  # Req exposes response headers as %{"name" => ["value", ...]}. Expand each
+  # entry into one {name, value} tuple per value (the list shape RetryInfo reads).
+  defp normalize_headers(headers) when is_map(headers) do
+    for {name, values} <- headers, value <- values, do: {name, value}
+  end
+
   defp truncate_for_log(data) when is_binary(data) do
     if byte_size(data) > 500 do
       String.slice(data, 0, 500) <> "... (truncated)"

diff --git a/lib/nous/http/stream_backend/req.ex b/lib/nous/http/stream_backend/req.ex
@@ -95,9 +95,19 @@ defmodule Nous.HTTP.StreamBackend.Req do
         {:ok, %Req.Response{status: status}} when status in 200..299 ->
           send(parent, {ref, :done})
 
-        {:ok, %Req.Response{status: status, body: response_body}} ->
+        {:ok, %Req.Response{status: status, body: response_body, headers: resp_headers}} ->
           Logger.error("Req stream got error status #{status}")
-          send(parent, {ref, {:error, %{status: status, body: response_body}}})
+
+          send(
+            parent,
+            {ref,
+             {:error,
+              %{
+                status: status,
+                body: response_body,
+                headers: normalize_headers(resp_headers)
+              }}}
+          )
 
         {:error, reason} ->
           Logger.error("Req stream error: #{inspect(reason)}")
@@ -172,6 +182,12 @@ defmodule Nous.HTTP.StreamBackend.Req do
     end
   end
 
+  # Same flattening as Nous.HTTP.Backend.Req.normalize_headers/1: Req's header
+  # map becomes the [{name, value}] tuple list that RetryInfo expects.
+  defp normalize_headers(headers) when is_map(headers) do
+    for {name, values} <- headers, value <- values, do: {name, value}
+  end
+
   defp cleanup(%{task: nil}), do: :ok
 
   defp cleanup(%{task: task}) do

diff --git a/lib/nous/message/content_part.ex b/lib/nous/message/content_part.ex
@@ -473,8 +473,11 @@ defmodule Nous.Message.ContentPart do
   # Private functions
 
   defp changeset(content_part, attrs) do
+    # Override Ecto's default :empty_values, which treats whitespace-only
+    # strings as empty and drops them. Gemini/Vertex sometimes returns text
+    # parts that are just newlines, and they're legitimate content here.
     content_part
-    |> cast(attrs, [:type, :content, :options])
+    |> cast(attrs, [:type, :content, :options], empty_values: [""])
     |> validate_required([:type])
     |> validate_content()
   end
@@ -490,9 +493,6 @@ defmodule Nous.Message.ContentPart do
       {_, nil} ->
         add_error(changeset, :content, "content is required")
 
-      {_, ""} ->
-        add_error(changeset, :content, "content cannot be empty")
-
       {:image_url, content} ->
         validate_image_url(changeset, content)