diff --git a/guides/openai.md b/guides/openai.md
index c0189fa2..d432f509 100644
--- a/guides/openai.md
+++ b/guides/openai.md
@@ -8,6 +8,11 @@ Access GPT models including standard chat models and reasoning models (o1, o3, G
 OPENAI_API_KEY=sk-...
 ```
 
+## Attachments
+
+OpenAI Chat Completions API only supports image attachments (JPEG, PNG, GIF, WebP).
+For document support (PDFs, etc.), use Anthropic or Google providers.
+
 ## Dual API Architecture
 
 OpenAI provider automatically routes between two APIs based on model metadata:
diff --git a/guides/xai.md b/guides/xai.md
index a5eef949..94924a44 100644
--- a/guides/xai.md
+++ b/guides/xai.md
@@ -8,6 +8,11 @@ Access Grok models with real-time web search and reasoning capabilities.
 XAI_API_KEY=xai-...
 ```
 
+## Attachments
+
+xAI Chat Completions API only supports image attachments (JPEG, PNG, GIF, WebP).
+For document support (PDFs, etc.), use Anthropic or Google providers.
+
 ## Provider Options
 
 Passed via `:provider_options` keyword:
diff --git a/lib/req_llm/provider/defaults.ex b/lib/req_llm/provider/defaults.ex
index 842cc46b..5d203b80 100644
--- a/lib/req_llm/provider/defaults.ex
+++ b/lib/req_llm/provider/defaults.ex
@@ -739,6 +739,49 @@ defmodule ReqLLM.Provider.Defaults do
 
   defp merge_content_metadata(base, _), do: base
 
+  @image_mimes ~w(image/jpeg image/png image/gif image/webp)
+
+  @doc """
+  Validates that a context contains only image file attachments.
+
+  Returns `:ok` if every `:file` content part is an image (JPEG, PNG, GIF, WebP),
+  or `{:error, reason}` with a message that lists each unsupported media type once,
+  in order of first appearance. A file part whose media type is missing or is not
+  a string is rejected and reported as `unknown`.
+
+  Messages whose content is not a list are skipped, and any argument that is not
+  a `ReqLLM.Context` yields `:ok`.
+
+  This is used by providers like OpenAI and xAI that only support image attachments
+  via their Chat Completions API.
+  """
+  @spec validate_image_only_attachments(ReqLLM.Context.t()) :: :ok | {:error, String.t()}
+  def validate_image_only_attachments(%ReqLLM.Context{messages: messages}) do
+    unsupported =
+      for %{content: parts} when is_list(parts) <- messages,
+          %{type: :file, media_type: media_type} <- parts,
+          media_type not in @image_mimes,
+          uniq: true,
+          do: media_type_label(media_type)
+
+    case unsupported do
+      [] ->
+        :ok
+
+      labels ->
+        {:error,
+         "This provider only supports image attachments (JPEG, PNG, GIF, WebP). " <>
+           "Found unsupported file types: #{Enum.join(labels, ", ")}. " <>
+           "Consider using Anthropic or Google for document support."}
+    end
+  end
+
+  def validate_image_only_attachments(_), do: :ok
+
+  # Keeps the error message readable when a file part carries no usable media type.
+  defp media_type_label(media_type) when is_binary(media_type), do: media_type
+  defp media_type_label(_), do: "unknown"
+
   @doc """
   Decodes OpenAI-format response body to ReqLLM.Response.
 
diff --git a/lib/req_llm/providers/openai.ex b/lib/req_llm/providers/openai.ex
index dfc486e9..4d41b3f1 100644
--- a/lib/req_llm/providers/openai.ex
+++ b/lib/req_llm/providers/openai.ex
@@ -295,6 +295,7 @@ defmodule ReqLLM.Providers.OpenAI do
   def prepare_request(:chat, model_spec, prompt, opts) do
     with {:ok, model} <- ReqLLM.model(model_spec),
          {:ok, context} <- ReqLLM.Context.normalize(prompt, opts),
+         :ok <- validate_attachments(context),
          opts_with_context = Keyword.put(opts, :context, context),
          http_opts = Keyword.get(opts, :req_http_options, []),
          {:ok, processed_opts} <-
@@ -653,4 +654,15 @@ defmodule ReqLLM.Providers.OpenAI do
   end
 
   defp enforce_strict_schema_requirements(schema), do: schema
+
+  # Runs the shared image-only check and wraps its message in an Invalid.Parameter error.
+  defp validate_attachments(context) do
+    case ReqLLM.Provider.Defaults.validate_image_only_attachments(context) do
+      :ok ->
+        :ok
+
+      {:error, message} ->
+        {:error, ReqLLM.Error.Invalid.Parameter.exception(parameter: message)}
+    end
+  end
 end
diff --git a/lib/req_llm/providers/xai.ex b/lib/req_llm/providers/xai.ex
index 82bbbda5..91b68fa4 100644
--- a/lib/req_llm/providers/xai.ex
+++ b/lib/req_llm/providers/xai.ex
@@ -191,6 +191,7 @@ defmodule ReqLLM.Providers.XAI do
   defp prepare_chat_request(model_spec, prompt, opts) do
     with {:ok, model} <- ReqLLM.model(model_spec),
          {:ok, context} <- ReqLLM.Context.normalize(prompt, opts),
+         :ok <- validate_attachments(context),
          opts_with_context = Keyword.put(opts, :context, context),
          http_opts = Keyword.get(opts, :req_http_options, []),
          {:ok, processed_opts} <-
@@ -1114,4 +1115,15 @@ defmodule ReqLLM.Providers.XAI do
   end
 
   defp maybe_add_additional_properties(schema, false), do: schema
+
+  # Runs the shared image-only check and wraps its message in an Invalid.Parameter error.
+  defp validate_attachments(context) do
+    case ReqLLM.Provider.Defaults.validate_image_only_attachments(context) do
+      :ok ->
+        :ok
+
+      {:error, message} ->
+        {:error, ReqLLM.Error.Invalid.Parameter.exception(parameter: message)}
+    end
+  end
 end
diff --git a/test/providers/openai_test.exs b/test/providers/openai_test.exs
index 5619a3de..2d6d8809 100644
--- a/test/providers/openai_test.exs
+++ b/test/providers/openai_test.exs
@@ -1120,6 +1120,69 @@ defmodule ReqLLM.Providers.OpenAITest do
     end
   end
 
+  describe "attachment validation" do
+    test "accepts image attachments" do
+      {:ok, model} = ReqLLM.model("openai:gpt-4o")
+
+      image_part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "image.png", "image/png")
+      message = %ReqLLM.Message{role: :user, content: [image_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:ok, _request} = OpenAI.prepare_request(:chat, model, context, [])
+    end
+
+    test "accepts jpeg, gif, and webp attachments" do
+      {:ok, model} = ReqLLM.model("openai:gpt-4o")
+
+      for mime <- ~w(image/jpeg image/gif image/webp) do
+        part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "image", mime)
+        message = %ReqLLM.Message{role: :user, content: [part]}
+        context = %ReqLLM.Context{messages: [message]}
+
+        assert {:ok, _request} = OpenAI.prepare_request(:chat, model, context, [])
+      end
+    end
+
+    test "rejects PDF attachments with clear error" do
+      {:ok, model} = ReqLLM.model("openai:gpt-4o")
+
+      pdf_part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "doc.pdf", "application/pdf")
+      message = %ReqLLM.Message{role: :user, content: [pdf_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:error, error} = OpenAI.prepare_request(:chat, model, context, [])
+
+      assert %ReqLLM.Error.Invalid.Parameter{} = error
+      assert error.parameter =~ "only supports image attachments"
+      assert error.parameter =~ "application/pdf"
+      assert error.parameter =~ "Anthropic or Google"
+    end
+
+    test "rejects text file attachments" do
+      {:ok, model} = ReqLLM.model("openai:gpt-4o")
+
+      text_part = ReqLLM.Message.ContentPart.file("content", "file.txt", "text/plain")
+      message = %ReqLLM.Message{role: :user, content: [text_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:error, error} = OpenAI.prepare_request(:chat, model, context, [])
+
+      assert %ReqLLM.Error.Invalid.Parameter{} = error
+      assert error.parameter =~ "text/plain"
+    end
+
+    test "allows mixed text and image content" do
+      {:ok, model} = ReqLLM.model("openai:gpt-4o")
+
+      text_part = ReqLLM.Message.ContentPart.text("Describe this image")
+      image_part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "image.png", "image/png")
+      message = %ReqLLM.Message{role: :user, content: [text_part, image_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:ok, _request} = OpenAI.prepare_request(:chat, model, context, [])
+    end
+  end
+
   describe "ResponsesAPI tool encoding" do
     test "passes through built-in web_search tool definitions" do
       {:ok, model} = ReqLLM.model("openai:gpt-5-nano")
diff --git a/test/providers/xai_test.exs b/test/providers/xai_test.exs
index e514aab0..bf4e6ac2 100644
--- a/test/providers/xai_test.exs
+++ b/test/providers/xai_test.exs
@@ -668,6 +668,69 @@ defmodule ReqLLM.Providers.XAITest do
     end
   end
 
+  describe "attachment validation" do
+    test "accepts image attachments" do
+      {:ok, model} = ReqLLM.model("xai:grok-3")
+
+      image_part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "image.png", "image/png")
+      message = %ReqLLM.Message{role: :user, content: [image_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:ok, _request} = XAI.prepare_request(:chat, model, context, [])
+    end
+
+    test "accepts jpeg, gif, and webp attachments" do
+      {:ok, model} = ReqLLM.model("xai:grok-3")
+
+      for mime <- ~w(image/jpeg image/gif image/webp) do
+        part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "image", mime)
+        message = %ReqLLM.Message{role: :user, content: [part]}
+        context = %ReqLLM.Context{messages: [message]}
+
+        assert {:ok, _request} = XAI.prepare_request(:chat, model, context, [])
+      end
+    end
+
+    test "rejects PDF attachments with clear error" do
+      {:ok, model} = ReqLLM.model("xai:grok-3")
+
+      pdf_part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "doc.pdf", "application/pdf")
+      message = %ReqLLM.Message{role: :user, content: [pdf_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:error, error} = XAI.prepare_request(:chat, model, context, [])
+
+      assert %ReqLLM.Error.Invalid.Parameter{} = error
+      assert error.parameter =~ "only supports image attachments"
+      assert error.parameter =~ "application/pdf"
+      assert error.parameter =~ "Anthropic or Google"
+    end
+
+    test "rejects text file attachments" do
+      {:ok, model} = ReqLLM.model("xai:grok-3")
+
+      text_part = ReqLLM.Message.ContentPart.file("content", "file.txt", "text/plain")
+      message = %ReqLLM.Message{role: :user, content: [text_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:error, error} = XAI.prepare_request(:chat, model, context, [])
+
+      assert %ReqLLM.Error.Invalid.Parameter{} = error
+      assert error.parameter =~ "text/plain"
+    end
+
+    test "allows mixed text and image content" do
+      {:ok, model} = ReqLLM.model("xai:grok-3")
+
+      text_part = ReqLLM.Message.ContentPart.text("Describe this image")
+      image_part = ReqLLM.Message.ContentPart.file(<<1, 2, 3>>, "image.png", "image/png")
+      message = %ReqLLM.Message{role: :user, content: [text_part, image_part]}
+      context = %ReqLLM.Context{messages: [message]}
+
+      assert {:ok, _request} = XAI.prepare_request(:chat, model, context, [])
+    end
+  end
+
   describe "context validation" do
     test "multiple system messages should fail" do
       invalid_context =