posit-dev
diff --git a/‎chatlas/__init__.py‎
Lines changed: 4 additions & 1 deletion b/‎chatlas/__init__.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎chatlas/_auto.py‎
Lines changed: 2 additions & 1 deletion b/‎chatlas/_auto.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎chatlas/_chat.py‎
Lines changed: 11 additions & 6 deletions b/‎chatlas/_chat.py‎
Lines changed: 11 additions & 6 deletions
diff --git a/‎chatlas/_content.py‎
Lines changed: 38 additions & 0 deletions b/‎chatlas/_content.py‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎chatlas/_provider.py‎
Lines changed: 6 additions & 0 deletions b/‎chatlas/_provider.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎chatlas/_provider_anthropic.py‎
Lines changed: 12 additions & 59 deletions b/‎chatlas/_provider_anthropic.py‎
Lines changed: 12 additions & 59 deletions
diff --git a/‎chatlas/_provider_github.py‎
Lines changed: 2 additions & 1 deletion b/‎chatlas/_provider_github.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎chatlas/_provider_google.py‎
Lines changed: 14 additions & 14 deletions b/‎chatlas/_provider_google.py‎
Lines changed: 14 additions & 14 deletions
diff --git a/‎chatlas/_provider_ollama.py‎
Lines changed: 2 additions & 1 deletion b/‎chatlas/_provider_ollama.py‎
Lines changed: 2 additions & 1 deletion
@@ -27,7 +27,9 @@
 from ._provider_huggingface import ChatHuggingFace
 from ._provider_mistral import ChatMistral
 from ._provider_ollama import ChatOllama
-from ._provider_openai import ChatAzureOpenAI, ChatOpenAI
+from ._provider_openai import ChatOpenAI
+from ._provider_openai_azure import ChatAzureOpenAI
+from ._provider_openai_responses import ChatOpenAIResponses
 from ._provider_openrouter import ChatOpenRouter
 from ._provider_perplexity import ChatPerplexity
 from ._provider_portkey import ChatPortkey
@@ -59,6 +61,7 @@
     "ChatMistral",
     "ChatOllama",
     "ChatOpenAI",
+    "ChatOpenAIResponses",
     "ChatOpenRouter",
     "ChatAzureOpenAI",
     "ChatPerplexity",
 
@@ -17,7 +17,8 @@
 from ._provider_huggingface import ChatHuggingFace
 from ._provider_mistral import ChatMistral
 from ._provider_ollama import ChatOllama
-from ._provider_openai import ChatAzureOpenAI, ChatOpenAI
+from ._provider_openai import ChatOpenAI
+from ._provider_openai_azure import ChatAzureOpenAI
 from ._provider_openrouter import ChatOpenRouter
 from ._provider_perplexity import ChatPerplexity
 from ._provider_portkey import ChatPortkey
 
@@ -46,7 +46,7 @@
 from ._logging import log_tool_error
 from ._mcp_manager import MCPSessionManager
 from ._provider import ModelInfo, Provider, StandardModelParams, SubmitInputArgsT
-from ._tokens import compute_cost, get_token_pricing
+from ._tokens import compute_cost, get_token_pricing, tokens_log
 from ._tools import Tool, ToolRejectError
 from ._turn import Turn, user_turn
 from ._typing_extensions import TypedDict, TypeGuard
@@ -2210,12 +2210,11 @@ def emit(text: str | Content):
                 result,
                 has_data_model=data_model is not None,
             )
-
             if echo == "all":
                 emit_other_contents(turn, emit)
 
         else:
-            response = self.provider.chat_perform(
+            result = self.provider.chat_perform(
                 stream=False,
                 turns=[*self._turns, user_turn],
                 tools=self._tools,
@@ -2224,7 +2223,7 @@ def emit(text: str | Content):
             )
 
             turn = self.provider.value_turn(
-                response, has_data_model=data_model is not None
+                result, has_data_model=data_model is not None
             )
             if turn.text:
                 emit(turn.text)
@@ -2233,6 +2232,9 @@ def emit(text: str | Content):
             if echo == "all":
                 emit_other_contents(turn, emit)
 
+        turn.tokens = self.provider.value_tokens(result)
+        if turn.tokens is not None:
+            tokens_log(self.provider, turn.tokens)
         self._turns.extend([user_turn, turn])
 
     async def _submit_turns_async(
@@ -2277,7 +2279,7 @@ def emit(text: str | Content):
                 emit_other_contents(turn, emit)
 
         else:
-            response = await self.provider.chat_perform_async(
+            result = await self.provider.chat_perform_async(
                 stream=False,
                 turns=[*self._turns, user_turn],
                 tools=self._tools,
@@ -2286,7 +2288,7 @@ def emit(text: str | Content):
             )
 
             turn = self.provider.value_turn(
-                response, has_data_model=data_model is not None
+                result, has_data_model=data_model is not None
             )
             if turn.text:
                 emit(turn.text)
@@ -2295,6 +2297,9 @@ def emit(text: str | Content):
             if echo == "all":
                 emit_other_contents(turn, emit)
 
+        turn.tokens = self.provider.value_tokens(result)
+        if turn.tokens is not None:
+            tokens_log(self.provider, turn.tokens)
         self._turns.extend([user_turn, turn])
 
     def _invoke_tool(self, request: ContentToolRequest):
 
@@ -125,6 +125,7 @@ def from_tool(cls, tool: "Tool") -> "ToolInfo":
     "tool_result_resource",
     "json",
     "pdf",
+    "thinking",
 ]
 """
 A discriminated union of all content types.
@@ -682,6 +683,40 @@ def __repr__(self, indent: int = 0):
         return " " * indent + f"<ContentPDF size={len(self.data)}>"
 
 
+class ContentThinking(Content):
+    """
+    Reasoning trace returned by a model
+
+    Carries the reasoning text of models that support extended thinking
+    (like OpenAI's o-series models). It is not meant to be sent back to the
+    model; it is kept for debugging and for understanding the model's reasoning.
+
+    Parameters
+    ----------
+    thinking
+        The thinking/reasoning text from the model.
+    extra
+        Optional metadata (e.g., encrypted content, status information).
+    """
+
+    thinking: str
+    extra: Optional[dict[str, Any]] = None
+
+    content_type: ContentTypeEnum = "thinking"
+
+    def __str__(self):
+        return f"<thinking>\n{self.thinking}\n</thinking>\n"
+
+    def _repr_markdown_(self):
+        return str(self)
+
+    def __repr__(self, indent: int = 0):
+        preview = self.thinking
+        if len(preview) > 50:
+            preview = preview[:50] + "..."
+        return " " * indent + f"<ContentThinking thinking='{preview}'>"
+
+
 ContentUnion = Union[
     ContentText,
     ContentImageRemote,
@@ -692,6 +727,7 @@ def __repr__(self, indent: int = 0):
     ContentToolResultResource,
     ContentJson,
     ContentPDF,
+    ContentThinking,
 ]
 
 
@@ -724,6 +760,8 @@ def create_content(data: dict[str, Any]) -> ContentUnion:
         return ContentJson.model_validate(data)
     elif ct == "pdf":
         return ContentPDF.model_validate(data)
+    elif ct == "thinking":
+        return ContentThinking.model_validate(data)
     else:
         raise ValueError(f"Unknown content type: {ct}")
 
 
@@ -249,6 +249,12 @@ def value_turn(
         has_data_model: bool,
     ) -> Turn: ...
 
+    @abstractmethod
+    def value_tokens(
+        self,
+        completion: ChatCompletionDictT,  # response object; Chat passes the same `result` it gives value_turn()
+    ) -> tuple[int, int, int] | None: ...  # (input, output, cached) token counts, or None
+
     @abstractmethod
     def token_count(
         self,
 
@@ -30,7 +30,7 @@
     StandardModelParamNames,
     StandardModelParams,
 )
-from ._tokens import get_token_pricing, tokens_log
+from ._tokens import get_token_pricing
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, user_turn
 from ._utils import split_http_client_kwargs
@@ -242,28 +242,6 @@ def list_models(self):
 
         return res
 
-    @overload
-    def chat_perform(
-        self,
-        *,
-        stream: Literal[False],
-        turns: list[Turn],
-        tools: dict[str, Tool],
-        data_model: Optional[type[BaseModel]] = None,
-        kwargs: Optional["SubmitInputArgs"] = None,
-    ): ...
-
-    @overload
-    def chat_perform(
-        self,
-        *,
-        stream: Literal[True],
-        turns: list[Turn],
-        tools: dict[str, Tool],
-        data_model: Optional[type[BaseModel]] = None,
-        kwargs: Optional["SubmitInputArgs"] = None,
-    ): ...
-
     def chat_perform(
         self,
         *,
@@ -276,28 +254,6 @@ def chat_perform(
         kwargs = self._chat_perform_args(stream, turns, tools, data_model, kwargs)
         return self._client.messages.create(**kwargs)  # type: ignore
 
-    @overload
-    async def chat_perform_async(
-        self,
-        *,
-        stream: Literal[False],
-        turns: list[Turn],
-        tools: dict[str, Tool],
-        data_model: Optional[type[BaseModel]] = None,
-        kwargs: Optional["SubmitInputArgs"] = None,
-    ): ...
-
-    @overload
-    async def chat_perform_async(
-        self,
-        *,
-        stream: Literal[True],
-        turns: list[Turn],
-        tools: dict[str, Tool],
-        data_model: Optional[type[BaseModel]] = None,
-        kwargs: Optional["SubmitInputArgs"] = None,
-    ): ...
-
     async def chat_perform_async(
         self,
         *,
@@ -411,6 +367,17 @@ def stream_turn(self, completion, has_data_model) -> Turn:
     def value_turn(self, completion, has_data_model) -> Turn:
         return self._as_turn(completion, has_data_model)
 
+    def value_tokens(self, completion):
+        """Return (input, output, cache-read) token counts from the completion's usage."""
+        # N.B. Anthropic doesn't cache by default and chatlas has no manual caching yet.
+        # Only cache *reads* are counted here; cache writes have their own cost and
+        # would need another caching category and per-token cost to track properly.
+        usage = completion.usage
+        n_input = usage.input_tokens
+        n_output = usage.output_tokens
+        n_cache_read = usage.cache_read_input_tokens or 0
+        return (n_input, n_output, n_cache_read)
+
     def token_count(
         self,
         *args: Content | str,
@@ -619,23 +586,9 @@ def _as_turn(self, completion: Message, has_data_model=False) -> Turn:
                         )
                     )
 
-        usage = completion.usage
-        # N.B. Currently, Anthropic doesn't cache by default and we currently do not support
-        # manual caching in chatlas. Note also that this only tracks reads, NOT writes, which
-        # have their own cost. To track that properly, we would need another caching category and per-token cost.
-
-        tokens = (
-            completion.usage.input_tokens,
-            completion.usage.output_tokens,
-            usage.cache_read_input_tokens if usage.cache_read_input_tokens else 0,
-        )
-
-        tokens_log(self, tokens)
-
         return Turn(
             "assistant",
             contents,
-            tokens=tokens,
             finish_reason=completion.stop_reason,
             completion=completion,
         )
 
@@ -7,7 +7,8 @@
 
 from ._chat import Chat
 from ._logging import log_model_default
-from ._provider_openai import ModelInfo, OpenAIProvider
+from ._provider import ModelInfo
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
 
@@ -22,7 +22,7 @@
 from ._logging import log_model_default
 from ._merge import merge_dicts
 from ._provider import ModelInfo, Provider, StandardModelParamNames, StandardModelParams
-from ._tokens import get_token_pricing, tokens_log
+from ._tokens import get_token_pricing
 from ._tools import Tool
 from ._turn import Turn, user_turn
 
@@ -228,6 +228,7 @@ def chat_perform(
 
     def chat_perform(
         self,
+        *,
         stream: bool,
         turns: list[Turn],
         tools: dict[str, Tool],
@@ -264,6 +265,7 @@ async def chat_perform_async(
 
     async def chat_perform_async(
         self,
+        *,
         stream: bool,
         turns: list[Turn],
         tools: dict[str, Tool],
@@ -349,6 +351,17 @@ def value_turn(self, completion, has_data_model) -> Turn:
         completion = cast("GenerateContentResponseDict", completion.model_dump())
         return self._as_turn(completion, has_data_model)
 
+    def value_tokens(self, completion):
+        """Return (uncached input, output, cached input) tokens, or None if no usage."""
+        # NOTE(review): value_turn() calls model_dump() first; confirm a dict arrives here.
+        meta = completion.get("usage_metadata")
+        if meta is None:
+            return None
+        n_cached = meta.get("cached_content_token_count") or 0
+        n_prompt = meta.get("prompt_token_count") or 0
+        n_output = meta.get("candidates_token_count") or 0
+        return (n_prompt - n_cached, n_output, n_cached)
+
     def token_count(
         self,
         *args: Content | str,
@@ -528,25 +541,12 @@ def _as_turn(
                         )
                     )
 
-        usage = message.get("usage_metadata")
-        tokens = (0, 0, 0)
-        if usage:
-            cached = usage.get("cached_content_token_count") or 0
-            tokens = (
-                (usage.get("prompt_token_count") or 0) - cached,
-                usage.get("candidates_token_count") or 0,
-                usage.get("cached_content_token_count") or 0,
-            )
-
-        tokens_log(self, tokens)
-
         if isinstance(finish_reason, FinishReason):
             finish_reason = finish_reason.name
 
         return Turn(
             "assistant",
             contents,
-            tokens=tokens,
             finish_reason=finish_reason,
             completion=message,
         )
 
@@ -7,7 +7,8 @@
 import orjson
 
 from ._chat import Chat
-from ._provider_openai import ModelInfo, OpenAIProvider
+from ._provider import ModelInfo
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING_TYPE, is_testing
 
 if TYPE_CHECKING: