
Commit 1b11822

chore: models update 2024-08-18 (#59)
1 parent: 98d9986

11 files changed: +2392 -2291 lines

.yarn/releases/yarn-4.2.1.cjs (-894)
This file was deleted.

.yarn/releases/yarn-4.4.0.cjs (+925)
Large diffs are not rendered by default.

.yarnrc.yml (+1 -1)

@@ -6,4 +6,4 @@ nmHoistingLimits: workspaces
 
 nodeLinker: node-modules
 
-yarnPath: .yarn/releases/yarn-4.2.1.cjs
+yarnPath: .yarn/releases/yarn-4.4.0.cjs

package.json (+5 -5)

@@ -41,20 +41,20 @@
     "registry": "https://registry.npmjs.org/"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.24.3",
+    "@anthropic-ai/sdk": "^0.26.1",
     "@inkjs/ui": "^1.0.0",
-    "@mistralai/mistralai": "^0.5.0",
+    "@mistralai/mistralai": "^1.0.2",
     "chalk": "^5.3.0",
     "date-fns": "^3.6.0",
     "dotenv": "^16.4.5",
     "ink": "^4.4.1",
     "ink-link": "^3.0.0",
     "ink-text-input": "^5.0.1",
-    "openai": "^4.52.2",
+    "openai": "^4.56.0",
     "prompts": "^2.4.2",
     "react": "^18.2.0",
     "redent": "^4.0.0",
-    "tiktoken": "^1.0.15",
+    "tiktoken": "^1.0.16",
     "update-notifier": "^7.0.0",
     "yargs": "^17.7.2",
     "zod": "^3.23.6",
@@ -80,7 +80,7 @@
     "release-it": "^15.11.0",
     "typescript": "^5.4.5"
   },
-  "packageManager": "yarn@4.2.1",
+  "packageManager": "yarn@4.4.0",
   "engines": {
     "node": ">= 18.0.0"
   },
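
Note: the `@mistralai/mistralai` bump crosses a major version (0.5 → 1.0); that breaking change drives the client-API migration in `src/engine/providers/mistral.ts` below. The remaining bumps are routine minor/patch updates.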

src/commands/chat/state/init.ts (+1 -1)

@@ -23,7 +23,7 @@ export function initChatState(
 
   const modelOrAlias = options.model ?? providerFileConfig.model;
   const model = modelOrAlias
-    ? provider.modelAliases[modelOrAlias] ?? modelOrAlias
+    ? (provider.modelAliases[modelOrAlias] ?? modelOrAlias)
     : provider.defaultModel;
 
   const systemPrompt = providerFileConfig.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
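
The hunk above only adds clarifying parentheses (likely a formatter update); behavior is unchanged. For context, a minimal sketch of the alias-resolution step it touches — the `Provider` shape here is inferred from the diff, not copied from the repo:

```ts
// Sketch only: Provider shape inferred from how the diff uses it.
type Provider = {
  defaultModel: string;
  modelAliases: Record<string, string>;
};

function resolveModel(provider: Provider, modelOrAlias?: string): string {
  // The parentheses make precedence explicit: resolve the alias, fall back
  // to the literal name, and only then does the ternary choose the default.
  return modelOrAlias
    ? (provider.modelAliases[modelOrAlias] ?? modelOrAlias)
    : provider.defaultModel;
}
```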

src/engine/providers/anthropic.ts (+4 -1)

@@ -21,10 +21,13 @@ const Anthropic: Provider = {
   // Price per 1M tokens [input, output].
   // Source: https://docs.anthropic.com/en/docs/about-claude/models
   modelPricing: {
-    'claude-3-haiku-20240307': { inputTokensCost: 0.25, outputTokensCost: 1.25 },
+    // Current models
     'claude-3-5-sonnet-20240620': { inputTokensCost: 3.0, outputTokensCost: 15.0 },
+    'claude-3-haiku-20240307': { inputTokensCost: 0.25, outputTokensCost: 1.25 },
     'claude-3-sonnet-20240229': { inputTokensCost: 3.0, outputTokensCost: 15.0 },
     'claude-3-opus-20240229': { inputTokensCost: 15.0, outputTokensCost: 75.0 },
+
+    // Legacy models
     'claude-2.1': { inputTokensCost: 8.0, outputTokensCost: 24.0 },
     'claude-2.0': { inputTokensCost: 8.0, outputTokensCost: 24.0 },
     'claude-instant-1.2': { inputTokensCost: 0.8, outputTokensCost: 2.4 },
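
With prices listed per 1M tokens, converting usage to dollars is a straight proportion. A hypothetical helper under that assumption — `estimateCost` is illustrative, not part of this codebase:

```ts
// Hypothetical helper: assumes prices are USD per 1M tokens, as the
// "Price per 1M tokens [input, output]" comment in the diff states.
type ModelPricing = { inputTokensCost: number; outputTokensCost: number };

function estimateCost(p: ModelPricing, inputTokens: number, outputTokens: number): number {
  return (inputTokens / 1e6) * p.inputTokensCost + (outputTokens / 1e6) * p.outputTokensCost;
}

// e.g. 10k input / 2k output tokens on claude-3-5-sonnet-20240620:
// 10_000 / 1e6 * 3 + 2_000 / 1e6 * 15 = 0.03 + 0.03 = $0.06
```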

src/engine/providers/mistral.ts (+26 -23)

@@ -1,4 +1,5 @@
-import MistralClient, { type ChatCompletionResponseChunk } from '@mistralai/mistralai';
+import { Mistral as MistralClient } from '@mistralai/mistralai';
+import type { CompletionEvent } from '@mistralai/mistralai/models/components/completionevent.js';
 import { type Message, type ModelResponseUpdate } from '../inference.js';
 import { estimateInputTokens, estimateOutputTokens } from '../tokenizer.js';
 import { responseStyles, type ProviderConfig } from './config.js';
@@ -9,54 +10,56 @@ const Mistral: Provider = {
   name: 'mistral',
   apiKeyUrl: 'https://console.mistral.ai/api-keys/',
 
-  // OpenAI models: https://docs.mistral.ai/platform/endpoints/
+  // Mistral models: https://docs.mistral.ai/getting-started/models/
   defaultModel: 'mistral-large-latest',
 
   // Price per 1M tokens [input, output].
   // Source: https://docs.mistral.ai/platform/pricing/
   modelPricing: {
+    // Current models
+    'open-mistral-nemo': { inputTokensCost: 0.3, outputTokensCost: 0.3 },
+    'open-mistral-nemo-2407': { inputTokensCost: 0.3, outputTokensCost: 0.3 },
+    'mistral-large-latest': { inputTokensCost: 3, outputTokensCost: 9 },
+    'mistral-large-2407': { inputTokensCost: 3, outputTokensCost: 9 },
+    'codestral-latest': { inputTokensCost: 1, outputTokensCost: 3 },
+    'codestral-2405': { inputTokensCost: 1, outputTokensCost: 3 },
+
+    // Legacy models
     'open-mistral-7b': { inputTokensCost: 0.25, outputTokensCost: 0.25 },
     'open-mixtral-8x7b': { inputTokensCost: 0.7, outputTokensCost: 0.7 },
     'open-mixtral-8x22b': { inputTokensCost: 2, outputTokensCost: 6 },
     'mistral-small-latest': { inputTokensCost: 1, outputTokensCost: 3 },
     'mistral-small-2402': { inputTokensCost: 1, outputTokensCost: 3 },
     'mistral-medium-latest': { inputTokensCost: 2.7, outputTokensCost: 8.1 },
     'mistral-medium-2312': { inputTokensCost: 2.7, outputTokensCost: 8.1 },
-    'mistral-large-latest': { inputTokensCost: 4, outputTokensCost: 12 },
-    'mistral-large-2402': { inputTokensCost: 4, outputTokensCost: 12 },
-    'codestral-latest': { inputTokensCost: 1, outputTokensCost: 3 },
-    'codestral-2405': { inputTokensCost: 1, outputTokensCost: 3 },
   },
 
   modelAliases: {
-    mistral: 'open-mistral-7b',
-    mixtral: 'open-mixtral-8x22b',
-    small: 'mistral-small-latest',
-    medium: 'mistral-medium-latest',
+    nemo: 'open-mistral-nemo-2407',
     large: 'mistral-large-latest',
     codestral: 'codestral-latest',
   },
 
   getChatCompletion: async (config: ProviderConfig, messages: Message[]) => {
-    const api = new MistralClient(config.apiKey);
+    const api = new MistralClient({ apiKey: config.apiKey });
     const allMessages = getMessages(config, messages);
 
     const startTime = performance.now();
-    const response = await api.chat({
-      messages: allMessages,
+    const response = await api.chat.complete({
       model: config.model,
+      messages: allMessages,
       ...getMistralResponseStyle(config),
     });
     const responseTime = performance.now() - startTime;
 
     return {
       message: {
         role: 'assistant',
-        content: response.choices[0]?.message.content ?? '',
+        content: response.choices?.[0]?.message?.content ?? '',
       },
       usage: {
-        inputTokens: response.usage?.prompt_tokens ?? 0,
-        outputTokens: response.usage?.completion_tokens ?? 0,
+        inputTokens: response.usage.promptTokens,
+        outputTokens: response.usage.completionTokens,
         requests: 1,
       },
       responseTime,
@@ -70,21 +73,21 @@ const Mistral: Provider = {
     messages: Message[],
     onResponseUpdate: (update: ModelResponseUpdate) => void,
   ) {
-    const api = new MistralClient(config.apiKey);
+    const api = new MistralClient({ apiKey: config.apiKey });
     const allMessages = getMessages(config, messages);
 
     const startTime = performance.now();
-    const stream = await api.chatStream({
+    const stream = await api.chat.stream({
       messages: allMessages,
       model: config.model,
       ...getMistralResponseStyle(config),
     });
 
-    let lastChunk: ChatCompletionResponseChunk | null = null;
+    let lastChunk: CompletionEvent | null = null;
     let content = '';
     for await (const chunk of stream) {
       lastChunk = chunk;
-      content += chunk.choices[0]?.delta?.content || '';
+      content += chunk.data.choices[0]?.delta?.content || '';
       onResponseUpdate({ content });
     }
 
@@ -96,12 +99,12 @@ const Mistral: Provider = {
         content,
       },
       usage: {
-        inputTokens: lastChunk?.usage?.prompt_tokens ?? estimateInputTokens(allMessages),
-        outputTokens: lastChunk?.usage?.completion_tokens ?? estimateOutputTokens(content),
+        inputTokens: lastChunk?.data.usage?.promptTokens ?? estimateInputTokens(allMessages),
+        outputTokens: lastChunk?.data.usage?.completionTokens ?? estimateOutputTokens(content),
         requests: 1,
       },
       responseTime,
-      responseModel: lastChunk?.model || 'unknown',
+      responseModel: lastChunk?.data.model || 'unknown',
       data: lastChunk,
     };
   },
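
The substance of this file is the `@mistralai/mistralai` v0 → v1 migration: the client now takes an options object, chat methods move under `api.chat.*`, usage fields switch to camelCase, and stream iterations yield events whose payload sits under `.data`. A condensed sketch of the new surface as exercised by the diff above — model name and prompt are placeholders, error handling omitted:

```ts
import { Mistral } from '@mistralai/mistralai';

// Sketch of the v1 call shapes shown in the diff; not a full provider.
async function demo() {
  const api = new Mistral({ apiKey: process.env.MISTRAL_API_KEY ?? '' });

  // Non-streaming: api.chat.complete() replaces the old api.chat().
  const response = await api.chat.complete({
    model: 'mistral-large-latest',
    messages: [{ role: 'user', content: 'Hello' }],
  });
  console.log(response.choices?.[0]?.message?.content);
  // Usage fields are camelCase now (previously prompt_tokens / completion_tokens).
  console.log(response.usage.promptTokens, response.usage.completionTokens);

  // Streaming: api.chat.stream() replaces api.chatStream(), and each
  // iteration yields a CompletionEvent whose payload lives under .data.
  const stream = await api.chat.stream({
    model: 'mistral-large-latest',
    messages: [{ role: 'user', content: 'Hello' }],
  });
  let content = '';
  for await (const event of stream) {
    content += event.data.choices[0]?.delta?.content || '';
  }
  console.log(content);
}
```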

src/engine/providers/open-ai.ts (+6 -5)

@@ -10,12 +10,16 @@ const OpenAi: Provider = {
   apiKeyUrl: 'https://platform.openai.com/api-keys',
 
   // OpenAI models: https://platform.openai.com/docs/models
-  defaultModel: 'gpt-4o',
+  defaultModel: 'gpt-4o-2024-08-06',
 
   // Price per 1M tokens [input, output].
   // Source: https://openai.com/pricing
   modelPricing: {
     'gpt-4o': { inputTokensCost: 5, outputTokensCost: 15 },
+    'gpt-4o-2024-08-06': { inputTokensCost: 2.5, outputTokensCost: 10 },
+    'gpt-4o-2024-05-13': { inputTokensCost: 5, outputTokensCost: 15 },
+    'gpt-4o-mini': { inputTokensCost: 0.15, outputTokensCost: 0.6 },
+    'gpt-4o-mini-2024-07-18': { inputTokensCost: 0.15, outputTokensCost: 0.6 },
     'gpt-4-turbo': { inputTokensCost: 10, outputTokensCost: 30 },
     'gpt-4-turbo-2024-04-09': { inputTokensCost: 10, outputTokensCost: 30 },
     'gpt-4': { inputTokensCost: 30, outputTokensCost: 60 },
@@ -25,10 +29,7 @@ const OpenAi: Provider = {
     'gpt-3.5-turbo-instruct': { inputTokensCost: 1.5, outputTokensCost: 2.0 },
   },
 
-  modelAliases: {
-    'gpt-4-turbo-preview': 'gpt-4-turbo',
-    'gpt-3.5': 'gpt-3.5-turbo',
-  },
+  modelAliases: {},
 
   getChatCompletion: async (config: ProviderConfig, messages: Message[]) => {
     const api = new OpenAI({
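
Note that `modelAliases` is emptied rather than updated: shortcuts such as `gpt-3.5` no longer resolve for OpenAI, so models must be referenced by full name (per the resolution logic in `init.ts` above, an unrecognized alias falls through as a literal model name).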

src/engine/providers/perplexity.ts (+15 -13)

@@ -10,35 +10,37 @@ const Perplexity: Provider = {
   apiKeyUrl: 'https://perplexity.ai/settings/api',
 
   // Perplexity models: https://docs.perplexity.ai/docs/model-cards
-  defaultModel: 'llama-3-sonar-large-32k-chat',
+  defaultModel: 'llama-3.1-sonar-huge-128k-online',
 
   // Price per 1M tokens [input, output], per 1k requests.
   // Source: https://docs.perplexity.ai/docs/model-cards
   // Source: https://docs.perplexity.ai/docs/pricing
   modelPricing: {
-    'llama-3-sonar-small-32k-chat': { inputTokensCost: 0.2, outputTokensCost: 0.2 },
-    'llama-3-sonar-small-32k-online': {
+    'llama-3.1-sonar-small-128k-online': {
       inputTokensCost: 0.2,
       outputTokensCost: 0.2,
       requestsCost: 5,
     },
-    'llama-3-sonar-large-32k-chat': { inputTokensCost: 1, outputTokensCost: 1 },
-    'llama-3-sonar-large-32k-online': {
+    'llama-3.1-sonar-large-128k-online': {
       inputTokensCost: 1,
       outputTokensCost: 1,
       requestsCost: 5,
     },
-    'llama-3-8b-instruct': { inputTokensCost: 0.2, outputTokensCost: 0.2 },
-    'llama-3-70b-instruct': { inputTokensCost: 1, outputTokensCost: 1 },
-    'mixtral-8x7b-instruct': { inputTokensCost: 0.6, outputTokensCost: 0.6 },
+    'llama-3.1-sonar-huge-128k-online': {
+      inputTokensCost: 5,
+      outputTokensCost: 5,
+      requestsCost: 5,
+    },
+    'llama-3.1-sonar-small-128k-chat': { inputTokensCost: 0.2, outputTokensCost: 0.2 },
+    'llama-3.1-sonar-large-128k-chat': { inputTokensCost: 1, outputTokensCost: 1 },
+    'llama-3.1-8b-instruct': { inputTokensCost: 0.2, outputTokensCost: 0.2 },
+    'llama-3.1-70b-instruct': { inputTokensCost: 1, outputTokensCost: 1 },
   },
 
   modelAliases: {
-    'small': 'llama-3-sonar-small-32k-chat',
-    'large': 'llama-3-sonar-large-32k-chat',
-    'online': 'llama-3-sonar-large-32k-online',
-    'llama-3': 'llama-3-70b-instruct',
-    'mixtral': 'mixtral-8x7b-instruct',
+    small: 'llama-3.1-sonar-small-128k-online',
+    large: 'llama-3.1-sonar-large-128k-online',
+    huge: 'llama-3.1-sonar-huge-128k-online',
   },
 
   getChatCompletion: async (config: ProviderConfig, messages: Message[]) => {
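
The `-online` models carry an extra `requestsCost`, priced per 1k requests per the comment above, so each call adds `requestsCost / 1000` dollars on top of token costs. A worked example under that reading:

```ts
// Hypothetical arithmetic: token prices per 1M tokens, request price per 1k requests.
// One llama-3.1-sonar-huge-128k-online call, 1k input / 1k output tokens:
const cost =
  (1_000 / 1e6) * 5 + // input tokens:  $0.005
  (1_000 / 1e6) * 5 + // output tokens: $0.005
  (1 / 1_000) * 5;    // one request:   $0.005
// => cost === 0.015 ($0.015 total)
```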

website/docs/getting-started.md (+26 -30)

@@ -68,30 +68,29 @@ CLI options are passed when invoking the `ai` commend:
 
 You should have a relevant API key in your `~/.airc.json` file.
 
-### Models
+### Current Models
 
 <Tabs groupId="provider">
   <TabItem value="openAi" label="Open AI">
 
-| Model           | Alias     | Price: in \| out \* | Notes   |
-| --------------- | --------- | ------------------- | ------- |
-| `gpt-4o`        |           | $5 \| $15           | Default |
-| `gpt-4-turbo`   |           | $10 \| $30          |         |
-| `gpt-4`         |           | $30 \| $60          |         |
-| `gpt-3.5-turbo` | `gpt-3.5` | $0.5 \| $1.5        |         |
+| Model               | Alias | Price: in \| out \* | Notes   |
+| ------------------- | ----- | ------------------- | ------- |
+| `gpt-4o-2024-08-06` |       | $2.5 \| $10         | Default |
+| `gpt-4o`            |       | $5 \| $15           |         |
+| `gpt-4o-mini`       |       | $0.15 \| $0.6       |         |
 
 \* API prices per 1 million input/output tokens
 
-More info: [OpenAI docs](https://platform.openai.com/docs/models)
+More models & info: [OpenAI docs](https://platform.openai.com/docs/models)
 
 </TabItem>
 <TabItem value="anthropic" label="Anthropic">
 
-| Model                      | Alias    | Price: in \| out \* | Notes   |
-| -------------------------- | -------- | ------------------- | ------- |
-| `claude-3-opus-20240229`   | `opus`   | $15 \| $75          |         |
-| `claude-3-sonnet-20240229` | `sonnet` | $3 \| $15           | Default |
-| `claude-3-haiku-20240307`  | `haiku`  | $0.25 \| $1.25      |         |
+| Model                        | Alias    | Price: in \| out \* | Notes   |
+| ---------------------------- | -------- | ------------------- | ------- |
+| `claude-3-5-sonnet-20240620` | `sonnet` | $3 \| $15           | Default |
+| `claude-3-opus-20240229`     | `opus`   | $15 \| $75          |         |
+| `claude-3-haiku-20240307`    | `haiku`  | $0.25 \| $1.25      |         |
 
 \* API prices per 1 million input/output tokens
 
@@ -100,15 +99,15 @@ More info: [Anthropic docs](https://docs.anthropic.com/claude/docs/models-overvi
 </TabItem>
 <TabItem value="perplexity" label="Perplexity">
 
-| Model                            | Alias     | Price: in \| out \*         | Notes      |
-| -------------------------------- | --------- | --------------------------- | ---------- |
-| `llama-3-sonar-large-32k-chat`   | `large`   | $1 \| $1                    | Default    |
-| `llama-3-sonar-large-32k-online` | `online`  | $1 \| $1 \| request: $5     | Online\*\* |
-| `llama-3-sonar-small-32k-chat`   | `small`   | $0.2 \| $0.2                |            |
-| `llama-3-sonar-small-32k-online` |           | $0.2 \| $0.2 \| request: $5 | Online\*\* |
-| `llama-3-70b-instruct`           | `llama-3` | $1 \| $1                    |            |
-| `llama-3-8b-instruct`            |           | $0.2 \| $0.2                |            |
-| `mixtral-8x7b-instruct`          | `mixtral` | $0.6 \| $0.6                |            |
+| Model                               | Alias   | Price: in \| out \*          | Notes               |
+| ----------------------------------- | ------- | ---------------------------- | ------------------- |
+| `llama-3.1-sonar-huge-128k-online`  | `huge`  | $5 \| $5 \| requests: $5     | Default, Online\*\* |
+| `llama-3.1-sonar-large-128k-online` | `large` | $1 \| $1 \| requests: $5     | Online\*\*          |
+| `llama-3.1-sonar-small-128k-online` | `small` | $0.2 \| $0.2 \| requests: $5 | Online\*\*          |
+| `llama-3.1-sonar-large-128k-chat`   |         | $1 \| $1                     |                     |
+| `llama-3.1-sonar-small-128k-chat`   |         | $0.2 \| $0.2                 |                     |
+| `llama-3.1-70b-instruct`            |         | $1 \| $1                     |                     |
+| `llama-3.1-8b-instruct`             |         | $0.2 \| $0.2                 |                     |
 
 \* API prices per 1 million input/output tokens, per 1 thousands requests
 
@@ -119,14 +118,11 @@ More info: [Perplexity docs](https://docs.perplexity.ai/docs/model-cards)
 </TabItem>
 <TabItem value="mistral" label="Mistral">
 
-| Model                   | Alias     | Price: in \| out \* | Notes   |
-| ----------------------- | --------- | ------------------- | ------- |
-| `mistral-large-latest`  | `large`   | $4 \| $12           | Default |
-| `mistral-medium-latest` | `medium`  | $2.7 \| $8.1        |         |
-| `mistral-small-latest`  | `small`   | $1 \| $3            |         |
-| `open-mixtral-8x22b`    | `mixtral` | $2 \| $6            |         |
-| `open-mixtral-8x7b`     |           | $0.7 \| $0.7        |         |
-| `open-mistral-7b`       | `mistral` | $0.25 \| $0.25      |         |
+| Model                  | Alias       | Price: in \| out \* | Notes   |
+| ---------------------- | ----------- | ------------------- | ------- |
+| `mistral-large-latest` | `large`     | $3 \| $9            | Default |
+| `open-mistral-nemo`    | `nemo`      | $0.3 \| $0.3        |         |
+| `codestral-latest`     | `codestral` | $1 \| $3            |         |
 
 \* API prices per 1 million input/output tokens
 
