From 72823538506f7be4eb12b8ff0a15bb42e85f81db Mon Sep 17 00:00:00 2001
From: "Johan Stenberg (MSFT)"
Date: Sat, 28 Oct 2023 15:15:50 -0700
Subject: [PATCH 1/6] Starting point for AOAI + OAI

---
 azuremain.tsp                                 |   53 +
 common/models.tsp                             |    2 +-
 completions/models.tsp                        |   45 +-
 completions/operations.tsp                    |    2 +
 edits/models.tsp                              |    3 +-
 edits/operations.tsp                          |    2 +
 embeddings/models.tsp                         |    2 +-
 embeddings/operations.tsp                     |    2 +
 files/models.tsp                              |    2 +-
 files/operations.tsp                          |    2 +
 fine-tuning/models.tsp                        |    2 +-
 fine-tuning/operations.tsp                    |    2 +
 images/models.tsp                             |    2 +-
 images/operations.tsp                         |    2 +
 main.tsp                                      |    9 +-
 moderation/models.tsp                         |    2 +-
 moderation/operations.tsp                     |    2 +
 .../openapi3/openapi.AzureOpenAI.yaml         | 1118 +++++++++++++++++
 .../{openapi.yaml => openapi.OpenAI.yaml}     |  609 +++++----
 versions.tsp                                  |    9 +
 20 files changed, 1544 insertions(+), 328 deletions(-)
 create mode 100644 azuremain.tsp
 create mode 100644 tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml
 rename tsp-output/@typespec/openapi3/{openapi.yaml => openapi.OpenAI.yaml} (91%)
 create mode 100644 versions.tsp

diff --git a/azuremain.tsp b/azuremain.tsp
new file mode 100644
index 000000000..943f7e796
--- /dev/null
+++ b/azuremain.tsp
@@ -0,0 +1,53 @@
+import "@typespec/http";
+import "@typespec/openapi3";
+import "@typespec/openapi";
+import "@typespec/versioning";
+
+import "./audio";
+import "./completions";
+import "./edits";
+import "./embeddings";
+import "./files";
+import "./fine-tuning";
+import "./images";
+import "./moderation";
+import "./versions.tsp";
+
+import "./main.tsp";
+
+using TypeSpec.Http;
+
+/** The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details. */
+@service({
+  title: "Azure OpenAI API",
+  termsOfService: "https://openai.com/policies/terms-of-use",
+  contact: {
+    name: "OpenAI Support",
+    url: "https://help.openai.com",
+  },
+  license: {
+    name: "MIT",
+    url: "https://github.com/openai/openai-openapi/blob/master/LICENSE",
+  },
+})
+@server("{base_url}/openai", "OpenAI Endpoint", {
+  base_url: url;
+})
+@useAuth(BearerAuth)
+@TypeSpec.Versioning.useDependency(ModelDefinitions.OpenAIFlavors.Azure)
+namespace AzureOpenAI {
+  @route("deployments/{deploymentId}/chat")
+  namespace Chat {
+    interface Completions {
+      @route("completions")
+      op createCompletion is OpenAI.Chat.Completions.createChatCompletion;
+
+      @route("extensions/completions")
+      op createCompletionOnYourOwnData(...ModelDefinitions.CreateChatCompletionRequest): ModelDefinitions.CreateChatCompletionResponse;
+    }
+  }
+  @route("deployments/{deploymentId}/completions")
+  namespace Completions {
+    op createCompletion is OpenAI.Completions.createCompletion;
+  }
+}
diff --git a/common/models.tsp b/common/models.tsp
index d6d0d4f91..52b4fc5c8 100644
--- a/common/models.tsp
+++ b/common/models.tsp
@@ -1,4 +1,4 @@
-namespace OpenAI;
+namespace ModelDefinitions;
 using TypeSpec.OpenAPI;
 
 model ListModelsResponse {
diff --git a/completions/models.tsp b/completions/models.tsp
index 5aa332b32..df764c743 100644
--- a/completions/models.tsp
+++ b/completions/models.tsp
@@ -1,5 +1,11 @@
-namespace OpenAI;
+import "@typespec/versioning";
+
+import "../versions.tsp";
+
+namespace ModelDefinitions;
+
 using TypeSpec.OpenAPI;
+using TypeSpec.Versioning;
 
 alias CHAT_COMPLETION_MODELS =
   | "gpt4"
@@ -142,6 +148,9 @@ scalar N extends safeint;
 scalar MaxTokens extends safeint;
 
 model CreateChatCompletionRequest {
+  @added(ModelDefinitions.OpenAIFlavors.Azure)
+  deploymentId: string;
+
   /**
    * ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility)
    * table for details on which models work with the Chat API.
@@ -204,6 +213,9 @@ model ChatCompletionFunctions {
 model ChatCompletionFunctionParameters is Record<unknown>;
 
 model ChatCompletionRequestMessage {
+  @added(ModelDefinitions.OpenAIFlavors.Azure)
+  deploymentId: string;
+
   /** The role of the messages author. One of `system`, `user`, `assistant`, or `function`. */
   role: "system" | "user" | "assistant" | "function";
 
@@ -309,9 +321,16 @@ model ChatCompletionResponseMessage {
      */
     arguments: string;
   };
+
+  @added(OpenAIFlavors.Azure)
+  content_filter_results?: ContentFilterResults;
+
 }
 
 model CreateCompletionRequest {
+  @added(OpenAIFlavors.Azure)
+  @TypeSpec.Http.path deploymentId: string;
+
   /**
    * ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to
    * see all of your available models, or see our [Model overview](/docs/models/overview) for
@@ -417,4 +436,28 @@ model CreateCompletionResponse {
   }[];
 
   usage?: CompletionUsage;
+
+  @added(OpenAIFlavors.Azure)
+  content_filter_results?: ContentFilterResults;
 }
+
+@added(OpenAIFlavors.Azure)
+model ContentFilterResult {
+  severity: "safe" | "low" | "medium" | "high";
+  filtered: boolean;
+}
+
+@added(OpenAIFlavors.Azure)
+model ErrorBase {
+  code?: string;
+  message?: string;
+}
+
+@added(OpenAIFlavors.Azure)
+model ContentFilterResults {
+  sexual: ContentFilterResult;
+  violence: ContentFilterResult;
+  hate: ContentFilterResult;
+  self_harm: ContentFilterResult;
+  error: ErrorBase;
+}
\ No newline at end of file
diff --git a/completions/operations.tsp b/completions/operations.tsp
index d53245f7c..50f0f4af1 100644
--- a/completions/operations.tsp
+++ b/completions/operations.tsp
@@ -10,6 +10,8 @@ using TypeSpec.OpenAPI;
 
 namespace OpenAI;
 
+using ModelDefinitions;
+
 @route("/chat")
 namespace Chat {
   @route("/completions")
diff --git a/edits/models.tsp b/edits/models.tsp
index d76372649..45ed4fac1 100644
--- a/edits/models.tsp
+++ b/edits/models.tsp
@@ -1,4 +1,5 @@
-namespace OpenAI;
+namespace ModelDefinitions;
+
 using TypeSpec.OpenAPI;
 
 model CreateEditRequest {
diff --git a/edits/operations.tsp b/edits/operations.tsp
index 08497364e..8418bfcaa 100644
--- a/edits/operations.tsp
+++ b/edits/operations.tsp
@@ -9,6 +9,8 @@ using TypeSpec.OpenAPI;
 
 namespace OpenAI;
 
+using ModelDefinitions;
+
 @route("/edits")
 interface Edits {
   #deprecated "deprecated"
diff --git a/embeddings/models.tsp b/embeddings/models.tsp
index ab46275b2..027109362 100644
--- a/embeddings/models.tsp
+++ b/embeddings/models.tsp
@@ -1,6 +1,6 @@
 import "../common/models.tsp";
 
-namespace OpenAI;
+namespace ModelDefinitions;
 using TypeSpec.OpenAPI;
 
 model CreateEmbeddingRequest {
diff --git a/embeddings/operations.tsp b/embeddings/operations.tsp
index 012d97c58..aad7b3853 100644
--- a/embeddings/operations.tsp
+++ b/embeddings/operations.tsp
@@ -9,6 +9,8 @@ using TypeSpec.OpenAPI;
 
 namespace OpenAI;
 
+using ModelDefinitions;
+
 @route("/embeddings")
 interface Embeddings {
   @tag("OpenAI")
diff --git a/files/models.tsp b/files/models.tsp
index 990c1ea11..90329143f 100644
--- a/files/models.tsp
+++ b/files/models.tsp
@@ -1,4 +1,4 @@
-namespace OpenAI;
+namespace ModelDefinitions;
 using TypeSpec.OpenAPI;
 
 model ListFilesResponse {
diff --git a/files/operations.tsp b/files/operations.tsp
index 2e601ae03..4392e3b49 100644
--- a/files/operations.tsp
+++ b/files/operations.tsp
@@ -9,6 +9,8 @@ using TypeSpec.OpenAPI;
 
 namespace OpenAI;
 
+using ModelDefinitions;
+
 @route("/files")
 interface
Files { @tag("OpenAI") diff --git a/fine-tuning/models.tsp b/fine-tuning/models.tsp index bf846072b..0ea21cd1e 100644 --- a/fine-tuning/models.tsp +++ b/fine-tuning/models.tsp @@ -1,4 +1,4 @@ -namespace OpenAI; +namespace ModelDefinitions; using TypeSpec.OpenAPI; model FineTuningJob { diff --git a/fine-tuning/operations.tsp b/fine-tuning/operations.tsp index 15491f62e..5424d4002 100644 --- a/fine-tuning/operations.tsp +++ b/fine-tuning/operations.tsp @@ -9,6 +9,8 @@ using TypeSpec.OpenAPI; namespace OpenAI; +using ModelDefinitions; + @route("/fine_tuning") namespace FineTuning { @route("jobs") diff --git a/images/models.tsp b/images/models.tsp index 3d7020b51..4b8f19da7 100644 --- a/images/models.tsp +++ b/images/models.tsp @@ -1,6 +1,6 @@ import "../common/models.tsp"; -namespace OpenAI; +namespace ModelDefinitions; using TypeSpec.OpenAPI; alias SharedImageProperties = { diff --git a/images/operations.tsp b/images/operations.tsp index 09203262b..35a57a7c5 100644 --- a/images/operations.tsp +++ b/images/operations.tsp @@ -9,6 +9,8 @@ using TypeSpec.OpenAPI; namespace OpenAI; +using ModelDefinitions; + @route("/images") interface Images { @route("generations") diff --git a/main.tsp b/main.tsp index 2ea8cbbc3..52b34bed1 100644 --- a/main.tsp +++ b/main.tsp @@ -1,6 +1,7 @@ import "@typespec/http"; import "@typespec/openapi3"; import "@typespec/openapi"; +import "@typespec/versioning"; import "./audio"; import "./completions"; @@ -10,8 +11,11 @@ import "./files"; import "./fine-tuning"; import "./images"; import "./moderation"; +import "./versions.tsp"; using TypeSpec.Http; +using TypeSpec.Versioning; + /** The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details. */ @service({ @@ -24,9 +28,10 @@ using TypeSpec.Http; license: { name: "MIT", url: "https://github.com/openai/openai-openapi/blob/master/LICENSE", - }, - version: "2.0.0", + } }) @server("https://api.openai.com/v1", "OpenAI Endpoint") @useAuth(BearerAuth) +@useDependency(ModelDefinitions.OpenAIFlavors.OpenAI) namespace OpenAI; + diff --git a/moderation/models.tsp b/moderation/models.tsp index f47b21be1..5069058e2 100644 --- a/moderation/models.tsp +++ b/moderation/models.tsp @@ -1,4 +1,4 @@ -namespace OpenAI; +namespace ModelDefinitions; using TypeSpec.OpenAPI; model CreateModerationRequest { diff --git a/moderation/operations.tsp b/moderation/operations.tsp index 5f29bc3be..d3992db2d 100644 --- a/moderation/operations.tsp +++ b/moderation/operations.tsp @@ -9,6 +9,8 @@ using TypeSpec.OpenAPI; namespace OpenAI; +using ModelDefinitions; + @route("/moderations") interface Moderations { @operationId("createModeration") diff --git a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml new file mode 100644 index 000000000..35e11f388 --- /dev/null +++ b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml @@ -0,0 +1,1118 @@ +openapi: 3.0.0 +info: + title: Azure OpenAI API + version: 0000-00-00 + description: The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details. +tags: + - name: OpenAI +paths: + /deployments/{deploymentId}/chat/completions: + post: + tags: + - OpenAI + operationId: createChatCompletion + parameters: + - $ref: '#/components/parameters/ModelDefinitions.CreateChatCompletionRequest.deploymentId' + responses: + '200': + description: The request has succeeded. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateChatCompletionResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - model + - messages + properties: + model: + anyOf: + - type: string + - type: string + enum: + - gpt4 + - gpt-4-0314 + - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0314 + - gpt-4-32k-0613 + - gpt-3.5-turbo + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-0301 + - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-16k-0613 + description: |- + ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + x-oaiTypeLabel: string + messages: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionRequestMessage' + description: |- + A list of messages comprising the conversation so far. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + minItems: 1 + functions: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctions' + description: A list of functions the model may generate JSON inputs for. + minItems: 1 + maxItems: 128 + function_call: + anyOf: + - type: string + enum: + - none + - auto + - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionCallOption' + description: |- + Controls how the model responds to function calls. `none` means the model does not call a + function, and responds to the end-user. `auto` means the model can pick between an end-user or + calling a function. Specifying a particular function via `{\"name":\ \"my_function\"}` forces the + model to call that function. `none` is the default when no functions are present. `auto` is the + default if functions are present. + temperature: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Temperature' + nullable: true + description: |- + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + default: 1 + top_p: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.TopP' + nullable: true + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising + the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + default: 1 + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.N' + nullable: true + description: |- + How many completions to generate for each prompt. + **Note:** Because this parameter generates many completions, it can quickly consume your token + quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + default: 1 + max_tokens: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' + nullable: true + description: |- + The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's context length. 
+ [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + default: 16 + stop: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.Stop' + description: Up to 4 sequences where the API will stop generating further tokens. + default: null + presence_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear + in the text so far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + frequency_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing + frequency in the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + logit_bias: + type: object + description: |- + Modify the likelihood of specified tokens appearing in the completion. + Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an + associated bias value from -100 to 100. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, but values + between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + additionalProperties: + type: integer + format: int64 + nullable: true + x-oaiTypeLabel: map + user: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.User' + description: |- + A unique identifier representing your end-user, which can help OpenAI to monitor and detect + abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). + stream: + type: boolean + nullable: true + description: |- + If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + default: true + /deployments/{deploymentId}/chat/extensions/completions: + post: + operationId: Completions_createCompletionOnYourOwnData + parameters: + - $ref: '#/components/parameters/ModelDefinitions.CreateChatCompletionRequest.deploymentId' + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateChatCompletionResponse' + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - model + - messages + properties: + model: + anyOf: + - type: string + - type: string + enum: + - gpt4 + - gpt-4-0314 + - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0314 + - gpt-4-32k-0613 + - gpt-3.5-turbo + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-0301 + - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-16k-0613 + description: |- + ID of the model to use. 
See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + x-oaiTypeLabel: string + messages: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionRequestMessage' + description: |- + A list of messages comprising the conversation so far. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + minItems: 1 + functions: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctions' + description: A list of functions the model may generate JSON inputs for. + minItems: 1 + maxItems: 128 + function_call: + anyOf: + - type: string + enum: + - none + - auto + - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionCallOption' + description: |- + Controls how the model responds to function calls. `none` means the model does not call a + function, and responds to the end-user. `auto` means the model can pick between an end-user or + calling a function. Specifying a particular function via `{\"name":\ \"my_function\"}` forces the + model to call that function. `none` is the default when no functions are present. `auto` is the + default if functions are present. + temperature: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Temperature' + nullable: true + description: |- + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + default: 1 + top_p: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.TopP' + nullable: true + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising + the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + default: 1 + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.N' + nullable: true + description: |- + How many completions to generate for each prompt. + **Note:** Because this parameter generates many completions, it can quickly consume your token + quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + default: 1 + max_tokens: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' + nullable: true + description: |- + The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + default: 16 + stop: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.Stop' + description: Up to 4 sequences where the API will stop generating further tokens. + default: null + presence_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear + in the text so far, increasing the model's likelihood to talk about new topics. 
+ + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + frequency_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing + frequency in the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + logit_bias: + type: object + description: |- + Modify the likelihood of specified tokens appearing in the completion. + Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an + associated bias value from -100 to 100. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, but values + between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + additionalProperties: + type: integer + format: int64 + nullable: true + x-oaiTypeLabel: map + user: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.User' + description: |- + A unique identifier representing your end-user, which can help OpenAI to monitor and detect + abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). + stream: + type: boolean + nullable: true + description: |- + If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + default: true + /deployments/{deploymentId}/completions: + post: + tags: + - OpenAI + operationId: createCompletion + parameters: + - $ref: '#/components/parameters/ModelDefinitions.CreateCompletionRequest.deploymentId' + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateCompletionResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateCompletionRequest' + x-oaiMeta: + name: Create chat completion + group: chat + returns: |- + Returns a [chat completion](/docs/api-reference/chat/object) object, or a streamed sequence of + [chat completion chunk](/docs/api-reference/chat/streaming) objects if the request is streamed. + path: create + examples: + - title: No streaming + request: + curl: |- + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_model_id", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" 
+ } + ] + python: |- + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + + completion = openai.ChatCompletion.create( + model="VAR_model_id", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ] + ) + + print(completion.choices[0].message) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const completion = await openai.chat.completions.create({ + messages: [{ role: "system", content: "string" }], + model: "VAR_model_id", + }); + + console.log(completion.choices[0]); + } + + main(); + response: |- + { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-3.5-turbo-0613", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": " + + Hello there, how may I assist you today?", + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 12, + "total_tokens": 21 + } + } + - title: Streaming + request: + curl: |- + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_model_id", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" + } + ], + "stream": true + }' + python: |- + import os + import openai + openai.api_key = os.getenv("OPENAI_API_KEY") + + completion = openai.ChatCompletion.create( + model="VAR_model_id", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ], + stream=True + ) + + for chunk in completion: + print(chunk.choices[0].delta) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const completion = await openai.chat.completions.create({ + model: "VAR_model_id", + messages: [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ], + stream: true, + }); + + for await (const chunk of completion) { + console.log(chunk.choices[0].delta.content); + } + } + + main(); + response: |- + { + "id": "chatcmpl-123", + "object": "chat.completion.chunk", + "created": 1677652288, + "model": "gpt-3.5-turbo", + "choices": [{ + "index": 0, + "delta": { + "content": "Hello", + }, + "finish_reason": "stop" + }] + } +security: + - BearerAuth: [] +components: + parameters: + ModelDefinitions.CreateChatCompletionRequest.deploymentId: + name: deploymentId + in: path + required: true + schema: + type: string + ModelDefinitions.CreateCompletionRequest.deploymentId: + name: deploymentId + in: path + required: true + schema: + type: string + schemas: + ModelDefinitions.ChatCompletionFunctionCallOption: + type: object + required: + - name + properties: + name: + type: string + description: The name of the function to call. + ModelDefinitions.ChatCompletionFunctionParameters: + type: object + additionalProperties: {} + ModelDefinitions.ChatCompletionFunctions: + type: object + required: + - name + - parameters + properties: + name: + type: string + description: |- + The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and + dashes, with a maximum length of 64. + description: + type: string + description: |- + A description of what the function does, used by the model to choose when and how to call the + function. 
+ parameters: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionParameters' + description: |- + The parameters the functions accepts, described as a JSON Schema object. See the + [guide](/docs/guides/gpt/function-calling) for examples, and the + [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation + about the format.\n\nTo describe a function that accepts no parameters, provide the value + `{\"type\": \"object\", \"properties\": {}}`. + ModelDefinitions.ChatCompletionRequestMessage: + type: object + required: + - deploymentId + - role + - content + properties: + deploymentId: + type: string + role: + type: string + enum: + - system + - user + - assistant + - function + description: The role of the messages author. One of `system`, `user`, `assistant`, or `function`. + content: + type: string + nullable: true + description: |- + The contents of the message. `content` is required for all messages, and may be null for + assistant messages with function calls. + name: + type: string + description: |- + The name of the author of this message. `name` is required if role is `function`, and it + should be the name of the function whose response is in the `content`. May contain a-z, + A-Z, 0-9, and underscores, with a maximum length of 64 characters. + function_call: + type: object + description: The name and arguments of a function that should be called, as generated by the model. + required: + - name + - arguments + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: |- + The arguments to call the function with, as generated by the model in JSON format. Note that + the model does not always generate valid JSON, and may hallucinate parameters not defined by + your function schema. Validate the arguments in your code before calling your function. + ModelDefinitions.ChatCompletionResponseMessage: + type: object + required: + - role + - content + properties: + role: + type: string + enum: + - system + - user + - assistant + - function + description: The role of the author of this message. + content: + type: string + nullable: true + description: The contents of the message. + function_call: + type: object + description: The name and arguments of a function that should be called, as generated by the model. + required: + - name + - arguments + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: |- + The arguments to call the function with, as generated by the model in JSON format. Note that + the model does not always generate valid JSON, and may hallucinate parameters not defined by + your function schema. Validate the arguments in your code before calling your function. + content_filter_results: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResults' + ModelDefinitions.CompletionUsage: + type: object + description: Usage statistics for the completion request. + required: + - prompt_tokens + - completion_tokens + - total_tokens + properties: + prompt_tokens: + type: integer + format: int64 + description: Number of tokens in the prompt. + completion_tokens: + type: integer + format: int64 + description: Number of tokens in the generated completion + total_tokens: + type: integer + format: int64 + description: Total number of tokens used in the request (prompt + completion). 
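+    # The Azure flavor annotates completion responses with content filter results.
+    # A rough, non-normative sketch of the payload shape the two schemas below
+    # describe (the category names come from ModelDefinitions.ContentFilterResults;
+    # the severity/filtered values here are made-up examples):
+    #
+    #   "content_filter_results": {
+    #     "sexual":    { "filtered": false, "severity": "safe" },
+    #     "violence":  { "filtered": false, "severity": "safe" },
+    #     "hate":      { "filtered": false, "severity": "safe" },
+    #     "self_harm": { "filtered": false, "severity": "safe" },
+    #     "error":     { "code": "...", "message": "..." }
+    #   }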
+ ModelDefinitions.ContentFilterResult: + type: object + required: + - severity + - filtered + properties: + severity: + type: string + enum: + - safe + - low + - medium + - high + filtered: + type: boolean + ModelDefinitions.ContentFilterResults: + type: object + required: + - sexual + - violence + - hate + - self_harm + - error + properties: + sexual: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' + violence: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' + hate: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' + self_harm: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' + error: + $ref: '#/components/schemas/ModelDefinitions.ErrorBase' + ModelDefinitions.CreateChatCompletionResponse: + type: object + description: Represents a chat completion response returned by model, based on the provided input. + required: + - id + - object + - created + - model + - choices + properties: + id: + type: string + description: A unique identifier for the chat completion. + object: + type: string + description: The object type, which is always `chat.completion`. + created: + type: integer + format: unixtime + description: The Unix timestamp (in seconds) of when the chat completion was created. + model: + type: string + description: The model used for the chat completion. + choices: + type: array + items: + type: object + required: + - index + - message + - finish_reason + properties: + index: + type: integer + format: int64 + description: The index of the choice in the list of choices. + message: + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionResponseMessage' + finish_reason: + type: string + enum: + - stop + - length + - function_call + - content_filter + description: |- + The reason the model stopped generating tokens. This will be `stop` if the model hit a + natural stop point or a provided stop sequence, `length` if the maximum number of tokens + specified in the request was reached, `content_filter` if the content was omitted due to + a flag from our content filters, or `function_call` if the model called a function. + description: A list of chat completion choices. Can be more than one if `n` is greater than 1. + usage: + $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' + x-oaiMeta: + name: The chat completion object + group: chat + example: '' + ModelDefinitions.CreateCompletionRequest: + type: object + required: + - model + - prompt + properties: + model: + anyOf: + - type: string + - type: string + enum: + - babbage-002 + - davinci-002 + - text-davinci-003 + - text-davinci-002 + - text-davinci-001 + - code-davinci-002 + - text-curie-001 + - text-babbage-001 + - text-ada-001 + description: |- + ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to + see all of your available models, or see our [Model overview](/docs/models/overview) for + descriptions of them. + x-oaiTypeLabel: string + prompt: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.Prompt' + description: |- + The prompt(s) to generate completions for, encoded as a string, array of strings, array of + tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during training, so if a + prompt is not specified the model will generate as if from the beginning of a new document. + default: <|endoftext|> + suffix: + type: string + nullable: true + description: The suffix that comes after a completion of inserted text. 
+ default: null + temperature: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Temperature' + nullable: true + description: |- + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + default: 1 + top_p: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.TopP' + nullable: true + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising + the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + default: 1 + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.N' + nullable: true + description: |- + How many completions to generate for each prompt. + **Note:** Because this parameter generates many completions, it can quickly consume your token + quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + default: 1 + max_tokens: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' + nullable: true + description: |- + The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + default: 16 + stop: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.Stop' + description: Up to 4 sequences where the API will stop generating further tokens. + default: null + presence_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear + in the text so far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + frequency_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing + frequency in the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + logit_bias: + type: object + description: |- + Modify the likelihood of specified tokens appearing in the completion. + Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an + associated bias value from -100 to 100. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, but values + between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + additionalProperties: + type: integer + format: int64 + nullable: true + x-oaiTypeLabel: map + user: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.User' + description: |- + A unique identifier representing your end-user, which can help OpenAI to monitor and detect + abuse. 
[Learn more](/docs/guides/safety-best-practices/end-user-ids).
+        stream:
+          type: boolean
+          nullable: true
+          description: |-
+            If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only
+            [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+            as they become available, with the stream terminated by a `data: [DONE]` message.
+            [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb).
+          default: true
+        logprobs:
+          type: integer
+          format: int64
+          nullable: true
+          description: |-
+            Include the log probabilities on the `logprobs` most likely tokens, as well as the chosen tokens.
+            For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The
+            API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1`
+            elements in the response.
+
+            The maximum value for `logprobs` is 5.
+          default: null
+        echo:
+          type: boolean
+          nullable: true
+          description: Echo back the prompt in addition to the completion
+          default: false
+        best_of:
+          type: integer
+          format: int64
+          nullable: true
+          description: |-
+            Generates `best_of` completions server-side and returns the "best" (the one with the highest
+            log probability per token). Results cannot be streamed.
+
+            When used with `n`, `best_of` controls the number of candidate completions and `n` specifies
+            how many to return – `best_of` must be greater than `n`.
+
+            **Note:** Because this parameter generates many completions, it can quickly consume your token
+            quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
+          default: 1
+    ModelDefinitions.CreateCompletionResponse:
+      type: object
+      description: |-
+        Represents a completion response from the API. Note: both the streamed and non-streamed response
+        objects share the same shape (unlike the chat endpoint).
+      required:
+        - id
+        - object
+        - created
+        - model
+        - choices
+      properties:
+        id:
+          type: string
+          description: A unique identifier for the completion.
+        object:
+          type: string
+          description: The object type, which is always `text_completion`.
+        created:
+          type: integer
+          format: unixtime
+          description: The Unix timestamp (in seconds) of when the completion was created.
+        model:
+          type: string
+          description: The model used for the completion.
+        choices:
+          type: array
+          items:
+            type: object
+            required:
+              - index
+              - text
+              - logprobs
+              - finish_reason
+            properties:
+              index:
+                type: integer
+                format: int64
+              text:
+                type: string
+              logprobs:
+                type: object
+                required:
+                  - tokens
+                  - token_logprobs
+                  - top_logprobs
+                  - text_offset
+                properties:
+                  tokens:
+                    type: array
+                    items:
+                      type: string
+                  token_logprobs:
+                    type: array
+                    items:
+                      type: number
+                      format: double
+                  top_logprobs:
+                    type: array
+                    items:
+                      type: object
+                      additionalProperties:
+                        type: integer
+                        format: int64
+                  text_offset:
+                    type: array
+                    items:
+                      type: integer
+                      format: int64
+                nullable: true
+              finish_reason:
+                type: string
+                enum:
+                  - stop
+                  - length
+                  - content_filter
+                description: |-
+                  The reason the model stopped generating tokens. This will be `stop` if the model hit a
+                  natural stop point or a provided stop sequence, `length` if the maximum number of tokens
+                  specified in the request was reached, or `content_filter` if content was omitted due to
+                  a flag from our content filters.
+ description: The list of completion choices the model generated for the input. + usage: + $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' + content_filter_results: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResults' + x-oaiMeta: + name: The completion object + legacy: true + example: '' + ModelDefinitions.ErrorBase: + type: object + properties: + code: + type: string + message: + type: string + ModelDefinitions.MaxTokens: + type: integer + format: int64 + minimum: 0 + ModelDefinitions.N: + type: integer + format: int64 + minimum: 1 + maximum: 128 + ModelDefinitions.Penalty: + type: number + format: double + minimum: -2 + maximum: 2 + ModelDefinitions.Prompt: + oneOf: + - type: string + - type: array + items: + type: string + - $ref: '#/components/schemas/ModelDefinitions.TokenArray' + - $ref: '#/components/schemas/ModelDefinitions.TokenArrayArray' + nullable: true + ModelDefinitions.Stop: + oneOf: + - type: string + - $ref: '#/components/schemas/ModelDefinitions.StopSequences' + nullable: true + ModelDefinitions.StopSequences: + type: array + items: + type: string + minItems: 1 + maxItems: 4 + ModelDefinitions.Temperature: + type: number + format: double + minimum: 0 + maximum: 2 + ModelDefinitions.TokenArray: + type: array + items: + type: integer + format: int64 + minItems: 1 + ModelDefinitions.TokenArrayArray: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.TokenArray' + minItems: 1 + ModelDefinitions.TopP: + type: number + format: double + minimum: 0 + maximum: 1 + ModelDefinitions.User: + type: string + OpenAI.Error: + type: object + required: + - type + - message + - param + - code + properties: + type: + type: string + message: + type: string + param: + type: string + nullable: true + code: + type: string + nullable: true + OpenAI.ErrorResponse: + type: object + required: + - error + properties: + error: + $ref: '#/components/schemas/OpenAI.Error' + securitySchemes: + BearerAuth: + type: http + scheme: bearer +servers: + - url: '{base_url}/openai' + description: OpenAI Endpoint + variables: + base_url: + default: '' diff --git a/tsp-output/@typespec/openapi3/openapi.yaml b/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml similarity index 91% rename from tsp-output/@typespec/openapi3/openapi.yaml rename to tsp-output/@typespec/openapi3/openapi.OpenAI.yaml index d37490680..c092d5ab0 100644 --- a/tsp-output/@typespec/openapi3/openapi.yaml +++ b/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml @@ -1,8 +1,7 @@ openapi: 3.0.0 info: title: OpenAI API - version: 2.0.0 - description: The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details. + version: 0000-00-00 tags: - name: OpenAI paths: @@ -70,7 +69,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateChatCompletionResponse' + $ref: '#/components/schemas/ModelDefinitions.CreateChatCompletionResponse' default: description: An unexpected error response. content: @@ -82,7 +81,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateChatCompletionRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateChatCompletionRequest' /completions: post: tags: @@ -95,7 +94,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateCompletionResponse' + $ref: '#/components/schemas/ModelDefinitions.CreateCompletionResponse' default: description: An unexpected error response. 
content: @@ -107,7 +106,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateCompletionRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateCompletionRequest' x-oaiMeta: name: Create chat completion group: chat @@ -268,7 +267,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateEditResponse' + $ref: '#/components/schemas/ModelDefinitions.CreateEditResponse' default: description: An unexpected error response. content: @@ -280,7 +279,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateEditRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateEditRequest' deprecated: true /embeddings: post: @@ -295,7 +294,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateEmbeddingResponse' + $ref: '#/components/schemas/ModelDefinitions.CreateEmbeddingResponse' default: description: An unexpected error response. content: @@ -307,7 +306,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateEmbeddingRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateEmbeddingRequest' /files: get: tags: @@ -321,7 +320,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListFilesResponse' + $ref: '#/components/schemas/ModelDefinitions.ListFilesResponse' default: description: An unexpected error response. content: @@ -340,7 +339,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenAIFile' + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' default: description: An unexpected error response. content: @@ -352,7 +351,7 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/CreateFileRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateFileRequest' /files/files/{file_id}: post: tags: @@ -372,7 +371,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/OpenAIFile' + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' default: description: An unexpected error response. content: @@ -397,7 +396,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/DeleteFileResponse' + $ref: '#/components/schemas/ModelDefinitions.DeleteFileResponse' default: description: An unexpected error response. content: @@ -448,7 +447,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/FineTune' + $ref: '#/components/schemas/ModelDefinitions.FineTune' default: description: An unexpected error response. content: @@ -460,7 +459,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateFineTuneRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateFineTuneRequest' deprecated: true get: tags: @@ -474,7 +473,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListFineTunesResponse' + $ref: '#/components/schemas/ModelDefinitions.ListFineTunesResponse' default: description: An unexpected error response. content: @@ -504,7 +503,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/FineTune' + $ref: '#/components/schemas/ModelDefinitions.FineTune' default: description: An unexpected error response. content: @@ -531,7 +530,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/FineTune' + $ref: '#/components/schemas/ModelDefinitions.FineTune' default: description: An unexpected error response. 
content: @@ -572,7 +571,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListFineTuneEventsResponse' + $ref: '#/components/schemas/ModelDefinitions.ListFineTuneEventsResponse' default: description: An unexpected error response. content: @@ -599,7 +598,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/FineTuningJob' + $ref: '#/components/schemas/ModelDefinitions.FineTuningJob' default: description: An unexpected error response. content: @@ -611,7 +610,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateFineTuningJobRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateFineTuningJobRequest' get: tags: - OpenAI @@ -637,7 +636,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListPaginatedFineTuningJobsResponse' + $ref: '#/components/schemas/ModelDefinitions.ListPaginatedFineTuningJobsResponse' default: description: An unexpected error response. content: @@ -665,7 +664,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/FineTuningJob' + $ref: '#/components/schemas/ModelDefinitions.FineTuningJob' default: description: An unexpected error response. content: @@ -691,7 +690,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/FineTuningJob' + $ref: '#/components/schemas/ModelDefinitions.FineTuningJob' default: description: An unexpected error response. content: @@ -730,7 +729,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListFineTuningJobEventsResponse' + $ref: '#/components/schemas/ModelDefinitions.ListFineTuningJobEventsResponse' default: description: An unexpected error response. content: @@ -750,7 +749,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ImagesResponse' + $ref: '#/components/schemas/ModelDefinitions.ImagesResponse' default: description: An unexpected error response. content: @@ -762,7 +761,7 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/CreateImageEditRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateImageEditRequest' /images/generations: post: tags: @@ -776,7 +775,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ImagesResponse' + $ref: '#/components/schemas/ModelDefinitions.ImagesResponse' default: description: An unexpected error response. content: @@ -788,7 +787,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateImageRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateImageRequest' /images/variations: post: tags: @@ -802,7 +801,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ImagesResponse' + $ref: '#/components/schemas/ModelDefinitions.ImagesResponse' default: description: An unexpected error response. content: @@ -814,7 +813,7 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/CreateImageVariationRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateImageVariationRequest' /models: get: tags: @@ -830,7 +829,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ListModelsResponse' + $ref: '#/components/schemas/ModelDefinitions.ListModelsResponse' default: description: An unexpected error response. content: @@ -858,7 +857,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Model' + $ref: '#/components/schemas/ModelDefinitions.Model' default: description: An unexpected error response. 
content: @@ -883,7 +882,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/DeleteModelResponse' + $ref: '#/components/schemas/ModelDefinitions.DeleteModelResponse' default: description: An unexpected error response. content: @@ -903,7 +902,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateModerationResponse' + $ref: '#/components/schemas/ModelDefinitions.CreateModerationResponse' default: description: An unexpected error response. content: @@ -915,12 +914,155 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateModerationRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateModerationRequest' security: - BearerAuth: [] components: schemas: - ChatCompletionFunctionCallOption: + CreateTranscriptionRequest: + type: object + required: + - file + - model + properties: + file: + type: string + format: binary + description: |- + The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, + mpeg, mpga, m4a, ogg, wav, or webm. + x-oaiTypeLabel: file + model: + anyOf: + - type: string + - type: string + enum: + - whisper-1 + description: ID of the model to use. Only `whisper-1` is currently available. + x-oaiTypeLabel: string + prompt: + type: string + description: |- + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. + response_format: + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + description: |- + The format of the transcript output, in one of these options: json, text, srt, verbose_json, or + vtt. + default: json + temperature: + type: number + format: double + description: |- + The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, + the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + minimum: 0 + maximum: 1 + default: 0 + language: + type: string + description: |- + The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy + and latency. + CreateTranscriptionResponse: + type: object + required: + - text + properties: + text: + type: string + CreateTranslationRequest: + type: object + required: + - file + - model + properties: + file: + type: string + format: binary + description: |- + The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, + mpeg, mpga, m4a, ogg, wav, or webm. + x-oaiTypeLabel: file + model: + anyOf: + - type: string + - type: string + enum: + - whisper-1 + description: ID of the model to use. Only `whisper-1` is currently available. + x-oaiTypeLabel: string + prompt: + type: string + description: |- + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. + response_format: + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + description: |- + The format of the transcript output, in one of these options: json, text, srt, verbose_json, or + vtt. + default: json + temperature: + type: number + format: double + description: |- + The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, + the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + minimum: 0 + maximum: 1 + default: 0 + CreateTranslationResponse: + type: object + required: + - text + properties: + text: + type: string + Error: + type: object + required: + - type + - message + - param + - code + properties: + type: + type: string + message: + type: string + param: + type: string + nullable: true + code: + type: string + nullable: true + ErrorResponse: + type: object + required: + - error + properties: + error: + $ref: '#/components/schemas/Error' + ModelDefinitions.ChatCompletionFunctionCallOption: type: object required: - name @@ -928,10 +1070,10 @@ components: name: type: string description: The name of the function to call. - ChatCompletionFunctionParameters: + ModelDefinitions.ChatCompletionFunctionParameters: type: object additionalProperties: {} - ChatCompletionFunctions: + ModelDefinitions.ChatCompletionFunctions: type: object required: - name @@ -949,14 +1091,14 @@ components: function. parameters: allOf: - - $ref: '#/components/schemas/ChatCompletionFunctionParameters' + - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionParameters' description: |- The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.\n\nTo describe a function that accepts no parameters, provide the value `{\"type\": \"object\", \"properties\": {}}`. - ChatCompletionRequestMessage: + ModelDefinitions.ChatCompletionRequestMessage: type: object required: - role @@ -998,7 +1140,7 @@ components: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. - ChatCompletionResponseMessage: + ModelDefinitions.ChatCompletionResponseMessage: type: object required: - role @@ -1032,7 +1174,7 @@ components: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. - CompletionUsage: + ModelDefinitions.CompletionUsage: type: object description: Usage statistics for the completion request. required: @@ -1052,7 +1194,7 @@ components: type: integer format: int64 description: Total number of tokens used in the request (prompt + completion). - CreateChatCompletionRequest: + ModelDefinitions.CreateChatCompletionRequest: type: object required: - model @@ -1081,7 +1223,7 @@ components: messages: type: array items: - $ref: '#/components/schemas/ChatCompletionRequestMessage' + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionRequestMessage' description: |- A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). 
@@ -1089,7 +1231,7 @@ components: functions: type: array items: - $ref: '#/components/schemas/ChatCompletionFunctions' + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctions' description: A list of functions the model may generate JSON inputs for. minItems: 1 maxItems: 128 @@ -1099,7 +1241,7 @@ components: enum: - none - auto - - $ref: '#/components/schemas/ChatCompletionFunctionCallOption' + - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionCallOption' description: |- Controls how the model responds to function calls. `none` means the model does not call a function, and responds to the end-user. `auto` means the model can pick between an end-user or @@ -1108,7 +1250,7 @@ components: default if functions are present. temperature: oneOf: - - $ref: '#/components/schemas/Temperature' + - $ref: '#/components/schemas/ModelDefinitions.Temperature' nullable: true description: |- What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output @@ -1118,7 +1260,7 @@ components: default: 1 top_p: oneOf: - - $ref: '#/components/schemas/TopP' + - $ref: '#/components/schemas/ModelDefinitions.TopP' nullable: true description: |- An alternative to sampling with temperature, called nucleus sampling, where the model considers @@ -1129,7 +1271,7 @@ components: default: 1 n: oneOf: - - $ref: '#/components/schemas/N' + - $ref: '#/components/schemas/ModelDefinitions.N' nullable: true description: |- How many completions to generate for each prompt. @@ -1138,7 +1280,7 @@ components: default: 1 max_tokens: oneOf: - - $ref: '#/components/schemas/MaxTokens' + - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' nullable: true description: |- The maximum number of [tokens](/tokenizer) to generate in the completion. @@ -1149,12 +1291,12 @@ components: default: 16 stop: allOf: - - $ref: '#/components/schemas/Stop' + - $ref: '#/components/schemas/ModelDefinitions.Stop' description: Up to 4 sequences where the API will stop generating further tokens. default: null presence_penalty: oneOf: - - $ref: '#/components/schemas/Penalty' + - $ref: '#/components/schemas/ModelDefinitions.Penalty' nullable: true description: |- Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear @@ -1163,7 +1305,7 @@ components: [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) frequency_penalty: oneOf: - - $ref: '#/components/schemas/Penalty' + - $ref: '#/components/schemas/ModelDefinitions.Penalty' nullable: true description: |- Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing @@ -1187,7 +1329,7 @@ components: x-oaiTypeLabel: map user: allOf: - - $ref: '#/components/schemas/User' + - $ref: '#/components/schemas/ModelDefinitions.User' description: |- A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). @@ -1200,7 +1342,7 @@ components: as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). default: true - CreateChatCompletionResponse: + ModelDefinitions.CreateChatCompletionResponse: type: object description: Represents a chat completion response returned by model, based on the provided input. 
required: @@ -1237,7 +1379,7 @@ components: format: int64 description: The index of the choice in the list of choices. message: - $ref: '#/components/schemas/ChatCompletionResponseMessage' + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionResponseMessage' finish_reason: type: string enum: @@ -1252,12 +1394,12 @@ components: a flag from our content filters, or `function_call` if the model called a function. description: A list of chat completion choices. Can be more than one if `n` is greater than 1. usage: - $ref: '#/components/schemas/CompletionUsage' + $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' x-oaiMeta: name: The chat completion object group: chat example: '' - CreateCompletionRequest: + ModelDefinitions.CreateCompletionRequest: type: object required: - model @@ -1284,7 +1426,7 @@ components: x-oaiTypeLabel: string prompt: allOf: - - $ref: '#/components/schemas/Prompt' + - $ref: '#/components/schemas/ModelDefinitions.Prompt' description: |- The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -1299,7 +1441,7 @@ components: default: null temperature: oneOf: - - $ref: '#/components/schemas/Temperature' + - $ref: '#/components/schemas/ModelDefinitions.Temperature' nullable: true description: |- What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output @@ -1309,7 +1451,7 @@ components: default: 1 top_p: oneOf: - - $ref: '#/components/schemas/TopP' + - $ref: '#/components/schemas/ModelDefinitions.TopP' nullable: true description: |- An alternative to sampling with temperature, called nucleus sampling, where the model considers @@ -1320,7 +1462,7 @@ components: default: 1 n: oneOf: - - $ref: '#/components/schemas/N' + - $ref: '#/components/schemas/ModelDefinitions.N' nullable: true description: |- How many completions to generate for each prompt. @@ -1329,7 +1471,7 @@ components: default: 1 max_tokens: oneOf: - - $ref: '#/components/schemas/MaxTokens' + - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' nullable: true description: |- The maximum number of [tokens](/tokenizer) to generate in the completion. @@ -1340,12 +1482,12 @@ components: default: 16 stop: allOf: - - $ref: '#/components/schemas/Stop' + - $ref: '#/components/schemas/ModelDefinitions.Stop' description: Up to 4 sequences where the API will stop generating further tokens. default: null presence_penalty: oneOf: - - $ref: '#/components/schemas/Penalty' + - $ref: '#/components/schemas/ModelDefinitions.Penalty' nullable: true description: |- Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear @@ -1354,7 +1496,7 @@ components: [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) frequency_penalty: oneOf: - - $ref: '#/components/schemas/Penalty' + - $ref: '#/components/schemas/ModelDefinitions.Penalty' nullable: true description: |- Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing @@ -1378,7 +1520,7 @@ components: x-oaiTypeLabel: map user: allOf: - - $ref: '#/components/schemas/User' + - $ref: '#/components/schemas/ModelDefinitions.User' description: |- A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). @@ -1422,7 +1564,7 @@ components: **Note:** Because this parameter generates many completions, it can quickly consume your token quota. 
Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. default: 1 - CreateCompletionResponse: + ModelDefinitions.CreateCompletionResponse: type: object description: |- Represents a completion response from the API. Note: both the streamed and non-streamed response @@ -1506,12 +1648,12 @@ components: content filters. description: The list of completion choices the model generated for the input. usage: - $ref: '#/components/schemas/CompletionUsage' + $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' x-oaiMeta: name: The completion object legacy: true example: '' - CreateEditRequest: + ModelDefinitions.CreateEditRequest: type: object required: - model @@ -1538,13 +1680,13 @@ components: description: The instruction that tells the model how to edit the prompt. n: oneOf: - - $ref: '#/components/schemas/EditN' + - $ref: '#/components/schemas/ModelDefinitions.EditN' nullable: true description: How many edits to generate for the input and instruction. default: 1 temperature: oneOf: - - $ref: '#/components/schemas/Temperature' + - $ref: '#/components/schemas/ModelDefinitions.Temperature' nullable: true description: |- What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output @@ -1554,7 +1696,7 @@ components: default: 1 top_p: oneOf: - - $ref: '#/components/schemas/TopP' + - $ref: '#/components/schemas/ModelDefinitions.TopP' nullable: true description: |- An alternative to sampling with temperature, called nucleus sampling, where the model considers @@ -1563,7 +1705,7 @@ components: We generally recommend altering this or `temperature` but not both. default: 1 - CreateEditResponse: + ModelDefinitions.CreateEditResponse: type: object required: - object @@ -1607,8 +1749,8 @@ components: specified in the request was reached. description: 'description: A list of edit choices. Can be more than one if `n` is greater than 1.' usage: - $ref: '#/components/schemas/CompletionUsage' - CreateEmbeddingRequest: + $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' + ModelDefinitions.CreateEmbeddingRequest: type: object required: - model @@ -1628,8 +1770,8 @@ components: - type: array items: type: string - - $ref: '#/components/schemas/TokenArray' - - $ref: '#/components/schemas/TokenArrayArray' + - $ref: '#/components/schemas/ModelDefinitions.TokenArray' + - $ref: '#/components/schemas/ModelDefinitions.TokenArrayArray' description: |- Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed @@ -1637,8 +1779,8 @@ components: [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) for counting tokens. user: - $ref: '#/components/schemas/User' - CreateEmbeddingResponse: + $ref: '#/components/schemas/ModelDefinitions.User' + ModelDefinitions.CreateEmbeddingResponse: type: object required: - object @@ -1657,7 +1799,7 @@ components: data: type: array items: - $ref: '#/components/schemas/Embedding' + $ref: '#/components/schemas/ModelDefinitions.Embedding' description: The list of embeddings generated by the model. usage: type: object @@ -1674,7 +1816,7 @@ components: type: integer format: int64 description: The total number of tokens used by the request. - CreateFileRequest: + ModelDefinitions.CreateFileRequest: type: object required: - file @@ -1693,7 +1835,7 @@ components: The intended purpose of the uploaded documents. 
Use "fine-tune" for [fine-tuning](/docs/api-reference/fine-tuning). This allows us to validate the format of the uploaded file. - CreateFineTuneRequest: + ModelDefinitions.CreateFineTuneRequest: type: object required: - training_file @@ -1834,7 +1976,7 @@ components: default: null suffix: oneOf: - - $ref: '#/components/schemas/SuffixString' + - $ref: '#/components/schemas/ModelDefinitions.SuffixString' nullable: true description: |- A string of up to 18 characters that will be added to your fine-tuned model name. @@ -1842,7 +1984,7 @@ components: For example, a `suffix` of "custom-model-name" would produce a model name like `ada:ft-your-org:custom-model-name-2022-02-15-04-21-04`. default: null - CreateFineTuningJobRequest: + ModelDefinitions.CreateFineTuningJobRequest: type: object required: - training_file @@ -1894,14 +2036,14 @@ components: - type: string enum: - auto - - $ref: '#/components/schemas/NEpochs' + - $ref: '#/components/schemas/ModelDefinitions.NEpochs' description: |- The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. default: auto suffix: oneOf: - - $ref: '#/components/schemas/SuffixString' + - $ref: '#/components/schemas/ModelDefinitions.SuffixString' nullable: true description: |- A string of up to 18 characters that will be added to your fine-tuned model name. @@ -1909,7 +2051,7 @@ components: For example, a `suffix` of "custom-model-name" would produce a model name like `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. default: null - CreateImageEditRequest: + ModelDefinitions.CreateImageEditRequest: type: object required: - prompt @@ -1933,7 +2075,7 @@ components: as `image`. n: oneOf: - - $ref: '#/components/schemas/ImagesN' + - $ref: '#/components/schemas/ModelDefinitions.ImagesN' nullable: true description: The number of images to generate. Must be between 1 and 10. default: 1 @@ -1955,8 +2097,8 @@ components: description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. default: url user: - $ref: '#/components/schemas/User' - CreateImageRequest: + $ref: '#/components/schemas/ModelDefinitions.User' + ModelDefinitions.CreateImageRequest: type: object required: - prompt @@ -1966,7 +2108,7 @@ components: description: A text description of the desired image(s). The maximum length is 1000 characters. n: oneOf: - - $ref: '#/components/schemas/ImagesN' + - $ref: '#/components/schemas/ModelDefinitions.ImagesN' nullable: true description: The number of images to generate. Must be between 1 and 10. default: 1 @@ -1988,8 +2130,8 @@ components: description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. default: url user: - $ref: '#/components/schemas/User' - CreateImageVariationRequest: + $ref: '#/components/schemas/ModelDefinitions.User' + ModelDefinitions.CreateImageVariationRequest: type: object required: - image @@ -2002,7 +2144,7 @@ components: and square. n: oneOf: - - $ref: '#/components/schemas/ImagesN' + - $ref: '#/components/schemas/ModelDefinitions.ImagesN' nullable: true description: The number of images to generate. Must be between 1 and 10. default: 1 @@ -2024,8 +2166,8 @@ components: description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. 
default: url user: - $ref: '#/components/schemas/User' - CreateModerationRequest: + $ref: '#/components/schemas/ModelDefinitions.User' + ModelDefinitions.CreateModerationRequest: type: object required: - input @@ -2052,7 +2194,7 @@ components: of `text-moderation-stable` may be slightly lower than for `text-moderation-latest`. x-oaiTypeLabel: string default: text-moderation-latest - CreateModerationResponse: + ModelDefinitions.CreateModerationResponse: type: object required: - id @@ -2201,125 +2343,7 @@ components: format: double description: The score for the category 'violence/graphic'. description: A list of moderation objects. - CreateTranscriptionRequest: - type: object - required: - - file - - model - properties: - file: - type: string - format: binary - description: |- - The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, - mpeg, mpga, m4a, ogg, wav, or webm. - x-oaiTypeLabel: file - model: - anyOf: - - type: string - - type: string - enum: - - whisper-1 - description: ID of the model to use. Only `whisper-1` is currently available. - x-oaiTypeLabel: string - prompt: - type: string - description: |- - An optional text to guide the model's style or continue a previous audio segment. The - [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. - response_format: - type: string - enum: - - json - - text - - srt - - verbose_json - - vtt - description: |- - The format of the transcript output, in one of these options: json, text, srt, verbose_json, or - vtt. - default: json - temperature: - type: number - format: double - description: |- - The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more - random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, - the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to - automatically increase the temperature until certain thresholds are hit. - minimum: 0 - maximum: 1 - default: 0 - language: - type: string - description: |- - The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy - and latency. - CreateTranscriptionResponse: - type: object - required: - - text - properties: - text: - type: string - CreateTranslationRequest: - type: object - required: - - file - - model - properties: - file: - type: string - format: binary - description: |- - The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, - mpeg, mpga, m4a, ogg, wav, or webm. - x-oaiTypeLabel: file - model: - anyOf: - - type: string - - type: string - enum: - - whisper-1 - description: ID of the model to use. Only `whisper-1` is currently available. - x-oaiTypeLabel: string - prompt: - type: string - description: |- - An optional text to guide the model's style or continue a previous audio segment. The - [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. - response_format: - type: string - enum: - - json - - text - - srt - - verbose_json - - vtt - description: |- - The format of the transcript output, in one of these options: json, text, srt, verbose_json, or - vtt. - default: json - temperature: - type: number - format: double - description: |- - The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more - random, while lower values like 0.2 will make it more focused and deterministic. 
If set to 0, - the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to - automatically increase the temperature until certain thresholds are hit. - minimum: 0 - maximum: 1 - default: 0 - CreateTranslationResponse: - type: object - required: - - text - properties: - text: - type: string - DeleteFileResponse: + ModelDefinitions.DeleteFileResponse: type: object required: - id @@ -2332,7 +2356,7 @@ components: type: string deleted: type: boolean - DeleteModelResponse: + ModelDefinitions.DeleteModelResponse: type: object required: - id @@ -2345,12 +2369,12 @@ components: type: string deleted: type: boolean - EditN: + ModelDefinitions.EditN: type: integer format: int64 minimum: 0 maximum: 20 - Embedding: + ModelDefinitions.Embedding: type: object description: Represents an embedding vector returned by embedding endpoint. required: @@ -2375,32 +2399,7 @@ components: description: |- The embedding vector, which is a list of floats. The length of vector depends on the model as\ listed in the [embedding guide](/docs/guides/embeddings). - Error: - type: object - required: - - type - - message - - param - - code - properties: - type: - type: string - message: - type: string - param: - type: string - nullable: true - code: - type: string - nullable: true - ErrorResponse: - type: object - required: - - error - properties: - error: - $ref: '#/components/schemas/Error' - FineTune: + ModelDefinitions.FineTune: type: object description: The `FineTune` object represents a legacy fine-tune job that has been created through the API. required: @@ -2498,41 +2497,24 @@ components: training_files: type: array items: - $ref: '#/components/schemas/OpenAIFile' + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' description: The list of files used for training. validation_files: type: array items: - $ref: '#/components/schemas/OpenAIFile' + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' description: The list of files used for validation. result_files: type: array items: - $ref: '#/components/schemas/OpenAIFile' + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' description: The compiled results files for the fine-tuning job. events: type: array items: - $ref: '#/components/schemas/FineTuneEvent' + $ref: '#/components/schemas/ModelDefinitions.FineTuneEvent' description: The list of events that have been observed in the lifecycle of the FineTune job. - FineTuneEvent: - type: object - required: - - object - - created_at - - level - - message - properties: - object: - type: string - created_at: - type: integer - format: unixtime - level: - type: string - message: - type: string - FineTuningEvent: + ModelDefinitions.FineTuneEvent: type: object required: - object @@ -2549,16 +2531,7 @@ components: type: string message: type: string - data: - type: object - additionalProperties: {} - nullable: true - type: - type: string - enum: - - message - - metrics - FineTuningJob: + ModelDefinitions.FineTuningJob: type: object required: - id @@ -2630,7 +2603,7 @@ components: - type: string enum: - auto - - $ref: '#/components/schemas/NEpochs' + - $ref: '#/components/schemas/ModelDefinitions.NEpochs' description: |- The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. @@ -2682,7 +2655,7 @@ components: The parameter that was invalid, usually `training_file` or `validation_file`. This field will be null if the failure was not parameter-specific. 
nullable: true - FineTuningJobEvent: + ModelDefinitions.FineTuningJobEvent: type: object required: - id @@ -2706,7 +2679,7 @@ components: - error message: type: string - Image: + ModelDefinitions.Image: type: object description: Represents the url or the content of an image generated by the OpenAI API. properties: @@ -2718,12 +2691,12 @@ components: type: string format: base64 description: The base64-encoded JSON of the generated image, if `response_format` is `b64_json`. - ImagesN: + ModelDefinitions.ImagesN: type: integer format: int64 minimum: 1 maximum: 10 - ImagesResponse: + ModelDefinitions.ImagesResponse: type: object required: - created @@ -2735,8 +2708,8 @@ components: data: type: array items: - $ref: '#/components/schemas/Image' - ListFilesResponse: + $ref: '#/components/schemas/ModelDefinitions.Image' + ModelDefinitions.ListFilesResponse: type: object required: - object @@ -2747,8 +2720,8 @@ components: data: type: array items: - $ref: '#/components/schemas/OpenAIFile' - ListFineTuneEventsResponse: + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' + ModelDefinitions.ListFineTuneEventsResponse: type: object required: - object @@ -2759,8 +2732,8 @@ components: data: type: array items: - $ref: '#/components/schemas/FineTuneEvent' - ListFineTunesResponse: + $ref: '#/components/schemas/ModelDefinitions.FineTuneEvent' + ModelDefinitions.ListFineTunesResponse: type: object required: - object @@ -2771,8 +2744,8 @@ components: data: type: array items: - $ref: '#/components/schemas/FineTune' - ListFineTuningJobEventsResponse: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + ModelDefinitions.ListFineTuningJobEventsResponse: type: object required: - object @@ -2783,8 +2756,8 @@ components: data: type: array items: - $ref: '#/components/schemas/FineTuningJobEvent' - ListModelsResponse: + $ref: '#/components/schemas/ModelDefinitions.FineTuningJobEvent' + ModelDefinitions.ListModelsResponse: type: object required: - object @@ -2795,8 +2768,8 @@ components: data: type: array items: - $ref: '#/components/schemas/Model' - ListPaginatedFineTuningJobsResponse: + $ref: '#/components/schemas/ModelDefinitions.Model' + ModelDefinitions.ListPaginatedFineTuningJobsResponse: type: object required: - object @@ -2808,14 +2781,14 @@ components: data: type: array items: - $ref: '#/components/schemas/FineTuningJob' + $ref: '#/components/schemas/ModelDefinitions.FineTuningJob' has_more: type: boolean - MaxTokens: + ModelDefinitions.MaxTokens: type: integer format: int64 minimum: 0 - Model: + ModelDefinitions.Model: type: object description: Describes an OpenAI model offering that can be used with the API. required: @@ -2839,17 +2812,17 @@ components: owned_by: type: string description: The organization that owns the model. - N: + ModelDefinitions.N: type: integer format: int64 minimum: 1 maximum: 128 - NEpochs: + ModelDefinitions.NEpochs: type: integer format: int64 minimum: 1 maximum: 50 - OpenAIFile: + ModelDefinitions.OpenAIFile: type: object description: The `File` object represents a document that has been uploaded to OpenAI. required: @@ -2901,57 +2874,57 @@ components: description: |- Additional details about the status of the file. If the file is in the `error` state, this will include a message describing the error. 
- Penalty: + ModelDefinitions.Penalty: type: number format: double minimum: -2 maximum: 2 - Prompt: + ModelDefinitions.Prompt: oneOf: - type: string - type: array items: type: string - - $ref: '#/components/schemas/TokenArray' - - $ref: '#/components/schemas/TokenArrayArray' + - $ref: '#/components/schemas/ModelDefinitions.TokenArray' + - $ref: '#/components/schemas/ModelDefinitions.TokenArrayArray' nullable: true - Stop: + ModelDefinitions.Stop: oneOf: - type: string - - $ref: '#/components/schemas/StopSequences' + - $ref: '#/components/schemas/ModelDefinitions.StopSequences' nullable: true - StopSequences: + ModelDefinitions.StopSequences: type: array items: type: string minItems: 1 maxItems: 4 - SuffixString: + ModelDefinitions.SuffixString: type: string minLength: 1 maxLength: 40 - Temperature: + ModelDefinitions.Temperature: type: number format: double minimum: 0 maximum: 2 - TokenArray: + ModelDefinitions.TokenArray: type: array items: type: integer format: int64 minItems: 1 - TokenArrayArray: + ModelDefinitions.TokenArrayArray: type: array items: - $ref: '#/components/schemas/TokenArray' + $ref: '#/components/schemas/ModelDefinitions.TokenArray' minItems: 1 - TopP: + ModelDefinitions.TopP: type: number format: double minimum: 0 maximum: 1 - User: + ModelDefinitions.User: type: string securitySchemes: BearerAuth: diff --git a/versions.tsp b/versions.tsp new file mode 100644 index 000000000..da59f2503 --- /dev/null +++ b/versions.tsp @@ -0,0 +1,9 @@ +import "@typespec/versioning"; + +@Versioning.versioned(OpenAIFlavors) +namespace ModelDefinitions; + +enum OpenAIFlavors { + OpenAI, + Azure, +} \ No newline at end of file From e6c540a74be12ae21fb951c1e46f467155c7282c Mon Sep 17 00:00:00 2001 From: "Johan Stenberg (MSFT)" Date: Mon, 30 Oct 2023 15:15:40 -0700 Subject: [PATCH 2/6] Filled in all AOAI routes --- audio/models.tsp | 12 +- audio/operations.tsp | 3 + azuremain.tsp | 17 +- completions/azuremodels.tsp | 27 ++ completions/models.tsp | 31 +- embeddings/models.tsp | 5 + .../openapi3/openapi.AzureOpenAI.yaml | 316 ++++++++++++++++++ .../@typespec/openapi3/openapi.OpenAI.yaml | 244 +++++++------- 8 files changed, 507 insertions(+), 148 deletions(-) create mode 100644 completions/azuremodels.tsp diff --git a/audio/models.tsp b/audio/models.tsp index a2a440a90..28c57b079 100644 --- a/audio/models.tsp +++ b/audio/models.tsp @@ -1,7 +1,14 @@ -namespace OpenAI; +import "../versions.tsp"; + +namespace ModelDefinitions; + using TypeSpec.OpenAPI; +using TypeSpec.Versioning; model CreateTranscriptionRequest { + @added(OpenAIFlavors.Azure) + @TypeSpec.Http.path deploymentId: string; + /** * The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, * mpeg, mpga, m4a, ogg, wav, or webm. @@ -50,6 +57,9 @@ model CreateTranscriptionResponse { } model CreateTranslationRequest { + @added(OpenAIFlavors.Azure) + @TypeSpec.Http.path deploymentId: string; + /** * The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, * mpeg, mpga, m4a, ogg, wav, or webm. 
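
The deploymentId additions above lean entirely on the versioning trick from versions.tsp: the two flavors are modeled as ordered "versions", so the Azure output is a strict superset of the OpenAI one. A minimal sketch of the mechanism, using a hypothetical `Flavors` enum and `Gadget` model:

    import "@typespec/versioning";

    using TypeSpec.Versioning;

    // Treating flavors as versions: OpenAI is the "earlier" version and Azure
    // the "later" one, so anything Azure-only is simply marked @added at the
    // Azure member.
    @versioned(Flavors)
    namespace Sketch;

    enum Flavors {
      OpenAI,
      Azure,
    }

    model Gadget {
      id: string;

      // Omitted from the OpenAI projection, present in the Azure projection.
      @added(Flavors.Azure)
      deploymentId?: string;
    }

The ordering also leaves room for OpenAI-only members if they are ever needed: @removed(Flavors.Azure) would drop a property from the Azure projection while keeping it in the OpenAI one.
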
diff --git a/audio/operations.tsp b/audio/operations.tsp index 636fb941a..b21d7ccf2 100644 --- a/audio/operations.tsp +++ b/audio/operations.tsp @@ -7,6 +7,9 @@ using TypeSpec.Http; using TypeSpec.OpenAPI; namespace OpenAI; + +using ModelDefinitions; + @route("/audio") namespace Audio { @route("transcriptions") diff --git a/azuremain.tsp b/azuremain.tsp index 943f7e796..18cb6b71e 100644 --- a/azuremain.tsp +++ b/azuremain.tsp @@ -43,11 +43,26 @@ namespace AzureOpenAI { op createCompletion is OpenAI.Chat.Completions.createChatCompletion; @route("extensions/completions") - op createCompletionOnYourOwnData(... ModelDefinitions.CreateChatCompletionRequest): ModelDefinitions.CreateChatCompletionResponse; + op createCompletionOnYourOwnData(... ModelDefinitions.CreateChatCompletionRequest, dataSources: DataSource[]): ModelDefinitions.CreateChatCompletionResponse; }; } @route("deployments/{deploymentId}/completions") namespace Completions { op createCompletion is OpenAI.Completions.createCompletion; } + + @route("deployments/{deploymentId}/embeddings") + namespace Embeddings { + op createEmbedding is OpenAI.Embeddings.createEmbedding; + } + + @route("deployments/{deploymentId}/audio") + namespace Audio { + @route("transcriptions") + op transcriptions is OpenAI.Audio.Transcriptions.createTranscription; + + @route("translations") + op translations is OpenAI.Audio.Translations.createTranslation; + } + model DataSource {}; } diff --git a/completions/azuremodels.tsp b/completions/azuremodels.tsp new file mode 100644 index 000000000..91d986475 --- /dev/null +++ b/completions/azuremodels.tsp @@ -0,0 +1,27 @@ +import "../versions.tsp"; + +namespace ModelDefinitions; + +using TypeSpec.Versioning; + +@added(OpenAIFlavors.Azure) +model ContentFilterResult { + severity: "safe" | "low" | "medium" | "high"; + filtered: boolean; +} + +@added(OpenAIFlavors.Azure) +model ErrorBase { + code?: string; + message?: string; +}; + +@added(OpenAIFlavors.Azure) +model ContentFilterResults { + sexual: ContentFilterResult; + violence: ContentFilterResult; + hate: ContentFilterResult; + self_harm: ContentFilterResult; + error: ErrorBase; +} + diff --git a/completions/models.tsp b/completions/models.tsp index df764c743..1a604720e 100644 --- a/completions/models.tsp +++ b/completions/models.tsp @@ -1,6 +1,5 @@ -import "@typespec/versioning"; - import "../versions.tsp"; +import "./azuremodels.tsp"; namespace ModelDefinitions; @@ -271,6 +270,9 @@ model CreateChatCompletionResponse { /** The model used for the chat completion. */ `model`: string; + @added(OpenAIFlavors.Azure) + promp_filter_results: ContentFilterResult[]; + /** A list of chat completion choices. Can be more than one if `n` is greater than 1. */ choices: { /** The index of the choice in the list of choices. */ @@ -285,6 +287,9 @@ model CreateChatCompletionResponse { * a flag from our content filters, or `function_call` if the model called a function. 
*/ finish_reason: "stop" | "length" | "function_call" | "content_filter"; + + @added(OpenAIFlavors.Azure) + content_filter_results?: ContentFilterResult; }[]; usage?: CompletionUsage; @@ -324,7 +329,6 @@ model ChatCompletionResponseMessage { @added(OpenAIFlavors.Azure) content_filter_results?: ContentFilterResults - } model CreateCompletionRequest { @@ -439,25 +443,4 @@ model CreateCompletionResponse { @added(OpenAIFlavors.Azure) content_filter_results?: ContentFilterResults -} - -@added(OpenAIFlavors.Azure) -model ContentFilterResult { - severity: "safe" | "low" | "medium" | "high"; - filtered: boolean; -} - -@added(OpenAIFlavors.Azure) -model ErrorBase { - code?: string; - message?: string; -}; - -@added(OpenAIFlavors.Azure) -model ContentFilterResults { - sexual: ContentFilterResult; - violence: ContentFilterResult; - hate: ContentFilterResult; - self_harm: ContentFilterResult; - error: ErrorBase; } \ No newline at end of file diff --git a/embeddings/models.tsp b/embeddings/models.tsp index 027109362..d8ef8d70c 100644 --- a/embeddings/models.tsp +++ b/embeddings/models.tsp @@ -1,9 +1,14 @@ import "../common/models.tsp"; +import "../versions.tsp"; namespace ModelDefinitions; using TypeSpec.OpenAPI; +using TypeSpec.Versioning; model CreateEmbeddingRequest { + @added(OpenAIFlavors.Azure) + @TypeSpec.Http.path deploymentId: string; + /** ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. */ @extension("x-oaiTypeLabel", "string") `model`: string | "text-embedding-ada-002"; diff --git a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml index 35e11f388..ccabe947c 100644 --- a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml +++ b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml @@ -6,6 +6,68 @@ info: tags: - name: OpenAI paths: + /deployments/{deploymentId}/audio/transcriptions: + post: + tags: + - OpenAI + operationId: createTranscription + summary: Transcribes audio into the input language. + parameters: + - name: deploymentId + in: path + required: true + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateTranscriptionResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateTranscriptionRequest' + /deployments/{deploymentId}/audio/translations: + post: + tags: + - OpenAI + operationId: createTranslation + summary: Transcribes audio into the input language. + parameters: + - name: deploymentId + in: path + required: true + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateTranslationResponse' + default: + description: An unexpected error response. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateTranslationRequest' /deployments/{deploymentId}/chat/completions: post: tags: @@ -199,6 +261,7 @@ paths: required: - model - messages + - dataSources properties: model: anyOf: @@ -342,6 +405,10 @@ paths: as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). default: true + dataSources: + type: array + items: + $ref: '#/components/schemas/DataSource' /deployments/{deploymentId}/completions: post: tags: @@ -516,6 +583,37 @@ paths: "finish_reason": "stop" }] } + /deployments/{deploymentId}/embeddings: + post: + tags: + - OpenAI + operationId: createEmbedding + summary: Creates an embedding vector representing the input text. + parameters: + - name: deploymentId + in: path + required: true + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateEmbeddingResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateEmbeddingRequest' security: - BearerAuth: [] components: @@ -533,6 +631,8 @@ components: schema: type: string schemas: + DataSource: + type: object ModelDefinitions.ChatCompletionFunctionCallOption: type: object required: @@ -712,6 +812,7 @@ components: - object - created - model + - promp_filter_results - choices properties: id: @@ -727,6 +828,10 @@ components: model: type: string description: The model used for the chat completion. + promp_filter_results: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' choices: type: array items: @@ -754,6 +859,8 @@ components: natural stop point or a provided stop sequence, `length` if the maximum number of tokens specified in the request was reached, `content_filter` if the content was omitted due to a flag from our content filters, or `function_call` if the model called a function. + content_filter_results: + $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' description: A list of chat completion choices. Can be more than one if `n` is greater than 1. usage: $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' @@ -1017,6 +1124,215 @@ components: name: The completion object legacy: true example: '' + ModelDefinitions.CreateEmbeddingRequest: + type: object + required: + - model + - input + properties: + model: + anyOf: + - type: string + - type: string + enum: + - text-embedding-ada-002 + description: ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. + x-oaiTypeLabel: string + input: + anyOf: + - type: string + - type: array + items: + type: string + - $ref: '#/components/schemas/ModelDefinitions.TokenArray' + - $ref: '#/components/schemas/ModelDefinitions.TokenArrayArray' + description: |- + Input text to embed, encoded as a string or array of tokens. 
To embed multiple inputs in a + single request, pass an array of strings or array of token arrays. Each input must not exceed + the max input tokens for the model (8191 tokens for `text-embedding-ada-002`) and cannot be an empty string. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + user: + $ref: '#/components/schemas/ModelDefinitions.User' + ModelDefinitions.CreateEmbeddingResponse: + type: object + required: + - object + - model + - data + - usage + properties: + object: + type: string + enum: + - embedding + description: The object type, which is always "embedding". + model: + type: string + description: The name of the model used to generate the embedding. + data: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.Embedding' + description: The list of embeddings generated by the model. + usage: + type: object + description: The usage information for the request. + required: + - prompt_tokens + - total_tokens + properties: + prompt_tokens: + type: integer + format: int64 + description: The number of tokens used by the prompt. + total_tokens: + type: integer + format: int64 + description: The total number of tokens used by the request. + ModelDefinitions.CreateTranscriptionRequest: + type: object + required: + - file + - model + properties: + file: + type: string + format: binary + description: |- + The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, + mpeg, mpga, m4a, ogg, wav, or webm. + x-oaiTypeLabel: file + model: + anyOf: + - type: string + - type: string + enum: + - whisper-1 + description: ID of the model to use. Only `whisper-1` is currently available. + x-oaiTypeLabel: string + prompt: + type: string + description: |- + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. + response_format: + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + description: |- + The format of the transcript output, in one of these options: json, text, srt, verbose_json, or + vtt. + default: json + temperature: + type: number + format: double + description: |- + The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, + the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + minimum: 0 + maximum: 1 + default: 0 + language: + type: string + description: |- + The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy + and latency. + ModelDefinitions.CreateTranscriptionResponse: + type: object + required: + - text + properties: + text: + type: string + ModelDefinitions.CreateTranslationRequest: + type: object + required: + - file + - model + properties: + file: + type: string + format: binary + description: |- + The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, + mpeg, mpga, m4a, ogg, wav, or webm. + x-oaiTypeLabel: file + model: + anyOf: + - type: string + - type: string + enum: + - whisper-1 + description: ID of the model to use. Only `whisper-1` is currently available. 
+ x-oaiTypeLabel: string + prompt: + type: string + description: |- + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. + response_format: + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + description: |- + The format of the transcript output, in one of these options: json, text, srt, verbose_json, or + vtt. + default: json + temperature: + type: number + format: double + description: |- + The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, + the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + minimum: 0 + maximum: 1 + default: 0 + ModelDefinitions.CreateTranslationResponse: + type: object + required: + - text + properties: + text: + type: string + ModelDefinitions.Embedding: + type: object + description: Represents an embedding vector returned by embedding endpoint. + required: + - index + - object + - embedding + properties: + index: + type: integer + format: int64 + description: The index of the embedding in the list of embeddings. + object: + type: string + enum: + - embedding + description: The object type, which is always "embedding". + embedding: + type: array + items: + type: number + format: double + description: |- + The embedding vector, which is a list of floats. The length of vector depends on the model as\ + listed in the [embedding guide](/docs/guides/embeddings). ModelDefinitions.ErrorBase: type: object properties: diff --git a/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml b/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml index c092d5ab0..47c372d7e 100644 --- a/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml +++ b/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml @@ -18,7 +18,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateTranscriptionResponse' + $ref: '#/components/schemas/ModelDefinitions.CreateTranscriptionResponse' default: description: An unexpected error response. content: @@ -30,7 +30,7 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/CreateTranscriptionRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateTranscriptionRequest' /audio/translations: post: tags: @@ -44,7 +44,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateTranslationResponse' + $ref: '#/components/schemas/ModelDefinitions.CreateTranslationResponse' default: description: An unexpected error response. content: @@ -56,7 +56,7 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/CreateTranslationRequest' + $ref: '#/components/schemas/ModelDefinitions.CreateTranslationRequest' /chat/completions: post: tags: @@ -919,124 +919,6 @@ security: - BearerAuth: [] components: schemas: - CreateTranscriptionRequest: - type: object - required: - - file - - model - properties: - file: - type: string - format: binary - description: |- - The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, - mpeg, mpga, m4a, ogg, wav, or webm. - x-oaiTypeLabel: file - model: - anyOf: - - type: string - - type: string - enum: - - whisper-1 - description: ID of the model to use. Only `whisper-1` is currently available. 
- x-oaiTypeLabel: string - prompt: - type: string - description: |- - An optional text to guide the model's style or continue a previous audio segment. The - [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. - response_format: - type: string - enum: - - json - - text - - srt - - verbose_json - - vtt - description: |- - The format of the transcript output, in one of these options: json, text, srt, verbose_json, or - vtt. - default: json - temperature: - type: number - format: double - description: |- - The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more - random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, - the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to - automatically increase the temperature until certain thresholds are hit. - minimum: 0 - maximum: 1 - default: 0 - language: - type: string - description: |- - The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy - and latency. - CreateTranscriptionResponse: - type: object - required: - - text - properties: - text: - type: string - CreateTranslationRequest: - type: object - required: - - file - - model - properties: - file: - type: string - format: binary - description: |- - The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, - mpeg, mpga, m4a, ogg, wav, or webm. - x-oaiTypeLabel: file - model: - anyOf: - - type: string - - type: string - enum: - - whisper-1 - description: ID of the model to use. Only `whisper-1` is currently available. - x-oaiTypeLabel: string - prompt: - type: string - description: |- - An optional text to guide the model's style or continue a previous audio segment. The - [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. - response_format: - type: string - enum: - - json - - text - - srt - - verbose_json - - vtt - description: |- - The format of the transcript output, in one of these options: json, text, srt, verbose_json, or - vtt. - default: json - temperature: - type: number - format: double - description: |- - The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more - random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, - the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to - automatically increase the temperature until certain thresholds are hit. - minimum: 0 - maximum: 1 - default: 0 - CreateTranslationResponse: - type: object - required: - - text - properties: - text: - type: string Error: type: object required: @@ -2343,6 +2225,124 @@ components: format: double description: The score for the category 'violence/graphic'. description: A list of moderation objects. + ModelDefinitions.CreateTranscriptionRequest: + type: object + required: + - file + - model + properties: + file: + type: string + format: binary + description: |- + The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, + mpeg, mpga, m4a, ogg, wav, or webm. + x-oaiTypeLabel: file + model: + anyOf: + - type: string + - type: string + enum: + - whisper-1 + description: ID of the model to use. Only `whisper-1` is currently available. 
+ x-oaiTypeLabel: string + prompt: + type: string + description: |- + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. + response_format: + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + description: |- + The format of the transcript output, in one of these options: json, text, srt, verbose_json, or + vtt. + default: json + temperature: + type: number + format: double + description: |- + The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, + the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. + minimum: 0 + maximum: 1 + default: 0 + language: + type: string + description: |- + The language of the input audio. Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy + and latency. + ModelDefinitions.CreateTranscriptionResponse: + type: object + required: + - text + properties: + text: + type: string + ModelDefinitions.CreateTranslationRequest: + type: object + required: + - file + - model + properties: + file: + type: string + format: binary + description: |- + The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, + mpeg, mpga, m4a, ogg, wav, or webm. + x-oaiTypeLabel: file + model: + anyOf: + - type: string + - type: string + enum: + - whisper-1 + description: ID of the model to use. Only `whisper-1` is currently available. + x-oaiTypeLabel: string + prompt: + type: string + description: |- + An optional text to guide the model's style or continue a previous audio segment. The + [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. + response_format: + type: string + enum: + - json + - text + - srt + - verbose_json + - vtt + description: |- + The format of the transcript output, in one of these options: json, text, srt, verbose_json, or + vtt. + default: json + temperature: + type: number + format: double + description: |- + The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more + random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, + the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to + automatically increase the temperature until certain thresholds are hit. 
+ minimum: 0 + maximum: 1 + default: 0 + ModelDefinitions.CreateTranslationResponse: + type: object + required: + - text + properties: + text: + type: string ModelDefinitions.DeleteFileResponse: type: object required: From d1b99d6c5a33c73649ec6bca3dbe5b6324fd49a9 Mon Sep 17 00:00:00 2001 From: "Johan Stenberg (MSFT)" Date: Tue, 31 Oct 2023 15:46:40 -0700 Subject: [PATCH 3/6] Added @azure decorator + openapi extensions to call out what is AOAI added --- audio/models.tsp | 5 +- azuremain.tsp | 10 +- completions/azuremodels.tsp | 6 +- completions/models.tsp | 12 +- embeddings/models.tsp | 2 +- openai.js | 17 + .../openapi3/openapi.AzureOpenAI.yaml | 362 +++++++++--------- .../@typespec/openapi3/openapi.OpenAI.yaml | 11 - versions.tsp | 15 +- 9 files changed, 239 insertions(+), 201 deletions(-) create mode 100644 openai.js diff --git a/audio/models.tsp b/audio/models.tsp index 28c57b079..d394f033e 100644 --- a/audio/models.tsp +++ b/audio/models.tsp @@ -6,7 +6,7 @@ using TypeSpec.OpenAPI; using TypeSpec.Versioning; model CreateTranscriptionRequest { - @added(OpenAIFlavors.Azure) + @azure @TypeSpec.Http.path deploymentId: string; /** @@ -57,7 +57,7 @@ model CreateTranscriptionResponse { } model CreateTranslationRequest { - @added(OpenAIFlavors.Azure) + @azure @TypeSpec.Http.path deploymentId: string; /** @@ -93,6 +93,7 @@ model CreateTranslationRequest { */ @minValue(0) @maxValue(1) + @azure temperature?: float64 = 0; } diff --git a/azuremain.tsp b/azuremain.tsp index 18cb6b71e..d20ce125d 100644 --- a/azuremain.tsp +++ b/azuremain.tsp @@ -41,11 +41,15 @@ namespace AzureOpenAI { interface Completions { @route("completions") op createCompletion is OpenAI.Chat.Completions.createChatCompletion; - - @route("extensions/completions") - op createCompletionOnYourOwnData(... ModelDefinitions.CreateChatCompletionRequest, dataSources: DataSource[]): ModelDefinitions.CreateChatCompletionResponse; }; } + + @route("deployments/{deploymentId}/extensions/chat") + namespace ChatExtensions { + @route("completions") + op createCompletionOnYourOwnData(... ModelDefinitions.CreateChatCompletionRequest, dataSources: DataSource[]): ModelDefinitions.CreateChatCompletionResponse; + } + @route("deployments/{deploymentId}/completions") namespace Completions { op createCompletion is OpenAI.Completions.createCompletion; diff --git a/completions/azuremodels.tsp b/completions/azuremodels.tsp index 91d986475..080bc259e 100644 --- a/completions/azuremodels.tsp +++ b/completions/azuremodels.tsp @@ -4,19 +4,19 @@ namespace ModelDefinitions; using TypeSpec.Versioning; -@added(OpenAIFlavors.Azure) +@azure model ContentFilterResult { severity: "safe" | "low" | "medium" | "high"; filtered: boolean; } -@added(OpenAIFlavors.Azure) +@azure model ErrorBase { code?: string; message?: string; }; -@added(OpenAIFlavors.Azure) +@azure model ContentFilterResults { sexual: ContentFilterResult; violence: ContentFilterResult; diff --git a/completions/models.tsp b/completions/models.tsp index 1a604720e..91041f97e 100644 --- a/completions/models.tsp +++ b/completions/models.tsp @@ -147,7 +147,7 @@ scalar N extends safeint; scalar MaxTokens extends safeint; model CreateChatCompletionRequest { - @added(ModelDefinitions.OpenAIFlavors.Azure) + @azure deploymentId: string; /** @@ -212,7 +212,7 @@ model ChatCompletionFunctions { model ChatCompletionFunctionParameters is Record; model ChatCompletionRequestMessage { - @added(ModelDefinitions.OpenAIFlavors.Azure) + @azure deploymentId: string; /** The role of the messages author. 
One of `system`, `user`, `assistant`, or `function`. */ @@ -270,7 +270,7 @@ model CreateChatCompletionResponse { /** The model used for the chat completion. */ `model`: string; - @added(OpenAIFlavors.Azure) + @azure promp_filter_results: ContentFilterResult[]; /** A list of chat completion choices. Can be more than one if `n` is greater than 1. */ @@ -288,7 +288,7 @@ model CreateChatCompletionResponse { */ finish_reason: "stop" | "length" | "function_call" | "content_filter"; - @added(OpenAIFlavors.Azure) + @azure content_filter_results?: ContentFilterResult; }[]; @@ -327,12 +327,12 @@ model ChatCompletionResponseMessage { arguments: string; }; - @added(OpenAIFlavors.Azure) + @azure content_filter_results?: ContentFilterResults } model CreateCompletionRequest { - @added(OpenAIFlavors.Azure) + @azure @TypeSpec.Http.path deploymentId: string; /** diff --git a/embeddings/models.tsp b/embeddings/models.tsp index d8ef8d70c..7235adb2b 100644 --- a/embeddings/models.tsp +++ b/embeddings/models.tsp @@ -6,7 +6,7 @@ using TypeSpec.OpenAPI; using TypeSpec.Versioning; model CreateEmbeddingRequest { - @added(OpenAIFlavors.Azure) + @azure @TypeSpec.Http.path deploymentId: string; /** ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. */ diff --git a/openai.js b/openai.js new file mode 100644 index 000000000..e731f1848 --- /dev/null +++ b/openai.js @@ -0,0 +1,17 @@ +import "@typespec/versioning"; +import { + $extension +} from "@typespec/openapi"; + +import { $added } from "@typespec/versioning"; + +var map = {}; + +export function $azureVersion(context, target) { + map.azureVersion = target; +}; + +export function $azure(context, target) { + $added(context, target, map.azureVersion); + $extension(context, target, "x-ms-azure-openai", true); +} \ No newline at end of file diff --git a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml index ccabe947c..e0557e811 100644 --- a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml +++ b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml @@ -18,6 +18,8 @@ paths: required: true schema: type: string + x-ms-azure-openai: true + x-ms-azure-openai: true responses: '200': description: The request has succeeded. @@ -49,6 +51,8 @@ paths: required: true schema: type: string + x-ms-azure-openai: true + x-ms-azure-openai: true responses: '200': description: The request has succeeded. @@ -240,175 +244,6 @@ paths: as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). default: true - /deployments/{deploymentId}/chat/extensions/completions: - post: - operationId: Completions_createCompletionOnYourOwnData - parameters: - - $ref: '#/components/parameters/ModelDefinitions.CreateChatCompletionRequest.deploymentId' - responses: - '200': - description: The request has succeeded. 
- content: - application/json: - schema: - $ref: '#/components/schemas/ModelDefinitions.CreateChatCompletionResponse' - requestBody: - required: true - content: - application/json: - schema: - type: object - required: - - model - - messages - - dataSources - properties: - model: - anyOf: - - type: string - - type: string - enum: - - gpt4 - - gpt-4-0314 - - gpt-4-0613 - - gpt-4-32k - - gpt-4-32k-0314 - - gpt-4-32k-0613 - - gpt-3.5-turbo - - gpt-3.5-turbo-16k - - gpt-3.5-turbo-0301 - - gpt-3.5-turbo-0613 - - gpt-3.5-turbo-16k-0613 - description: |- - ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - x-oaiTypeLabel: string - messages: - type: array - items: - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionRequestMessage' - description: |- - A list of messages comprising the conversation so far. - [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). - minItems: 1 - functions: - type: array - items: - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctions' - description: A list of functions the model may generate JSON inputs for. - minItems: 1 - maxItems: 128 - function_call: - anyOf: - - type: string - enum: - - none - - auto - - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionCallOption' - description: |- - Controls how the model responds to function calls. `none` means the model does not call a - function, and responds to the end-user. `auto` means the model can pick between an end-user or - calling a function. Specifying a particular function via `{\"name":\ \"my_function\"}` forces the - model to call that function. `none` is the default when no functions are present. `auto` is the - default if functions are present. - temperature: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.Temperature' - nullable: true - description: |- - What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output - more random, while lower values like 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - default: 1 - top_p: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.TopP' - nullable: true - description: |- - An alternative to sampling with temperature, called nucleus sampling, where the model considers - the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising - the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - default: 1 - n: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.N' - nullable: true - description: |- - How many completions to generate for each prompt. - **Note:** Because this parameter generates many completions, it can quickly consume your token - quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. - default: 1 - max_tokens: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' - nullable: true - description: |- - The maximum number of [tokens](/tokenizer) to generate in the completion. - - The token count of your prompt plus `max_tokens` cannot exceed the model's context length. - [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) - for counting tokens. 
- default: 16 - stop: - allOf: - - $ref: '#/components/schemas/ModelDefinitions.Stop' - description: Up to 4 sequences where the API will stop generating further tokens. - default: null - presence_penalty: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.Penalty' - nullable: true - description: |- - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear - in the text so far, increasing the model's likelihood to talk about new topics. - - [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) - frequency_penalty: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.Penalty' - nullable: true - description: |- - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing - frequency in the text so far, decreasing the model's likelihood to repeat the same line - verbatim. - - [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) - logit_bias: - type: object - description: |- - Modify the likelihood of specified tokens appearing in the completion. - Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an - associated bias value from -100 to 100. Mathematically, the bias is added to the logits - generated by the model prior to sampling. The exact effect will vary per model, but values - between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 - should result in a ban or exclusive selection of the relevant token. - additionalProperties: - type: integer - format: int64 - nullable: true - x-oaiTypeLabel: map - user: - allOf: - - $ref: '#/components/schemas/ModelDefinitions.User' - description: |- - A unique identifier representing your end-user, which can help OpenAI to monitor and detect - abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). - stream: - type: boolean - nullable: true - description: |- - If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` message. - [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). - default: true - dataSources: - type: array - items: - $ref: '#/components/schemas/DataSource' /deployments/{deploymentId}/completions: post: tags: @@ -595,6 +430,8 @@ paths: required: true schema: type: string + x-ms-azure-openai: true + x-ms-azure-openai: true responses: '200': description: The request has succeeded. @@ -614,6 +451,175 @@ paths: application/json: schema: $ref: '#/components/schemas/ModelDefinitions.CreateEmbeddingRequest' + /deployments/{deploymentId}/extensions/chat/completions: + post: + operationId: ChatExtensions_createCompletionOnYourOwnData + parameters: + - $ref: '#/components/parameters/ModelDefinitions.CreateChatCompletionRequest.deploymentId' + responses: + '200': + description: The request has succeeded. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateChatCompletionResponse' + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - model + - messages + - dataSources + properties: + model: + anyOf: + - type: string + - type: string + enum: + - gpt4 + - gpt-4-0314 + - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0314 + - gpt-4-32k-0613 + - gpt-3.5-turbo + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-0301 + - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-16k-0613 + description: |- + ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + x-oaiTypeLabel: string + messages: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionRequestMessage' + description: |- + A list of messages comprising the conversation so far. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + minItems: 1 + functions: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctions' + description: A list of functions the model may generate JSON inputs for. + minItems: 1 + maxItems: 128 + function_call: + anyOf: + - type: string + enum: + - none + - auto + - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionCallOption' + description: |- + Controls how the model responds to function calls. `none` means the model does not call a + function, and responds to the end-user. `auto` means the model can pick between an end-user or + calling a function. Specifying a particular function via `{\"name":\ \"my_function\"}` forces the + model to call that function. `none` is the default when no functions are present. `auto` is the + default if functions are present. + temperature: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Temperature' + nullable: true + description: |- + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + default: 1 + top_p: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.TopP' + nullable: true + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising + the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + default: 1 + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.N' + nullable: true + description: |- + How many completions to generate for each prompt. + **Note:** Because this parameter generates many completions, it can quickly consume your token + quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + default: 1 + max_tokens: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' + nullable: true + description: |- + The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. 
+ default: 16 + stop: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.Stop' + description: Up to 4 sequences where the API will stop generating further tokens. + default: null + presence_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear + in the text so far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + frequency_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing + frequency in the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + logit_bias: + type: object + description: |- + Modify the likelihood of specified tokens appearing in the completion. + Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an + associated bias value from -100 to 100. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, but values + between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + additionalProperties: + type: integer + format: int64 + nullable: true + x-oaiTypeLabel: map + user: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.User' + description: |- + A unique identifier representing your end-user, which can help OpenAI to monitor and detect + abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). + stream: + type: boolean + nullable: true + description: |- + If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + default: true + dataSources: + type: array + items: + $ref: '#/components/schemas/DataSource' security: - BearerAuth: [] components: @@ -624,12 +630,16 @@ components: required: true schema: type: string + x-ms-azure-openai: true + x-ms-azure-openai: true ModelDefinitions.CreateCompletionRequest.deploymentId: name: deploymentId in: path required: true schema: type: string + x-ms-azure-openai: true + x-ms-azure-openai: true schemas: DataSource: type: object @@ -678,6 +688,7 @@ components: properties: deploymentId: type: string + x-ms-azure-openai: true role: type: string enum: @@ -749,7 +760,9 @@ components: the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. content_filter_results: - $ref: '#/components/schemas/ModelDefinitions.ContentFilterResults' + allOf: + - $ref: '#/components/schemas/ModelDefinitions.ContentFilterResults' + x-ms-azure-openai: true ModelDefinitions.CompletionUsage: type: object description: Usage statistics for the completion request. 
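This relocation moves the "on your own data" operation from `chat/extensions/completions` to `extensions/chat/completions` while keeping the payload a normal chat completion request plus the required `dataSources` array. A minimal client sketch against the new route, assuming a reachable `{base_url}`, an existing deployment name, and a bearer key; the `dataSources` entry is a placeholder, since the `DataSource` model is still an empty stub in this patch:

```ts
// callOnYourOwnData.ts: a sketch against the relocated route. Names of the
// endpoint host, deployment, and message content are assumptions.
async function createCompletionOnYourOwnData(apiKey: string) {
  const baseUrl = "https://my-resource.example.com"; // assumed {base_url}
  const deploymentId = "gpt-35-turbo";               // assumed deployment

  const res = await fetch(
    `${baseUrl}/openai/deployments/${deploymentId}/extensions/chat/completions`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`, // the spec declares BearerAuth
      },
      body: JSON.stringify({
        model: "gpt-3.5-turbo",
        messages: [{ role: "user", content: "What does our travel policy say?" }],
        dataSources: [{}], // placeholder: the DataSource schema defines no properties yet
      }),
    },
  );
  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  const body = await res.json();
  // Azure-flavored responses attach content_filter_results to each choice.
  for (const choice of body.choices ?? []) {
    if (choice.finish_reason === "content_filter") {
      console.warn("Choice was filtered:", choice.content_filter_results);
    }
  }
  return body;
}
```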
@@ -785,6 +798,7 @@ components: - high filtered: type: boolean + x-ms-azure-openai: true ModelDefinitions.ContentFilterResults: type: object required: @@ -804,6 +818,7 @@ components: $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' error: $ref: '#/components/schemas/ModelDefinitions.ErrorBase' + x-ms-azure-openai: true ModelDefinitions.CreateChatCompletionResponse: type: object description: Represents a chat completion response returned by model, based on the provided input. @@ -832,6 +847,7 @@ components: type: array items: $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' + x-ms-azure-openai: true choices: type: array items: @@ -860,7 +876,9 @@ components: specified in the request was reached, `content_filter` if the content was omitted due to a flag from our content filters, or `function_call` if the model called a function. content_filter_results: - $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' + allOf: + - $ref: '#/components/schemas/ModelDefinitions.ContentFilterResult' + x-ms-azure-openai: true description: A list of chat completion choices. Can be more than one if `n` is greater than 1. usage: $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' @@ -1300,6 +1318,7 @@ components: automatically increase the temperature until certain thresholds are hit. minimum: 0 maximum: 1 + x-ms-azure-openai: true default: 0 ModelDefinitions.CreateTranslationResponse: type: object @@ -1340,6 +1359,7 @@ components: type: string message: type: string + x-ms-azure-openai: true ModelDefinitions.MaxTokens: type: integer format: int64 diff --git a/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml b/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml index 47c372d7e..4aa96d37e 100644 --- a/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml +++ b/tsp-output/@typespec/openapi3/openapi.OpenAI.yaml @@ -2325,17 +2325,6 @@ components: The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt. default: json - temperature: - type: number - format: double - description: |- - The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more - random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, - the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to - automatically increase the temperature until certain thresholds are hit. 
- minimum: 0 - maximum: 1 - default: 0 ModelDefinitions.CreateTranslationResponse: type: object required: diff --git a/versions.tsp b/versions.tsp index da59f2503..6db6292b6 100644 --- a/versions.tsp +++ b/versions.tsp @@ -1,9 +1,16 @@ import "@typespec/versioning"; +import "./openai.js"; + +extern dec azure(target); +extern dec azureVersion(target); @Versioning.versioned(OpenAIFlavors) -namespace ModelDefinitions; +namespace ModelDefinitions { + -enum OpenAIFlavors { - OpenAI, - Azure, + enum OpenAIFlavors { + OpenAI, + @azureVersion + Azure, + } } \ No newline at end of file From 81f294a8d2896e160ef34871fa7b13fa51bce390 Mon Sep 17 00:00:00 2001 From: "Johan Stenberg (MSFT)" Date: Fri, 3 Nov 2023 16:28:42 -0700 Subject: [PATCH 4/6] Added remaining routes to AOAI --- azuremain.tsp | 16 + completions/models.tsp | 5 +- embeddings/models.tsp | 3 + openai.js | 6 +- .../openapi3/openapi.AzureOpenAI.yaml | 1294 ++++++++++++++++- versions.tsp | 4 +- 6 files changed, 1267 insertions(+), 61 deletions(-) diff --git a/azuremain.tsp b/azuremain.tsp index d20ce125d..83f3c2a4c 100644 --- a/azuremain.tsp +++ b/azuremain.tsp @@ -68,5 +68,21 @@ namespace AzureOpenAI { @route("translations") op translations is OpenAI.Audio.Translations.createTranslation; } + + @route("/files") + interface Files extends OpenAI.Files {}; + + @route("/fine-tunes") + interface FineTunes extends OpenAI.FineTunes {}; + + @route("/fine-tuning") + interface FineTuning extends OpenAI.FineTunes {}; + + @route("/models") + interface Models extends OpenAI.Models {}; + + @route("/models") + interface Images extends OpenAI.Images {}; + model DataSource {}; } diff --git a/completions/models.tsp b/completions/models.tsp index 91041f97e..b83014390 100644 --- a/completions/models.tsp +++ b/completions/models.tsp @@ -382,6 +382,9 @@ model CreateCompletionRequest { * quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. */ best_of?: safeint | null = 1; + + @azure + completion_config?: string | null; } @oneOf @@ -441,6 +444,6 @@ model CreateCompletionResponse { usage?: CompletionUsage; - @added(OpenAIFlavors.Azure) + @azure content_filter_results?: ContentFilterResults } \ No newline at end of file diff --git a/embeddings/models.tsp b/embeddings/models.tsp index 7235adb2b..f39b8922b 100644 --- a/embeddings/models.tsp +++ b/embeddings/models.tsp @@ -23,6 +23,9 @@ model CreateEmbeddingRequest { input: string | string[] | TokenArray | TokenArrayArray; user?: User; + + @azure + input_type?: string; } model CreateEmbeddingResponse { /** The object type, which is always "embedding". 
*/ diff --git a/openai.js b/openai.js index e731f1848..d9ecf499b 100644 --- a/openai.js +++ b/openai.js @@ -5,13 +5,13 @@ import { import { $added } from "@typespec/versioning"; -var map = {}; +var state = {}; export function $azureVersion(context, target) { - map.azureVersion = target; + state.azureVersion = target; }; export function $azure(context, target) { - $added(context, target, map.azureVersion); + $added(context, target, state.azureVersion); $extension(context, target, "x-ms-azure-openai", true); } \ No newline at end of file diff --git a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml index e0557e811..433dc6536 100644 --- a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml +++ b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml @@ -620,6 +620,581 @@ paths: type: array items: $ref: '#/components/schemas/DataSource' + /files: + get: + tags: + - OpenAI + operationId: listFiles + summary: Returns a list of files that belong to the user's organization. + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ListFilesResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + post: + tags: + - OpenAI + operationId: createFile + summary: Returns a list of files that belong to the user's organization. + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateFileRequest' + /files/files/{file_id}: + post: + tags: + - OpenAI + operationId: retrieveFile + summary: Returns information about a specific file. + parameters: + - name: file_id + in: path + required: true + description: The ID of the file to use for this request. + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + delete: + tags: + - OpenAI + operationId: deleteFile + summary: Delete a file + parameters: + - name: file_id + in: path + required: true + description: The ID of the file to use for this request. + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.DeleteFileResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + /files/files/{file_id}/content: + get: + tags: + - OpenAI + operationId: downloadFile + summary: Returns the contents of the specified file. + parameters: + - name: file_id + in: path + required: true + description: The ID of the file to use for this request. + schema: + type: string + responses: + '200': + description: The request has succeeded. 
+ content: + application/json: + schema: + type: string + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + /fine-tunes: + post: + tags: + - OpenAI + operationId: createFineTune + summary: |- + Creates a job that fine-tunes a specified model from a given dataset. + + Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. + + [Learn more about fine-tuning](/docs/guides/legacy-fine-tuning) + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateFineTuneRequest' + deprecated: true + get: + tags: + - OpenAI + operationId: listFineTunes + summary: List your organization's fine-tuning jobs + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ListFineTunesResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /fine-tunes/{fine_tune_id}: + get: + tags: + - OpenAI + operationId: retrieveFineTune + summary: |- + Gets info about the fine-tune job. + + [Learn more about fine-tuning](/docs/guides/legacy-fine-tuning) + parameters: + - name: fine_tune_id + in: path + required: true + description: The ID of the fine-tune job + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /fine-tunes/{fine_tune_id}/cancel: + post: + tags: + - OpenAI + operationId: cancelFineTune + summary: Immediately cancel a fine-tune job. + parameters: + - name: fine_tune_id + in: path + required: true + description: The ID of the fine-tune job to cancel + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /fine-tunes/{fine_tune_id}/events: + get: + tags: + - OpenAI + operationId: listFineTuneEvents + summary: Get fine-grained status updates for a fine-tune job. + parameters: + - name: fine_tune_id + in: path + required: true + description: The ID of the fine-tune job to get events for. + schema: + type: string + - name: stream + in: query + required: false + description: |- + Whether to stream events for the fine-tune job. If set to true, events will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available. 
The stream will terminate with a `data: [DONE]` message when the + job is finished (succeeded, cancelled, or failed). + + If set to false, only events generated so far will be returned. + schema: + type: boolean + default: false + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ListFineTuneEventsResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /fine-tuning: + post: + tags: + - OpenAI + operationId: createFineTune + summary: |- + Creates a job that fine-tunes a specified model from a given dataset. + + Response includes details of the enqueued job including job status and the name of the fine-tuned models once complete. + + [Learn more about fine-tuning](/docs/guides/legacy-fine-tuning) + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateFineTuneRequest' + deprecated: true + get: + tags: + - OpenAI + operationId: listFineTunes + summary: List your organization's fine-tuning jobs + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ListFineTunesResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /fine-tuning/{fine_tune_id}: + get: + tags: + - OpenAI + operationId: retrieveFineTune + summary: |- + Gets info about the fine-tune job. + + [Learn more about fine-tuning](/docs/guides/legacy-fine-tuning) + parameters: + - name: fine_tune_id + in: path + required: true + description: The ID of the fine-tune job + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /fine-tuning/{fine_tune_id}/cancel: + post: + tags: + - OpenAI + operationId: cancelFineTune + summary: Immediately cancel a fine-tune job. + parameters: + - name: fine_tune_id + in: path + required: true + description: The ID of the fine-tune job to cancel + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /fine-tuning/{fine_tune_id}/events: + get: + tags: + - OpenAI + operationId: listFineTuneEvents + summary: Get fine-grained status updates for a fine-tune job. + parameters: + - name: fine_tune_id + in: path + required: true + description: The ID of the fine-tune job to get events for. 
+ schema: + type: string + - name: stream + in: query + required: false + description: |- + Whether to stream events for the fine-tune job. If set to true, events will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available. The stream will terminate with a `data: [DONE]` message when the + job is finished (succeeded, cancelled, or failed). + + If set to false, only events generated so far will be returned. + schema: + type: boolean + default: false + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ListFineTuneEventsResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + deprecated: true + /models: + get: + tags: + - OpenAI + operationId: listModels + summary: |- + Lists the currently available models, and provides basic information about each one such as the + owner and availability. + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ListModelsResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + /models/edits: + post: + tags: + - OpenAI + operationId: createImageEdit + summary: Creates an edited or extended image given an original image and a prompt. + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ImagesResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateImageEditRequest' + /models/generations: + post: + tags: + - OpenAI + operationId: createImage + summary: Creates an image given a prompt + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ImagesResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateImageRequest' + /models/variations: + post: + tags: + - OpenAI + operationId: createImageVariation + summary: Creates an edited or extended image given an original image and a prompt. + parameters: [] + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.ImagesResponse' + default: + description: An unexpected error response. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/ModelDefinitions.CreateImageVariationRequest' + /models/{model}: + get: + tags: + - OpenAI + operationId: retrieveModel + summary: |- + Retrieves a model instance, providing basic information about the model such as the owner and + permissioning. + parameters: + - name: model + in: path + required: true + description: The ID of the model to use for this request. + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.Model' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' + delete: + tags: + - OpenAI + operationId: deleteModel + summary: Delete a fine-tuned model. You must have the Owner role in your organization to delete a model. + parameters: + - name: model + in: path + required: true + description: The model to delete + schema: + type: string + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/ModelDefinitions.DeleteModelResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAI.ErrorResponse' security: - BearerAuth: [] components: @@ -1051,6 +1626,10 @@ components: **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. default: 1 + completion_config: + type: string + nullable: true + x-ms-azure-openai: true ModelDefinitions.CreateCompletionResponse: type: object description: |- @@ -1137,7 +1716,9 @@ components: usage: $ref: '#/components/schemas/ModelDefinitions.CompletionUsage' content_filter_results: - $ref: '#/components/schemas/ModelDefinitions.ContentFilterResults' + allOf: + - $ref: '#/components/schemas/ModelDefinitions.ContentFilterResults' + x-ms-azure-openai: true x-oaiMeta: name: The completion object legacy: true @@ -1145,69 +1726,356 @@ components: ModelDefinitions.CreateEmbeddingRequest: type: object required: - - model - - input + - model + - input + properties: + model: + anyOf: + - type: string + - type: string + enum: + - text-embedding-ada-002 + description: ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. + x-oaiTypeLabel: string + input: + anyOf: + - type: string + - type: array + items: + type: string + - $ref: '#/components/schemas/ModelDefinitions.TokenArray' + - $ref: '#/components/schemas/ModelDefinitions.TokenArrayArray' + description: |- + Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a + single request, pass an array of strings or array of token arrays. Each input must not exceed + the max input tokens for the model (8191 tokens for `text-embedding-ada-002`) and cannot be an empty string. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. 
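The Azure-only `completion_config` and `input_type` properties surface in the schemas above with the `x-ms-azure-openai` marker. A sketch of an embeddings call exercising `input_type`, assuming the operation is mounted deployment-scoped at `/deployments/{deploymentId}/embeddings` (consistent with the `CreateEmbeddingRequest.deploymentId` path parameter) and that `"query"` is an acceptable value; the schema constrains it only to `string`:

```ts
// createEmbedding.ts: a sketch exercising the Azure-only input_type property.
async function createEmbedding(baseUrl: string, deploymentId: string, apiKey: string) {
  const res = await fetch(`${baseUrl}/openai/deployments/${deploymentId}/embeddings`, {
    method: "POST",
    headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
    body: JSON.stringify({
      model: "text-embedding-ada-002",
      input: "The food was delicious and the waiter...",
      input_type: "query", // Azure-only; marked x-ms-azure-openai in the emitted YAML
    }),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const body = await res.json();
  return body.data?.[0]?.embedding as number[] | undefined;
}
```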
+ user: + $ref: '#/components/schemas/ModelDefinitions.User' + input_type: + type: string + x-ms-azure-openai: true + ModelDefinitions.CreateEmbeddingResponse: + type: object + required: + - object + - model + - data + - usage + properties: + object: + type: string + enum: + - embedding + description: The object type, which is always "embedding". + model: + type: string + description: The name of the model used to generate the embedding. + data: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.Embedding' + description: The list of embeddings generated by the model. + usage: + type: object + description: The usage information for the request. + required: + - prompt_tokens + - total_tokens + properties: + prompt_tokens: + type: integer + format: int64 + description: The number of tokens used by the prompt. + total_tokens: + type: integer + format: int64 + description: The total number of tokens used by the request. + ModelDefinitions.CreateFileRequest: + type: object + required: + - file + - purpose + properties: + file: + type: string + format: binary + description: |- + Name of the [JSON Lines](https://jsonlines.readthedocs.io/en/latest/) file to be uploaded. + + If the `purpose` is set to "fine-tune", the file will be used for fine-tuning. + purpose: + type: string + description: |- + The intended purpose of the uploaded documents. Use "fine-tune" for + [fine-tuning](/docs/api-reference/fine-tuning). This allows us to validate the format of the + uploaded file. + ModelDefinitions.CreateFineTuneRequest: + type: object + required: + - training_file + properties: + training_file: + type: string + description: |- + The ID of an uploaded file that contains training data. + + See [upload file](/docs/api-reference/files/upload) for how to upload a file. + + Your dataset must be formatted as a JSONL file, where each training example is a JSON object + with the keys "prompt" and "completion". Additionally, you must upload your file with the + purpose `fine-tune`. + + See the [fine-tuning guide](/docs/guides/legacy-fine-tuning/creating-training-data) for more + details. + validation_file: + type: string + nullable: true + description: |- + The ID of an uploaded file that contains validation data. + + If you provide this file, the data is used to generate validation metrics periodically during + fine-tuning. These metrics can be viewed in the + [fine-tuning results file](/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). + Your train and validation data should be mutually exclusive. + + Your dataset must be formatted as a JSONL file, where each validation example is a JSON object + with the keys "prompt" and "completion". Additionally, you must upload your file with the + purpose `fine-tune`. + + See the [fine-tuning guide](/docs/guides/legacy-fine-tuning/creating-training-data) for more + details. + model: + anyOf: + - type: string + - type: string + enum: + - ada + - babbage + - curie + - davinci + nullable: true + description: |- + The name of the base model to fine-tune. You can select one of "ada", "babbage", "curie", + "davinci", or a fine-tuned model created after 2022-04-21 and before 2023-08-22. To learn more + about these models, see the [Models](/docs/models) documentation. + x-oaiTypeLabel: string + n_epochs: + type: integer + format: int64 + nullable: true + description: |- + The number of epochs to train the model for. An epoch refers to one full cycle through the + training dataset. 
+ default: 4 + batch_size: + type: integer + format: int64 + nullable: true + description: |- + The batch size to use for training. The batch size is the number of training examples used to + train a single forward and backward pass. + + By default, the batch size will be dynamically configured to be ~0.2% of the number of examples + in the training set, capped at 256 - in general, we've found that larger batch sizes tend to + work better for larger datasets. + default: null + learning_rate_multiplier: + type: number + format: double + nullable: true + description: |- + The learning rate multiplier to use for training. The fine-tuning learning rate is the original + learning rate used for pretraining multiplied by this value. + + By default, the learning rate multiplier is the 0.05, 0.1, or 0.2 depending on final + `batch_size` (larger learning rates tend to perform better with larger batch sizes). We + recommend experimenting with values in the range 0.02 to 0.2 to see what produces the best + results. + default: null + prompt_loss_rate: + type: number + format: double + nullable: true + description: |- + The weight to use for loss on the prompt tokens. This controls how much the model tries to + learn to generate the prompt (as compared to the completion which always has a weight of 1.0), + and can add a stabilizing effect to training when completions are short. + + If prompts are extremely long (relative to completions), it may make sense to reduce this + weight so as to avoid over-prioritizing learning the prompt. + default: 0.01 + compute_classification_metrics: + type: boolean + nullable: true + description: |- + If set, we calculate classification-specific metrics such as accuracy and F-1 score using the + validation set at the end of every epoch. These metrics can be viewed in the + [results file](/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model). + + In order to compute classification metrics, you must provide a `validation_file`. Additionally, + you must specify `classification_n_classes` for multiclass classification or + `classification_positive_class` for binary classification. + default: false + classification_n_classes: + type: integer + format: int64 + nullable: true + description: |- + The number of classes in a classification task. + + This parameter is required for multiclass classification. + default: null + classification_positive_class: + type: string + nullable: true + description: |- + The positive class in binary classification. + + This parameter is needed to generate precision, recall, and F1 metrics when doing binary + classification. + default: null + classification_betas: + type: array + items: + type: number + format: double + nullable: true + description: |- + If this is provided, we calculate F-beta scores at the specified beta values. The F-beta score + is a generalization of F-1 score. This is only used for binary classification. + + With a beta of 1 (i.e. the F-1 score), precision and recall are given the same weight. A larger + beta score puts more weight on recall and less on precision. A smaller beta score puts more + weight on precision and less on recall. + default: null + suffix: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.SuffixString' + nullable: true + description: |- + A string of up to 18 characters that will be added to your fine-tuned model name. + + For example, a `suffix` of "custom-model-name" would produce a model name like + `ada:ft-your-org:custom-model-name-2022-02-15-04-21-04`. 
+ default: null + ModelDefinitions.CreateImageEditRequest: + type: object + required: + - prompt + - image + properties: + prompt: + type: string + description: A text description of the desired image(s). The maximum length is 1000 characters. + image: + type: string + format: binary + description: |- + The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not + provided, image must have transparency, which will be used as the mask. + mask: + type: string + format: binary + description: |- + An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where + `image` should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions + as `image`. + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.ImagesN' + nullable: true + description: The number of images to generate. Must be between 1 and 10. + default: 1 + size: + type: string + enum: + - 256x256 + - 512x512 + - 1024x1024 + nullable: true + description: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. + default: 1024x1024 + response_format: + type: string + enum: + - url + - b64_json + nullable: true + description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. + default: url + user: + $ref: '#/components/schemas/ModelDefinitions.User' + ModelDefinitions.CreateImageRequest: + type: object + required: + - prompt properties: - model: - anyOf: - - type: string - - type: string - enum: - - text-embedding-ada-002 - description: ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. - x-oaiTypeLabel: string - input: - anyOf: - - type: string - - type: array - items: - type: string - - $ref: '#/components/schemas/ModelDefinitions.TokenArray' - - $ref: '#/components/schemas/ModelDefinitions.TokenArrayArray' - description: |- - Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a - single request, pass an array of strings or array of token arrays. Each input must not exceed - the max input tokens for the model (8191 tokens for `text-embedding-ada-002`) and cannot be an empty string. - [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) - for counting tokens. + prompt: + type: string + description: A text description of the desired image(s). The maximum length is 1000 characters. + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.ImagesN' + nullable: true + description: The number of images to generate. Must be between 1 and 10. + default: 1 + size: + type: string + enum: + - 256x256 + - 512x512 + - 1024x1024 + nullable: true + description: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. + default: 1024x1024 + response_format: + type: string + enum: + - url + - b64_json + nullable: true + description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. + default: url user: $ref: '#/components/schemas/ModelDefinitions.User' - ModelDefinitions.CreateEmbeddingResponse: + ModelDefinitions.CreateImageVariationRequest: type: object required: - - object - - model - - data - - usage + - image properties: - object: + image: + type: string + format: binary + description: |- + The image to use as the basis for the variation(s). 
Must be a valid PNG file, less than 4MB, + and square. + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.ImagesN' + nullable: true + description: The number of images to generate. Must be between 1 and 10. + default: 1 + size: type: string enum: - - embedding - description: The object type, which is always "embedding". - model: + - 256x256 + - 512x512 + - 1024x1024 + nullable: true + description: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`. + default: 1024x1024 + response_format: type: string - description: The name of the model used to generate the embedding. - data: - type: array - items: - $ref: '#/components/schemas/ModelDefinitions.Embedding' - description: The list of embeddings generated by the model. - usage: - type: object - description: The usage information for the request. - required: - - prompt_tokens - - total_tokens - properties: - prompt_tokens: - type: integer - format: int64 - description: The number of tokens used by the prompt. - total_tokens: - type: integer - format: int64 - description: The total number of tokens used by the request. + enum: + - url + - b64_json + nullable: true + description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. + default: url + user: + $ref: '#/components/schemas/ModelDefinitions.User' ModelDefinitions.CreateTranscriptionRequest: type: object required: @@ -1327,6 +2195,32 @@ components: properties: text: type: string + ModelDefinitions.DeleteFileResponse: + type: object + required: + - id + - object + - deleted + properties: + id: + type: string + object: + type: string + deleted: + type: boolean + ModelDefinitions.DeleteModelResponse: + type: object + required: + - id + - object + - deleted + properties: + id: + type: string + object: + type: string + deleted: + type: boolean ModelDefinitions.Embedding: type: object description: Represents an embedding vector returned by embedding endpoint. @@ -1360,15 +2254,301 @@ components: message: type: string x-ms-azure-openai: true + ModelDefinitions.FineTune: + type: object + description: The `FineTune` object represents a legacy fine-tune job that has been created through the API. + required: + - id + - object + - created_at + - updated_at + - model + - fine_tuned_model + - organization_id + - status + - hyperparams + - training_files + - validation_files + - result_files + properties: + id: + type: string + description: The object identifier, which can be referenced in the API endpoints. + object: + type: string + enum: + - fine-tune + description: The object type, which is always "fine-tune". + created_at: + type: integer + format: unixtime + description: The Unix timestamp (in seconds) for when the fine-tuning job was created. + updated_at: + type: integer + format: unixtime + description: The Unix timestamp (in seconds) for when the fine-tuning job was last updated. + model: + type: string + description: The base model that is being fine-tuned. + fine_tuned_model: + type: string + nullable: true + description: The name of the fine-tuned model that is being created. + organization_id: + type: string + description: The organization that owns the fine-tuning job. + status: + type: string + enum: + - created + - running + - succeeded + - failed + - cancelled + description: |- + The current status of the fine-tuning job, which can be either `created`, `running`, + `succeeded`, `failed`, or `cancelled`. 
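The `status` enum above gives `FineTune` three terminal states. A polling sketch against the deployment-neutral `/fine-tunes/{fine_tune_id}` route mounted in azuremain.tsp; the ten-second interval is arbitrary and a production caller would add its own timeout:

```ts
// pollFineTune.ts: waits until a fine-tune job leaves created/running.
const TERMINAL = new Set(["succeeded", "failed", "cancelled"]);

async function waitForFineTune(baseUrl: string, apiKey: string, fineTuneId: string) {
  for (;;) {
    const res = await fetch(`${baseUrl}/openai/fine-tunes/${fineTuneId}`, {
      headers: { Authorization: `Bearer ${apiKey}` },
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const job = await res.json();
    if (TERMINAL.has(job.status)) return job; // created/running: keep polling
    await new Promise((resolve) => setTimeout(resolve, 10_000));
  }
}
```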
+ hyperparams: + type: object + description: |- + The hyperparameters used for the fine-tuning job. See the + [fine-tuning guide](/docs/guides/legacy-fine-tuning/hyperparameters) for more details. + required: + - n_epochs + - batch_size + - prompt_loss_weight + - learning_rate_multiplier + properties: + n_epochs: + type: integer + format: int64 + description: |- + The number of epochs to train the model for. An epoch refers to one full cycle through the + training dataset. + batch_size: + type: integer + format: int64 + description: |- + The batch size to use for training. The batch size is the number of training examples used to + train a single forward and backward pass. + prompt_loss_weight: + type: number + format: double + description: The weight to use for loss on the prompt tokens. + learning_rate_multiplier: + type: number + format: double + description: The learning rate multiplier to use for training. + compute_classification_metrics: + type: boolean + description: The classification metrics to compute using the validation dataset at the end of every epoch. + classification_positive_class: + type: string + description: The positive class to use for computing classification metrics. + classification_n_classes: + type: integer + format: int64 + description: The number of classes to use for computing classification metrics. + training_files: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' + description: The list of files used for training. + validation_files: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' + description: The list of files used for validation. + result_files: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' + description: The compiled results files for the fine-tuning job. + events: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.FineTuneEvent' + description: The list of events that have been observed in the lifecycle of the FineTune job. + ModelDefinitions.FineTuneEvent: + type: object + required: + - object + - created_at + - level + - message + properties: + object: + type: string + created_at: + type: integer + format: unixtime + level: + type: string + message: + type: string + ModelDefinitions.Image: + type: object + description: Represents the url or the content of an image generated by the OpenAI API. + properties: + url: + type: string + format: uri + description: The URL of the generated image, if `response_format` is `url` (default). + b64_json: + type: string + format: base64 + description: The base64-encoded JSON of the generated image, if `response_format` is `b64_json`. 
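Per the `Image` schema above, a result carries either `url` (the default) or `b64_json` depending on `response_format`, so consumers need both branches. A Node sketch; the output path handling is illustrative:

```ts
// saveImage.ts: consumes the Image schema above, handling both formats.
import { writeFile } from "node:fs/promises";

type Image = { url?: string; b64_json?: string };

async function saveImage(image: Image, path: string): Promise<void> {
  if (image.b64_json) {
    // response_format was b64_json: the payload itself is the image bytes.
    await writeFile(path, Buffer.from(image.b64_json, "base64"));
  } else if (image.url) {
    // response_format was url (default): fetch the hosted image.
    const res = await fetch(image.url);
    await writeFile(path, Buffer.from(await res.arrayBuffer()));
  } else {
    throw new Error("Image carries neither url nor b64_json");
  }
}
```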
+ ModelDefinitions.ImagesN: + type: integer + format: int64 + minimum: 1 + maximum: 10 + ModelDefinitions.ImagesResponse: + type: object + required: + - created + - data + properties: + created: + type: integer + format: unixtime + data: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.Image' + ModelDefinitions.ListFilesResponse: + type: object + required: + - object + - data + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.OpenAIFile' + ModelDefinitions.ListFineTuneEventsResponse: + type: object + required: + - object + - data + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.FineTuneEvent' + ModelDefinitions.ListFineTunesResponse: + type: object + required: + - object + - data + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.FineTune' + ModelDefinitions.ListModelsResponse: + type: object + required: + - object + - data + properties: + object: + type: string + data: + type: array + items: + $ref: '#/components/schemas/ModelDefinitions.Model' ModelDefinitions.MaxTokens: type: integer format: int64 minimum: 0 + ModelDefinitions.Model: + type: object + description: Describes an OpenAI model offering that can be used with the API. + required: + - id + - object + - created + - owned_by + properties: + id: + type: string + description: The model identifier, which can be referenced in the API endpoints. + object: + type: string + enum: + - model + description: The object type, which is always "model". + created: + type: integer + format: unixtime + description: The Unix timestamp (in seconds) when the model was created. + owned_by: + type: string + description: The organization that owns the model. ModelDefinitions.N: type: integer format: int64 minimum: 1 maximum: 128 + ModelDefinitions.OpenAIFile: + type: object + description: The `File` object represents a document that has been uploaded to OpenAI. + required: + - id + - object + - bytes + - createdAt + - filename + - purpose + - status + properties: + id: + type: string + description: The file identifier, which can be referenced in the API endpoints. + object: + type: string + enum: + - file + description: The object type, which is always "file". + bytes: + type: integer + format: int64 + description: The size of the file in bytes. + createdAt: + type: integer + format: unixtime + description: The Unix timestamp (in seconds) for when the file was created. + filename: + type: string + description: The name of the file. + purpose: + type: string + description: The intended purpose of the file. Currently, only "fine-tune" is supported. + status: + type: string + enum: + - uploaded + - processed + - pending + - error + - deleting + - deleted + description: |- + The current status of the file, which can be either `uploaded`, `processed`, `pending`, + `error`, `deleting` or `deleted`. + status_details: + type: string + nullable: true + description: |- + Additional details about the status of the file. If the file is in the `error` state, this will + include a message describing the error. 
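`OpenAIFile.status` and `status_details` above are what a client should inspect after `createFile`, which takes `multipart/form-data` with `file` and `purpose` fields per `CreateFileRequest`. A sketch assuming the `/files` mount from azuremain.tsp and Node 18+ globals (`fetch`, `FormData`, `Blob`):

```ts
// uploadFile.ts: uploads a JSONL training file and checks its status.
import { readFile } from "node:fs/promises";

async function uploadTrainingFile(baseUrl: string, apiKey: string, path: string) {
  const form = new FormData();
  form.append("file", new Blob([await readFile(path)]), "train.jsonl");
  form.append("purpose", "fine-tune"); // per the CreateFileRequest description

  const res = await fetch(`${baseUrl}/openai/files`, {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` }, // fetch sets the multipart boundary
    body: form,
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  const file = await res.json();
  if (file.status === "error") {
    // status_details carries a message describing the failure
    throw new Error(file.status_details ?? "file processing failed");
  }
  return file; // e.g. status "uploaded", "pending", or "processed"
}
```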
ModelDefinitions.Penalty: type: number format: double @@ -1394,6 +2574,10 @@ components: type: string minItems: 1 maxItems: 4 + ModelDefinitions.SuffixString: + type: string + minLength: 1 + maxLength: 40 ModelDefinitions.Temperature: type: number format: double diff --git a/versions.tsp b/versions.tsp index 6db6292b6..8c5f89f91 100644 --- a/versions.tsp +++ b/versions.tsp @@ -1,8 +1,8 @@ import "@typespec/versioning"; import "./openai.js"; -extern dec azure(target); -extern dec azureVersion(target); +extern dec azure(target: TypeSpec.Reflection.Model | TypeSpec.Reflection.ModelProperty | TypeSpec.Reflection.Operation); +extern dec azureVersion(target: TypeSpec.Reflection.EnumMember); @Versioning.versioned(OpenAIFlavors) namespace ModelDefinitions { From 9c2dd87922df1bc8bb38d9a925db75a8d55abde0 Mon Sep 17 00:00:00 2001 From: "Johan Stenberg (MSFT)" Date: Mon, 6 Nov 2023 11:09:54 -0800 Subject: [PATCH 5/6] Add missing @path from deploymentId for chat completions --- completions/models.tsp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/completions/models.tsp b/completions/models.tsp index b83014390..8da19fc97 100644 --- a/completions/models.tsp +++ b/completions/models.tsp @@ -148,6 +148,7 @@ scalar MaxTokens extends safeint; model CreateChatCompletionRequest { @azure + @TypeSpec.Http.path deploymentId: string; /** @@ -213,6 +214,7 @@ model ChatCompletionFunctionParameters is Record; model ChatCompletionRequestMessage { @azure + @TypeSpec.Http.path deploymentId: string; /** The role of the messages author. One of `system`, `user`, `assistant`, or `function`. */ @@ -333,7 +335,8 @@ model ChatCompletionResponseMessage { model CreateCompletionRequest { @azure - @TypeSpec.Http.path deploymentId: string; + @TypeSpec.Http.path + deploymentId: string; /** * ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to From ab2c008882030386ca974cf180c260e922dc5916 Mon Sep 17 00:00:00 2001 From: "Johan Stenberg (MSFT)" Date: Mon, 6 Nov 2023 17:09:49 -0800 Subject: [PATCH 6/6] Update azure openai yaml (regenerated) with latest source TSP --- .../openapi3/openapi.AzureOpenAI.yaml | 296 +++++++++--------- 1 file changed, 149 insertions(+), 147 deletions(-) diff --git a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml index 433dc6536..784381586 100644 --- a/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml +++ b/tsp-output/@typespec/openapi3/openapi.AzureOpenAI.yaml @@ -97,153 +97,7 @@ paths: content: application/json: schema: - type: object - required: - - model - - messages - properties: - model: - anyOf: - - type: string - - type: string - enum: - - gpt4 - - gpt-4-0314 - - gpt-4-0613 - - gpt-4-32k - - gpt-4-32k-0314 - - gpt-4-32k-0613 - - gpt-3.5-turbo - - gpt-3.5-turbo-16k - - gpt-3.5-turbo-0301 - - gpt-3.5-turbo-0613 - - gpt-3.5-turbo-16k-0613 - description: |- - ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - x-oaiTypeLabel: string - messages: - type: array - items: - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionRequestMessage' - description: |- - A list of messages comprising the conversation so far. - [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). 
- minItems: 1 - functions: - type: array - items: - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctions' - description: A list of functions the model may generate JSON inputs for. - minItems: 1 - maxItems: 128 - function_call: - anyOf: - - type: string - enum: - - none - - auto - - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionCallOption' - description: |- - Controls how the model responds to function calls. `none` means the model does not call a - function, and responds to the end-user. `auto` means the model can pick between an end-user or - calling a function. Specifying a particular function via `{\"name":\ \"my_function\"}` forces the - model to call that function. `none` is the default when no functions are present. `auto` is the - default if functions are present. - temperature: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.Temperature' - nullable: true - description: |- - What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output - more random, while lower values like 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - default: 1 - top_p: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.TopP' - nullable: true - description: |- - An alternative to sampling with temperature, called nucleus sampling, where the model considers - the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising - the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - default: 1 - n: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.N' - nullable: true - description: |- - How many completions to generate for each prompt. - **Note:** Because this parameter generates many completions, it can quickly consume your token - quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. - default: 1 - max_tokens: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' - nullable: true - description: |- - The maximum number of [tokens](/tokenizer) to generate in the completion. - - The token count of your prompt plus `max_tokens` cannot exceed the model's context length. - [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) - for counting tokens. - default: 16 - stop: - allOf: - - $ref: '#/components/schemas/ModelDefinitions.Stop' - description: Up to 4 sequences where the API will stop generating further tokens. - default: null - presence_penalty: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.Penalty' - nullable: true - description: |- - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear - in the text so far, increasing the model's likelihood to talk about new topics. - - [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) - frequency_penalty: - oneOf: - - $ref: '#/components/schemas/ModelDefinitions.Penalty' - nullable: true - description: |- - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing - frequency in the text so far, decreasing the model's likelihood to repeat the same line - verbatim. 
-              
-                [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details)
-            logit_bias:
-              type: object
-              description: |-
-                Modify the likelihood of specified tokens appearing in the completion.
-                Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an
-                associated bias value from -100 to 100. Mathematically, the bias is added to the logits
-                generated by the model prior to sampling. The exact effect will vary per model, but values
-                between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100
-                should result in a ban or exclusive selection of the relevant token.
-              additionalProperties:
-                type: integer
-                format: int64
-                nullable: true
-              x-oaiTypeLabel: map
-            user:
-              allOf:
-                - $ref: '#/components/schemas/ModelDefinitions.User'
-              description: |-
-                A unique identifier representing your end-user, which can help OpenAI to monitor and detect
-                abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).
-            stream:
-              type: boolean
-              nullable: true
-              description: |-
-                If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only
-                [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-                as they become available, with the stream terminated by a `data: [DONE]` message.
-                [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb).
-              default: true
+            $ref: '#/components/schemas/ModelDefinitions.CreateChatCompletionRequest'
  /deployments/{deploymentId}/completions:
    post:
      tags:
@@ -1394,6 +1248,154 @@ components:
        error:
          $ref: '#/components/schemas/ModelDefinitions.ErrorBase'
      x-ms-azure-openai: true
+  ModelDefinitions.CreateChatCompletionRequest:
+    type: object
+    required:
+      - model
+      - messages
+    properties:
+      model:
+        anyOf:
+          - type: string
+          - type: string
+            enum:
+              - gpt4
+              - gpt-4-0314
+              - gpt-4-0613
+              - gpt-4-32k
+              - gpt-4-32k-0314
+              - gpt-4-32k-0613
+              - gpt-3.5-turbo
+              - gpt-3.5-turbo-16k
+              - gpt-3.5-turbo-0301
+              - gpt-3.5-turbo-0613
+              - gpt-3.5-turbo-16k-0613
+        description: |-
+          ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility)
+          table for details on which models work with the Chat API.
+        x-oaiTypeLabel: string
+      messages:
+        type: array
+        items:
+          $ref: '#/components/schemas/ModelDefinitions.ChatCompletionRequestMessage'
+        description: |-
+          A list of messages comprising the conversation so far.
+          [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb).
+        minItems: 1
+      functions:
+        type: array
+        items:
+          $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctions'
+        description: A list of functions the model may generate JSON inputs for.
+        minItems: 1
+        maxItems: 128
+      function_call:
+        anyOf:
+          - type: string
+            enum:
+              - none
+              - auto
+          - $ref: '#/components/schemas/ModelDefinitions.ChatCompletionFunctionCallOption'
+        description: |-
+          Controls how the model responds to function calls. `none` means the model does not call a
+          function, and responds to the end-user. `auto` means the model can pick between an end-user or
+          calling a function. Specifying a particular function via `{"name": "my_function"}` forces the
+          model to call that function. `none` is the default when no functions are present. `auto` is the
+          default if functions are present.
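
Worth flagging in the regenerated component above: `deploymentId` no longer appears in `properties` or `required`. That is patch 5 taking effect; once the property carries `@TypeSpec.Http.path`, the http library treats it as URL metadata and omits it from the payload schema. A compact sketch of the pattern, with the project-local `@azure` decorator mentioned in a comment rather than applied so the snippet compiles on its own:

  import "@typespec/http";

  using TypeSpec.Http;

  model CreateChatCompletionRequest {
    // In the repo source this property also carries @azure. With @path it
    // binds to the {deploymentId} route segment instead of the JSON body.
    @path deploymentId: string;

    // Backticks allow the reserved word `model` as a property name.
    `model`: string;
  }

  @route("/deployments/{deploymentId}/chat/completions")
  @post
  op createChatCompletion(...CreateChatCompletionRequest): void;
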
+ temperature: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Temperature' + nullable: true + description: |- + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output + more random, while lower values like 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + default: 1 + top_p: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.TopP' + nullable: true + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers + the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising + the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + default: 1 + n: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.N' + nullable: true + description: |- + How many completions to generate for each prompt. + **Note:** Because this parameter generates many completions, it can quickly consume your token + quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + default: 1 + max_tokens: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.MaxTokens' + nullable: true + description: |- + The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + default: 16 + stop: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.Stop' + description: Up to 4 sequences where the API will stop generating further tokens. + default: null + presence_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear + in the text so far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + frequency_penalty: + oneOf: + - $ref: '#/components/schemas/ModelDefinitions.Penalty' + nullable: true + description: |- + Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing + frequency in the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + + [See more information about frequency and presence penalties.](/docs/guides/gpt/parameter-details) + logit_bias: + type: object + description: |- + Modify the likelihood of specified tokens appearing in the completion. + Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an + associated bias value from -100 to 100. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, but values + between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 + should result in a ban or exclusive selection of the relevant token. + additionalProperties: + type: integer + format: int64 + nullable: true + x-oaiTypeLabel: map + user: + allOf: + - $ref: '#/components/schemas/ModelDefinitions.User' + description: |- + A unique identifier representing your end-user, which can help OpenAI to monitor and detect + abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). 
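
Two observations on the properties above. First, some descriptions still read like the plain completions endpoint: `n` speaks of completions "for each prompt" and `max_tokens` defaults to 16, whereas the upstream chat reference describes choices per input message and leaves the token limit unset by default; this likely comes from the chat and completions models sharing doc comments in the TSP source and may deserve a follow-up. Second, the recurring `oneOf` plus `nullable: true` shape around `ModelDefinitions.Temperature`, `TopP`, `N`, `MaxTokens`, and `Penalty` matches what the emitter produces for an optional property typed as a named scalar unioned with `null`. A sketch of that pattern; the bounds are assumptions inferred from the description text, not copied from the TSP source:

  // Hypothetical constraints; the real ones live in completions/models.tsp.
  @minValue(0)
  @maxValue(2)
  scalar Temperature extends float64;

  model SamplingOptions {
    // Emits as oneOf with a $ref to Temperature, nullable: true, default: 1.
    temperature?: Temperature | null = 1;
  }
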
+ stream: + type: boolean + nullable: true + description: |- + If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + default: true ModelDefinitions.CreateChatCompletionResponse: type: object description: Represents a chat completion response returned by model, based on the provided input.
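
Two closing notes on this patch. The `stream` property above declares `default: true`, while the upstream OpenAI reference documents `stream` as defaulting to `false`; since this file is generated, that default is worth tracing back to the TSP source rather than editing here. Separately, the versions.tsp change earlier in the series gives the extern decorators typed targets, so the compiler itself now rejects a misplaced `@azure` or `@azureVersion` instead of deferring the failure to the JavaScript implementation. A sketch of how those externs are declared and consumed; the enum member names are assumptions, and the snippet only compiles next to the repo's openai.js, which supplies the implementations:

  import "@typespec/versioning";
  import "./openai.js";

  using TypeSpec.Versioning;

  extern dec azure(target: TypeSpec.Reflection.Model | TypeSpec.Reflection.ModelProperty | TypeSpec.Reflection.Operation);
  extern dec azureVersion(target: TypeSpec.Reflection.EnumMember);

  @versioned(OpenAIFlavors)
  namespace ModelDefinitions {
    enum OpenAIFlavors {
      OpenAI,

      // Assumed: @azureVersion tags the flavor that drives the
      // openapi.AzureOpenAI.yaml output.
      @azureVersion
      Azure,
    }

    model Example {
      // Allowed: model properties are among the declared targets, the same
      // shape patch 5 uses for deploymentId.
      @azure
      content_filter_results?: string;
    }
  }
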