counting tokens

denniszielke · May 22, 2024 · be6ad5d · be6ad5d
1 parent 73d0334
commit be6ad5d
Show file tree

Hide file tree

Showing 2 changed files with 232 additions and 0 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -6,3 +6,4 @@ pydantic==2.7.1
 python-dotenv==1.0.1
 quadrant==1.0
 requests==2.32.0
+tiktoken==0.7.0
diff --git a/src-agents/phase4/notebook.ipynb b/src-agents/phase4/notebook.ipynb
@@ -0,0 +1,231 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Phase 4 - Efficiency\n",
+    "If not already done run this in the top level folder:\n",
+    "```\n",
+    "pip install -r requirements.txt\n",
+    "```\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found Azure OpenAI API Base Endpoint: https://cog-fkplarx5db7we.openai.azure.com/\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "import requests\n",
+    "import os\n",
+    "import openai\n",
+    "import tiktoken\n",
+    "from enum import Enum\n",
+    "from pydantic import BaseModel\n",
+    "from openai import AzureOpenAI\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "# Load environment variables\n",
+    "if load_dotenv():\n",
+    "    print(\"Found Azure OpenAI API Base Endpoint: \" + os.getenv(\"AZURE_OPENAI_ENDPOINT\"))\n",
+    "else: \n",
+    "    print(\"Azure OpenAI API Base Endpoint not found. Have you configured the .env file?\")\n",
+    "    \n",
+    "API_KEY = os.getenv(\"AZURE_OPENAI_API_KEY\")\n",
+    "API_VERSION = os.getenv(\"OPENAI_API_VERSION\")\n",
+    "RESOURCE_ENDPOINT = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
+    "\n",
+    "\n",
+    "client = AzureOpenAI(\n",
+    "    azure_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
+    "    api_key = os.getenv(\"AZURE_OPENAI_API_KEY\"),\n",
+    "    api_version = os.getenv(\"AZURE_OPENAI_VERSION\")\n",
+    ")\n",
+    "deployment_name = os.getenv(\"AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME\")\n",
+    "model_name = os.getenv(\"AZURE_OPENAI_COMPLETION_MODEL\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is the object model for receiving questions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "class QuestionType(str, Enum):\n",
+    "    multiple_choice = \"multiple_choice\"\n",
+    "    true_false = \"true_false\"\n",
+    "    popular_choice = \"popular_choice\"\n",
+    "    estimation = \"estimation\"\n",
+    "\n",
+    "class Ask(BaseModel):\n",
+    "    question: str | None = None\n",
+    "    type: QuestionType\n",
+    "    correlationToken: str | None = None\n",
+    "\n",
+    "class Answer(BaseModel):\n",
+    "    answer: str\n",
+    "    correlationToken: str | None = None\n",
+    "    promptTokensUsed: int | None = None\n",
+    "    completionTokensUsed: int | None = None\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Get the number of tokens\n",
+    "tiktoken is a library which allows you to get the number of tokens. \n",
+    "Ensure you pick the correct encoding for your model based on this list. https://github.com/openai/tiktoken/blob/c0ba74c238d18b4824c25f3c27fc8698055b9a76/tiktoken/model.py#L20\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of tokens in the string: 7\n"
+     ]
+    }
+   ],
+   "source": [
+    "def get_num_tokens_from_string(string: str, encoding_name: str='p50k_base') -> int:\n",
+    "    \"\"\"Returns the number of tokens in a text by a given encoding.\"\"\"\n",
+    "    encoding = tiktoken.get_encoding(encoding_name)\n",
+    "    return len(encoding.encode(string))\n",
+    "\n",
+    "number_of_tokens=get_num_tokens_from_string(\"Hello, Azure AI Adventure Day!\")\n",
+    "print(f\"Number of tokens in the string: {number_of_tokens}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "YOUR Mission: Adjust the function below and reuse it in the main.py file later to deploy to Azure and to update your service\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "async def ask_question(ask: Ask):\n",
+    "    \"\"\"\n",
+    "    Ask a question\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # Send a completion call to generate an answer\n",
+    "    print('Sending a request to openai')\n",
+    "    start_phrase = ask.question\n",
+    "    messages=  [{\"role\" : \"assistant\", \"content\" : start_phrase}]\n",
+    "    number_of_tokens_in= get_num_tokens_from_string(messages[0]['content']);\n",
+    "    \n",
+    "    response = client.chat.completions.create(\n",
+    "        model = deployment_name,\n",
+    "        messages =messages,\n",
+    "    )\n",
+    "\n",
+    "    number_of_tokens_out= get_num_tokens_from_string(response.choices[0].message.content);\n",
+    "\n",
+    "    print(f\"Number of tokens in the input: {number_of_tokens_in}\")\n",
+    "    print(f\"Number of tokens in the output: {number_of_tokens_out}\")\n",
+    "    print('Total Tokens Used: ' + str(number_of_tokens_in + number_of_tokens_out))\n",
+    "\n",
+    "    answer = Answer(answer=response.choices[0].message.content)\n",
+    "    answer.correlationToken = ask.correlationToken\n",
+    "    answer.promptTokensUsed = response.usage.prompt_tokens\n",
+    "    answer.completionTokensUsed = response.usage.completion_tokens\n",
+    "\n",
+    "    return answer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Use this snippet to try your method with several questions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sending a request to openai\n",
+      "Number of tokens in the input: 17\n",
+      "Number of tokens in the output: 3\n",
+      "Total Tokens Used: 20\n",
+      "Answer: answer='c. Green' correlationToken=None promptTokensUsed=24 completionTokensUsed=3\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "ask = Ask(question=\"Which of the following is a color? a. Tree b. Flower c. Green\", type=QuestionType.multiple_choice)\n",
+    "answer = await ask_question(ask)\n",
+    "print('Answer:', answer)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Make sure you transfer your code changes into main.py (or additional files). Then redeploy your container using this command.\n",
+    "```\n",
+    "bash ./azd-hooks/deploy.sh phase1 $AZURE_ENV_NAME\n",
+    "```\n",
+    "Make sure to provide the URL of your endpoint in the team portal!"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}