diff --git a/openai_pgvector_helloworld/openai_pgvector_helloworld.ipynb b/openai_pgvector_helloworld/openai_pgvector_helloworld.ipynb index c6265d4..876a6bf 100644 --- a/openai_pgvector_helloworld/openai_pgvector_helloworld.ipynb +++ b/openai_pgvector_helloworld/openai_pgvector_helloworld.ipynb @@ -136,8 +136,14 @@ "# Helper function: calculate cost of embedding num_tokens\n", "# Assumes we're using the text-embedding-ada-002 model\n", "# See https://openai.com/pricing\n", + "costs = {\n", + " \"text-embedding-3-small\": 0.020/1_000_000,\n", + " \"text-embedding-3-large\": 0.130/1_000_000,\n", + " \"text-embedding-ada-002\": 0.100/1_000_000,\n", + "}\n", + " \n", "def get_embedding_cost(num_tokens):\n", - " return num_tokens/1000*0.0001\n", + " return num_tokens * costs['text-embedding-ada-002']\n", "\n", "# Helper function: calculate total cost of embedding all content in the dataframe\n", "def get_total_embeddings_cost():\n", @@ -151,11 +157,11 @@ "\n", "# Helper function: get embeddings for a text\n", "def get_embeddings(text):\n", - " response = openai.Embedding.create(\n", + " response = openai.embeddings.create(\n", " model=\"text-embedding-ada-002\",\n", " input = text.replace(\"\\n\",\" \")\n", " )\n", - " embedding = response['data'][0]['embedding']\n", + " embedding = response.data[0].embedding\n", " return embedding" ] }, @@ -265,7 +271,7 @@ "# Save the dataframe with embeddings as a CSV file\n", "df_new.to_csv('blog_data_and_embeddings.csv', index=False)\n", - "# It may also be useful to save as a json file, but we won't use this in the tutorial\n", + "# Also save a json copy; the rest of the tutorial does not use it\n", - "#df_new.to_json('blog_data_and_embeddings.json')" + "df_new.to_json('blog_data_and_embeddings.json')" ] }, { @@ -506,14 +512,14 @@ "# Helper function: get text completion from OpenAI API\n", - "# Note max tokens is 4097\n", + "# Note: gpt-4o-mini supports up to 16,384 output tokens\n", - "# Note we're using the latest gpt-3.5-turbo-0613 model\n", + "# Note we're using the gpt-4o-mini model\n", - "def get_completion_from_messages(messages, model=\"gpt-3.5-turbo-0613\", temperature=0, max_tokens=1000):\n", - " response = 
openai.ChatCompletion.create(\n", + "def get_completion_from_messages(messages, model=\"gpt-4o-mini-2024-07-18\", temperature=0, max_tokens=1000):\n", + " response = openai.chat.completions.create(\n", " model=model,\n", " messages=messages,\n", " temperature=temperature, \n", " max_tokens=max_tokens, \n", " )\n", - " return response.choices[0].message[\"content\"]" + " return response.choices[0].message.content" ] }, { @@ -611,6 +617,15 @@ "print(input_2)\n", "print(response_2)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References\n", + "- https://docs.timescale.com/self-hosted/latest/install/installation-docker/#install-self-hosted-timescaledb-from-a-pre-built-container\n", + "- https://github.com/openai/openai-python/discussions/742 (OpenAI migration guide)" + ] } ], "metadata": { @@ -629,7 +644,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.11.6" } }, "nbformat": 4,