-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsave_embeddings.py
More file actions
29 lines (24 loc) · 961 Bytes
/
Copy pathsave_embeddings.py
File metadata and controls
29 lines (24 loc) · 961 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import json
import numpy as np
import openai
import os
# Read the API key from the OPENAI_API_KEY environment variable
# (the value is None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Model used for embedding requests, plus the input and output file locations.
EMBEDDING_MODEL = "text-embedding-ada-002"
CHUNKS_PATH = "data/chunks.json"
EMBEDDINGS_PATH = "data/embeddings.npy"

# Parse the JSON file of chunks; the loop further down reads chunk["text"]
# from each element.
with open(CHUNKS_PATH, encoding="utf-8") as f:
    chunks = json.load(f)
def get_embedding(text):
    """Request an embedding for *text* and return it as a float32 array.

    Args:
        text: Value passed as ``input`` to ``openai.Embedding.create``.

    Returns:
        A NumPy ``float32`` array built from the ``embedding`` field of the
        first item in the response's ``data`` list.
    """
    response = openai.Embedding.create(model=EMBEDDING_MODEL, input=text)
    first_item = response["data"][0]
    return np.array(first_item["embedding"], dtype=np.float32)
# Embed every chunk in order, so that row i of the saved matrix corresponds
# to chunks[i]. A chunk whose request fails is reported and stored as an
# all-zero row, which keeps the row/chunk alignment intact.

# Placeholder length used only when no chunk was embedded successfully
# (1536 is the size the original script hard-coded).
FALLBACK_EMBEDDING_DIM = 1536

embeddings = []
failed_indices = []
for i, chunk in enumerate(chunks):
    print(f"Embedding {i+1}/{len(chunks)}")
    try:
        emb = get_embedding(chunk["text"])
    except Exception as e:
        # Best-effort: report the failure and continue with the next chunk.
        # The zero placeholder is filled in after the loop, once the real
        # embedding length is known.
        print(f"Ошибка для чанка {i}: {e}")
        failed_indices.append(i)
        emb = None
    embeddings.append(emb)

# Size the placeholders from an actual embedding so that np.stack does not
# fail on mismatched shapes if the model returns a length other than 1536.
embedding_dim = next(
    (emb.shape[0] for emb in embeddings if emb is not None),
    FALLBACK_EMBEDDING_DIM,
)
for i in failed_indices:
    embeddings[i] = np.zeros(embedding_dim, dtype=np.float32)

# np.stack raises ValueError on an empty list, so an empty chunk list is
# saved as an explicit (0, dim) matrix instead of crashing.
if embeddings:
    embedding_matrix = np.stack(embeddings)
else:
    embedding_matrix = np.zeros((0, embedding_dim), dtype=np.float32)

np.save(EMBEDDINGS_PATH, embedding_matrix)
print("Готово! Эмбеддинги сохранены в", EMBEDDINGS_PATH)