-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsql_migration.sql
More file actions
49 lines (45 loc) · 1.65 KB
/
sql_migration.sql
File metadata and controls
49 lines (45 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
ALTER TABLE auth.users ADD COLUMN IF NOT EXISTS dummy BOOLEAN; -- Just so something happens if you accidentally run it in auth context, though we don't.
-- Execute this on the 'public' schema
ALTER TABLE public.collections
ADD COLUMN IF NOT EXISTS readme_notebook_id UUID NULL REFERENCES public.notebooks(id) ON DELETE SET NULL;
-- Enable the pgvector extension to work with embedding vectors
CREATE EXTENSION IF NOT EXISTS vector
WITH SCHEMA public;
-- Create a table combining chunks and vectors
CREATE TABLE IF NOT EXISTS public.structure_embeddings (
id BIGINT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
structure_id UUID REFERENCES public.structures(id) ON DELETE CASCADE,
content TEXT NOT NULL,
embedding VECTOR(1536)
);
-- Create an index to speed up similarity searches (using ivfflat for example)
CREATE INDEX IF NOT EXISTS structure_embeddings_embedding_idx ON public.structure_embeddings USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
-- Function to execute cosine similarity search against user's uploaded structures
CREATE OR REPLACE FUNCTION match_structures(
query_embedding VECTOR(1536),
match_threshold FLOAT,
match_count INT,
p_user_id UUID
)
RETURNS TABLE (
structure_id UUID,
content TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY
SELECT
em.structure_id,
em.content,
1 - (em.embedding <=> query_embedding) AS similarity
FROM public.structure_embeddings em
JOIN public.structures st ON st.id = em.structure_id
WHERE 1 - (em.embedding <=> query_embedding) > match_threshold
AND st.user_id = p_user_id
ORDER BY em.embedding <=> query_embedding
LIMIT match_count;
END;
$$;