-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
131 lines (94 loc) · 3.21 KB
/
server.py
File metadata and controls
131 lines (94 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Lightweight RAG Chatbot Flask API
Uses Gemini API for both embeddings and generation
"""
import os
import pickle
import numpy as np
from flask import Flask, request, jsonify
from dotenv import load_dotenv
import google.generativeai as genai
# Load environment variables from a local .env file (GOOGLE_API_KEY is read below).
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))  # a missing key fails on the first API call, not here
app = Flask(__name__)
# Global variables
# In-memory index populated by load_index() at startup; None until then.
# Expected shape (from load_index/search usage): dict with parallel lists
# under 'texts' and 'embeddings'.
embeddings_data = None
def get_embedding(text: str) -> list:
    """Embed *text* with Gemini's text-embedding-004 model.

    Returns the embedding vector as a plain list of floats.
    """
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=text,
    )
    return response['embedding']
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    Args:
        a, b: Array-likes of equal length.

    Returns:
        A float in [-1, 1]. Returns 0.0 when either vector has zero norm,
        avoiding the division-by-zero NaN the naive formula produces.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0.0:
        # An all-zero vector has no direction; treat it as orthogonal.
        return 0.0
    return float(np.dot(a, b) / denom)
def load_index():
    """Load the pre-computed embedding index (index.pkl) into the module global."""
    global embeddings_data
    base_dir = os.path.dirname(os.path.abspath(__file__))
    index_path = os.path.join(base_dir, "index.pkl")
    print(f"Loading index from {index_path}...")
    # NOTE: pickle is only safe for trusted files; index.pkl is a locally
    # generated artifact, not user-supplied input.
    with open(index_path, 'rb') as fh:
        embeddings_data = pickle.load(fh)
    print(f"Loaded {len(embeddings_data['texts'])} documents!")
def search(query: str, k: int = 5) -> list:
    """Return the texts of the *k* documents most similar to *query*.

    Returns an empty list when the index has not been loaded.
    """
    if embeddings_data is None:
        return []
    query_vec = get_embedding(query)
    # Score every document against the query, keeping (score, text) pairs.
    scored = [
        (cosine_similarity(query_vec, doc_vec), text)
        for doc_vec, text in zip(embeddings_data['embeddings'],
                                 embeddings_data['texts'])
    ]
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [text for _, text in scored[:k]]
def get_answer(question: str, k: int = 5) -> str:
    """Generate an answer to *question* using retrieval-augmented generation.

    Retrieves the top-*k* most similar documents from the index, then asks
    Gemini to answer based on that context.

    Args:
        question: The user's natural-language question.
        k: Number of context documents to retrieve (default 5).

    Returns:
        The model's answer text, or a fallback message when no documents
        are available.
    """
    docs = search(question, k=k)
    if not docs:
        return "No documents found in the index."
    context = "\n\n".join(docs)
    model = genai.GenerativeModel("gemini-2.5-flash")
    # Fix: the original prompt contained a stray '"' on its own line,
    # which was sent to the model as part of the prompt text.
    prompt = f"""Based on the following context about Lions Clubs International, answer the question.
Context:
{context}
Question: {question}
Answer:"""
    response = model.generate_content(prompt)
    return response.text.strip()
@app.route('/health', methods=['GET'])
def health():
    """Health-check endpoint: reports status and how many documents are indexed."""
    doc_count = len(embeddings_data['texts']) if embeddings_data else 0
    return jsonify({
        "status": "healthy",
        "documents_loaded": doc_count,
    })
@app.route('/ask', methods=['POST'])
def ask():
    """Answer a question using RAG.

    Expects a JSON body: {"question": str, "k": int (optional, default 5)}.
    Returns {"success": true, "question": ..., "answer": ...} on success,
    or {"success": false, "error": ...} with HTTP 400/500 on failure.
    """
    try:
        # silent=True: malformed/missing JSON yields None so we return our
        # own JSON 400 instead of werkzeug's HTML error page.
        data = request.get_json(silent=True)
        if not data or 'question' not in data:
            return jsonify({
                "success": False,
                "error": "Missing 'question' in request body"
            }), 400
        question = data['question']
        # Validate k up front: a non-numeric value (e.g. "k": "abc") would
        # otherwise raise deep inside search() and surface as a 500.
        try:
            k = int(data.get('k', 5))
        except (TypeError, ValueError):
            return jsonify({
                "success": False,
                "error": "'k' must be an integer"
            }), 400
        answer = get_answer(question, k=k)
        return jsonify({
            "success": True,
            "question": question,
            "answer": answer
        })
    except Exception as e:
        # Top-level boundary: report any unexpected failure as JSON.
        return jsonify({
            "success": False,
            "error": str(e)
        }), 500
if __name__ == '__main__':
    # Load the embedding index before serving so /ask works immediately.
    load_index()
    # NOTE(review): 0.0.0.0 binds all interfaces (typical for containers) and
    # this is Flask's development server — front it with a WSGI server in prod.
    app.run(host='0.0.0.0', port=5000, debug=False)