Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,4 @@ backend/.env.production

# Local demo assets (not for deployment)
_local/
backend/.env.test
15 changes: 8 additions & 7 deletions SETUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cp .env.example .env

**Required variables:**
- `MONGODB_URI`: MongoDB connection string
- `GEMINI_API_KEY`: Get from https://makersuite.google.com/app/apikey
- `VERTEX_API_KEY`: Vertex AI Express API key (create one in the Google Cloud Console)
- `GITHUB_TOKEN`: Generate at https://github.com/settings/tokens (needs `repo` scope)

### 2.4 Run Backend
Expand Down Expand Up @@ -82,14 +82,14 @@ Set these secrets in your GitHub repository (Settings → Secrets → Actions):

| Secret | Description | How to Get |
|--------|-------------|------------|
| `GEMINI_API_KEY` | Gemini API key | https://makersuite.google.com/app/apikey |
| `VERTEX_API_KEY` | Vertex AI Express API key | Google Cloud Console |
| `GITHUB_TOKEN` | GitHub PAT | https://github.com/settings/tokens |
| `FLY_API_TOKEN` | Fly.io token | `flyctl auth token` |
| `VERCEL_TOKEN` | Vercel token | https://vercel.com/account/tokens |

Set via CLI:
```bash
gh secret set GEMINI_API_KEY
gh secret set VERTEX_API_KEY
gh secret set GITHUB_TOKEN
```

Expand Down Expand Up @@ -121,11 +121,12 @@ MONGODB_URI=mongodb+srv://<username>:<password>@<cluster>.mongodb.net/somm_db

## 6. API Keys

### Gemini API Key
### Vertex AI Express API Key

1. Go to https://makersuite.google.com/app/apikey
2. Create a new API key
3. Copy to `backend/.env`
1. Go to the Google Cloud Console
2. Enable the Vertex AI API
3. Create an API key for Vertex AI Express
4. Copy it to `backend/.env` as `VERTEX_API_KEY`

### GitHub Personal Access Token

Expand Down
8 changes: 1 addition & 7 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,8 @@ FRONTEND_URL=http://localhost:3000
# Or local MongoDB:
MONGODB_URI=mongodb://localhost:27017/somm_db

# LLM APIs
# Get Gemini API key from: https://makersuite.google.com/app/apikey
GEMINI_API_KEY=your_gemini_api_key_here

# Vertex AI Express (API key auth for premium/admin routing)
# LLM API (Vertex AI Express only)
VERTEX_API_KEY=your_vertex_express_api_key_here
GOOGLE_CLOUD_PROJECT=your_gcp_project_id
GOOGLE_CLOUD_LOCATION=asia-northeast3

# Vertex AI role-based routing allowlists (comma-separated)
# VERTEX_PREMIUM_USER_IDS=user_id_1,user_id_2
Expand Down
8 changes: 4 additions & 4 deletions backend/app/api/routes/api_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from app.api.deps import get_current_user, User
from app.database.repositories.api_key import APIKeyRepository
from app.services.encryption import EncryptionService
from app.services.key_validator import validate_gemini_key
from app.services.key_validator import validate_api_key

logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/keys", tags=["API Keys"])
Expand Down Expand Up @@ -80,7 +80,7 @@ async def register_key(
Raises:
HTTPException: If the key is invalid.
"""
validation = await validate_gemini_key(request.api_key)
validation = await validate_api_key(request.api_key, request.provider)
if not validation.valid:
raise HTTPException(
status_code=400, detail=f"Invalid API key: {validation.error}"
Expand Down Expand Up @@ -166,7 +166,7 @@ async def validate_key(
Returns:
Validation response with status and available models.
"""
result = await validate_gemini_key(request.api_key)
result = await validate_api_key(request.api_key, request.provider)
return ValidateKeyResponse(
valid=result.valid,
error=result.error,
Expand Down Expand Up @@ -199,7 +199,7 @@ async def refresh_key(provider: str, user: User = Depends(get_current_user)):

enc = EncryptionService()
decrypted = enc.decrypt(doc["encrypted_key"])
validation = await validate_gemini_key(decrypted)
validation = await validate_api_key(decrypted, provider)
if not validation.valid:
raise HTTPException(
status_code=400, detail=f"Key no longer valid: {validation.error}"
Expand Down
8 changes: 1 addition & 7 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,8 @@ class Settings(BaseSettings):
MONGODB_URI: str = "mongodb://localhost:27017/somm_db"
MONGO_DB: str = "somm"

# LLM APIs
GEMINI_API_KEY: str = ""
OPENAI_API_KEY: str = ""

# Vertex AI Express (API key auth)
# LLM API (Vertex AI Express only)
VERTEX_API_KEY: str = ""
GOOGLE_CLOUD_PROJECT: str = ""
GOOGLE_CLOUD_LOCATION: str = "asia-northeast3"
VERTEX_PREMIUM_USER_IDS: str = ""
VERTEX_ADMIN_USER_IDS: str = ""
VERTEX_PREMIUM_EMAILS: str = ""
Expand Down
2 changes: 1 addition & 1 deletion backend/app/graph/nodes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ async def evaluate(

started_at = datetime.now(timezone.utc).isoformat()
configurable = (config or {}).get("configurable", {})
provider = configurable.get("provider", "gemini")
provider = configurable.get("provider", "vertex")
api_key = configurable.get("api_key")
model = configurable.get("model")
temperature = configurable.get("temperature")
Expand Down
2 changes: 1 addition & 1 deletion backend/app/graph/nodes/jeanpierre.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ async def evaluate(

started_at = datetime.now(timezone.utc).isoformat()
configurable = (config or {}).get("configurable", {})
provider = configurable.get("provider", "gemini")
provider = configurable.get("provider", "vertex")
api_key = configurable.get("api_key")
model = configurable.get("model")
temperature = configurable.get("temperature")
Expand Down
7 changes: 2 additions & 5 deletions backend/app/graph/nodes/rag_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def _get_genai_client():
if _genai_client is None:
from google import genai

# Always use Gemini API (not Vertex AI) - Vertex AI requires OAuth2, not API keys
api_key = settings.GEMINI_API_KEY or settings.VERTEX_API_KEY
_genai_client = genai.Client(api_key=api_key)
_genai_client = genai.Client(api_key=settings.VERTEX_API_KEY)
return _genai_client


Expand Down Expand Up @@ -107,8 +105,7 @@ async def rag_enrich(
repo_context = state.get("repo_context", {})
query = _create_query(state)

api_key = settings.VERTEX_API_KEY or settings.GEMINI_API_KEY
if not api_key:
if not settings.VERTEX_API_KEY:
return {
"rag_context": {
"query": query,
Expand Down
2 changes: 1 addition & 1 deletion backend/app/graph/nodes/tasting_notes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ async def evaluate(
) -> Dict[str, Any]:
started_at = datetime.now(timezone.utc).isoformat()
configurable = (config or {}).get("configurable", {})
provider = configurable.get("provider", "gemini")
provider = configurable.get("provider", "vertex")
api_key = configurable.get("api_key")
model = configurable.get("model")
temperature = configurable.get("temperature")
Expand Down
7 changes: 2 additions & 5 deletions backend/app/graph/nodes/web_search_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
def _get_genai_client():
from google import genai

# Always use Gemini API (not Vertex AI) - Vertex AI requires OAuth2, not API keys
api_key = settings.GEMINI_API_KEY or settings.VERTEX_API_KEY
return genai.Client(api_key=api_key)
return genai.Client(api_key=settings.VERTEX_API_KEY)


async def web_search_enrich(
Expand All @@ -26,8 +24,7 @@ async def web_search_enrich(
if existing := state.get("web_search_context"):
return {"web_search_context": existing}

api_key = settings.VERTEX_API_KEY or settings.GEMINI_API_KEY
if not api_key:
if not settings.VERTEX_API_KEY:
return {
"web_search_context": {
"query": "",
Expand Down
78 changes: 34 additions & 44 deletions backend/app/providers/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def _resolve_thinking_level(model_name: str) -> Optional[str]:


PROVIDER_DEFAULTS = {
"gemini": "gemini-3-flash-preview",
"google": "gemini-3-flash-preview",
"vertex": "gemini-3-flash-preview",
}

Expand All @@ -82,20 +82,7 @@ def build_llm(
max_output_tokens: Optional[int],
enable_fallback: bool = False,
) -> BaseChatModel:
"""Build an LLM instance for the specified provider.

Args:
provider: Provider name (gemini, vertex)
api_key: User-provided API key (BYOK) or None for server-side key
model: Model name or None for provider default
temperature: Temperature setting or None for default (0.7)
max_output_tokens: Max output tokens or None for default (2048)
enable_fallback: If True, attach fallback to provider's default model

Returns:
LLM instance, optionally wrapped with fallback chain
"""
provider_key = (provider or "gemini").lower()
provider_key = (provider or "vertex").lower()
resolved_key, byok_error = resolve_byok(api_key, provider_key)

if byok_error:
Expand All @@ -106,37 +93,40 @@ def build_llm(
)
resolved_max_tokens = max_output_tokens or DEFAULT_MAX_OUTPUT_TOKENS

if provider_key == "gemini":
resolved_model = model or PROVIDER_DEFAULTS["gemini"]
gemini_kwargs: dict = {
"model": resolved_model,
"temperature": resolved_temperature,
"max_output_tokens": resolved_max_tokens,
"google_api_key": resolved_key or settings.GEMINI_API_KEY,
"timeout": DEFAULT_REQUEST_TIMEOUT,
}
thinking_level = _resolve_thinking_level(resolved_model)
if thinking_level:
gemini_kwargs["thinking_level"] = thinking_level
llm = ChatGoogleGenerativeAI(**gemini_kwargs)
elif provider_key == "vertex":
resolved_model = model or PROVIDER_DEFAULTS["vertex"]
if provider_key not in ("google", "vertex"):
raise ValueError(
f"Unsupported provider: {provider_key}. Use 'vertex' or 'google'."
)

resolved_model = model or PROVIDER_DEFAULTS[provider_key]

if resolved_key:
use_vertex = provider_key == "vertex"
final_key = resolved_key
else:
if not settings.VERTEX_API_KEY:
raise ValueError("VERTEX_API_KEY is required for Vertex AI Express")
vertex_kwargs: dict = {
"model": resolved_model,
"temperature": resolved_temperature,
"max_output_tokens": resolved_max_tokens,
"timeout": DEFAULT_REQUEST_TIMEOUT,
"api_key": settings.VERTEX_API_KEY,
"vertexai": True,
}
thinking_level = _resolve_thinking_level(resolved_model)
if thinking_level:
vertex_kwargs["thinking_level"] = thinking_level
llm = ChatGoogleGenerativeAI(**vertex_kwargs)
raise ValueError("VERTEX_API_KEY is required")
use_vertex = True
final_key = settings.VERTEX_API_KEY

llm_kwargs: dict = {
"model": resolved_model,
"temperature": resolved_temperature,
"max_output_tokens": resolved_max_tokens,
"timeout": DEFAULT_REQUEST_TIMEOUT,
}

if use_vertex:
llm_kwargs["api_key"] = final_key
llm_kwargs["vertexai"] = True
else:
raise ValueError(f"Unsupported provider: {provider_key}")
llm_kwargs["google_api_key"] = final_key

thinking_level = _resolve_thinking_level(resolved_model)
if thinking_level:
llm_kwargs["thinking_level"] = thinking_level

llm = ChatGoogleGenerativeAI(**llm_kwargs)

if enable_fallback and model and model != PROVIDER_DEFAULTS.get(provider_key):
fallback_llm = build_llm(
Expand Down
4 changes: 2 additions & 2 deletions backend/app/providers/llm_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ async def wait(self) -> None:
_semaphore_lock = asyncio.Lock()

PROVIDER_CONCURRENCY_LIMITS = {
"gemini": 3,
"google": 3,
"vertex": 3,
}

PROVIDER_RPM_LIMITS = {
"gemini": 10,
"google": 10,
"vertex": 10,
}

Expand Down
2 changes: 1 addition & 1 deletion backend/app/services/evaluation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def _create_graph_config(
return {
"configurable": {
"thread_id": str(uuid.uuid4()),
"provider": provider or "gemini",
"provider": provider or "vertex",
"api_key": resolved_key,
"model": model,
"temperature": temperature,
Expand Down
Loading