From 4865da655f354d9481db522f36545292e3c0d6c4 Mon Sep 17 00:00:00 2001 From: ChrisAdamsdevelopment Date: Sun, 24 May 2026 06:56:28 -0700 Subject: [PATCH] feat: add spectracleanse-engineering Claude plugin --- .../.claude-plugin/plugin.json | 60 ++++++ spectracleanse-engineering/.mcp.json.example | 58 +++++ spectracleanse-engineering/CHANGELOG.md | 36 ++++ spectracleanse-engineering/README.md | 105 +++++++++ .../spectracleanse-incident-commander.md | 93 ++++++++ .../agents/spectracleanse-repo-researcher.md | 61 ++++++ .../docs/env-and-secrets-reference.md | 114 ++++++++++ .../docs/mcp-roadmap.md | 193 +++++++++++++++++ .../docs/plugin-validation.md | 201 ++++++++++++++++++ .../docs/product-context.md | 169 +++++++++++++++ .../docs/render-deploy-checklist.md | 164 ++++++++++++++ ...orted-formats-and-processing-boundaries.md | 146 +++++++++++++ .../spectracleanse-architecture/SKILL.md | 119 +++++++++++ .../spectracleanse-auth-billing/SKILL.md | 155 ++++++++++++++ .../spectracleanse-code-review/SKILL.md | 119 +++++++++++ .../spectracleanse-deploy-readiness/SKILL.md | 169 +++++++++++++++ .../spectracleanse-documentation/SKILL.md | 159 ++++++++++++++ .../SKILL.md | 111 ++++++++++ .../spectracleanse-incident-response/SKILL.md | 176 +++++++++++++++ .../SKILL.md | 192 +++++++++++++++++ .../spectracleanse-testing-strategy/SKILL.md | 177 +++++++++++++++ 21 files changed, 2777 insertions(+) create mode 100644 spectracleanse-engineering/.claude-plugin/plugin.json create mode 100644 spectracleanse-engineering/.mcp.json.example create mode 100644 spectracleanse-engineering/CHANGELOG.md create mode 100644 spectracleanse-engineering/README.md create mode 100644 spectracleanse-engineering/agents/spectracleanse-incident-commander.md create mode 100644 spectracleanse-engineering/agents/spectracleanse-repo-researcher.md create mode 100644 spectracleanse-engineering/docs/env-and-secrets-reference.md create mode 100644 spectracleanse-engineering/docs/mcp-roadmap.md create mode 100644 
spectracleanse-engineering/docs/plugin-validation.md create mode 100644 spectracleanse-engineering/docs/product-context.md create mode 100644 spectracleanse-engineering/docs/render-deploy-checklist.md create mode 100644 spectracleanse-engineering/docs/supported-formats-and-processing-boundaries.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-architecture/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-auth-billing/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-code-review/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-deploy-readiness/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-documentation/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-founder-operating-review/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-incident-response/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-processing-pipeline/SKILL.md create mode 100644 spectracleanse-engineering/skills/spectracleanse-testing-strategy/SKILL.md diff --git a/spectracleanse-engineering/.claude-plugin/plugin.json b/spectracleanse-engineering/.claude-plugin/plugin.json new file mode 100644 index 0000000..ea62839 --- /dev/null +++ b/spectracleanse-engineering/.claude-plugin/plugin.json @@ -0,0 +1,60 @@ +{ + "name": "spectracleanse-engineering", + "description": "SpectraCleanse-specific engineering plugin. Covers code review, Render deploy readiness, auth/billing debugging, media-processing pipeline safety, incident response, technical documentation, testing strategy, and founder operating reviews. 
All skills are calibrated to the SpectraCleanse stack: React + Vite frontend, Node.js/Express backend, SQLite (better-sqlite3), ExifTool (exiftool-vendored), Google Gemini (gemini-2.5-flash), Stripe checkout, and Render/Hyperlift deployment.", + "skills": [ + { + "name": "spectracleanse-code-review", + "path": "skills/spectracleanse-code-review/SKILL.md" + }, + { + "name": "spectracleanse-architecture", + "path": "skills/spectracleanse-architecture/SKILL.md" + }, + { + "name": "spectracleanse-deploy-readiness", + "path": "skills/spectracleanse-deploy-readiness/SKILL.md" + }, + { + "name": "spectracleanse-processing-pipeline", + "path": "skills/spectracleanse-processing-pipeline/SKILL.md" + }, + { + "name": "spectracleanse-auth-billing", + "path": "skills/spectracleanse-auth-billing/SKILL.md" + }, + { + "name": "spectracleanse-incident-response", + "path": "skills/spectracleanse-incident-response/SKILL.md" + }, + { + "name": "spectracleanse-documentation", + "path": "skills/spectracleanse-documentation/SKILL.md" + }, + { + "name": "spectracleanse-testing-strategy", + "path": "skills/spectracleanse-testing-strategy/SKILL.md" + }, + { + "name": "spectracleanse-founder-operating-review", + "path": "skills/spectracleanse-founder-operating-review/SKILL.md" + } + ], + "agents": [ + { + "name": "spectracleanse-repo-researcher", + "path": "agents/spectracleanse-repo-researcher.md" + }, + { + "name": "spectracleanse-incident-commander", + "path": "agents/spectracleanse-incident-commander.md" + } + ], + "docs": [ + "docs/product-context.md", + "docs/render-deploy-checklist.md", + "docs/mcp-roadmap.md", + "docs/supported-formats-and-processing-boundaries.md", + "docs/env-and-secrets-reference.md", + "docs/plugin-validation.md" + ] +} diff --git a/spectracleanse-engineering/.mcp.json.example b/spectracleanse-engineering/.mcp.json.example new file mode 100644 index 0000000..5effaac --- /dev/null +++ b/spectracleanse-engineering/.mcp.json.example @@ -0,0 +1,58 @@ +{ + "_comment": 
"SpectraCleanse Engineering Plugin – MCP configuration example. Copy to .mcp.json and fill in real values. NEVER commit .mcp.json to version control.", + + "mcpServers": { + + "github": { + "_status": "planned", + "_description": "GitHub MCP for repo context, PRs, issues, and commit history. Install from https://github.com/github/github-mcp-server", + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-github"], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "REPLACE_WITH_YOUR_GITHUB_PAT" + } + }, + + "sentry": { + "_status": "planned – configure if/when Sentry is added to SpectraCleanse", + "_description": "Sentry MCP for production error context during incident response.", + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-sentry"], + "env": { + "SENTRY_AUTH_TOKEN": "REPLACE_WITH_YOUR_SENTRY_TOKEN", + "SENTRY_ORG": "REPLACE_WITH_YOUR_ORG_SLUG", + "SENTRY_PROJECT": "spectracleanse" + } + }, + + "playwright": { + "_status": "planned – for UI smoke test automation", + "_description": "Playwright MCP for browser-based smoke tests: upload flow, auth, checkout modal, download.", + "command": "npx", + "args": ["-y", "@playwright/mcp"], + "env": { + "PLAYWRIGHT_BASE_URL": "https://spectracleanse.com" + } + }, + + "spectracleanse-admin": { + "_status": "planned – does not exist yet; see docs/mcp-roadmap.md for spec", + "_description": "Future SpectraCleanse Admin MCP. Exposes deploy health, env validation, smoke checks, usage stats, and release notes generation. 
To be built as a remote MCP server backed by the Render deployment.", + "url": "REPLACE_WITH_SPECTRACLEANSE_ADMIN_MCP_URL", + "headers": { + "Authorization": "Bearer REPLACE_WITH_ADMIN_MCP_SECRET" + }, + "_tools": [ + "get_deploy_health", + "validate_env", + "run_smoke_checks", + "list_recent_processing_failures", + "get_checkout_failures", + "get_usage_and_plan_stats", + "list_supported_formats", + "draft_release_notes_from_commits" + ] + } + + } +} diff --git a/spectracleanse-engineering/CHANGELOG.md b/spectracleanse-engineering/CHANGELOG.md new file mode 100644 index 0000000..8ea50f3 --- /dev/null +++ b/spectracleanse-engineering/CHANGELOG.md @@ -0,0 +1,36 @@ +# Changelog – spectracleanse-engineering plugin + +All changes to this plugin are documented here. + +## [Unreleased] + +### Added +- Created SpectraCleanse-specific engineering plugin scaffold (`spectracleanse-engineering/`) +- Plugin manifest (`.claude-plugin/plugin.json`) with 9 skills, 2 agents, 6 docs registered +- `spectracleanse-code-review` skill: upload/MIME/ExifTool/Gemini/auth/CORS/Stripe-aware code review +- `spectracleanse-architecture` skill: Render-constrained architecture decision framework +- `spectracleanse-deploy-readiness` skill: Render/Hyperlift pre-deploy checklist with env gate and smoke tests +- `spectracleanse-processing-pipeline` skill: format matrix, trust boundaries, ExifTool and browser-side cleanse review +- `spectracleanse-auth-billing` skill: JWT, /api/me, 402 upgrade, Stripe checkout, plan-desync debugging +- `spectracleanse-incident-response` skill: incident classes mapped to SpectraCleanse failure modes +- `spectracleanse-documentation` skill: accurate doc generation with current-vs-planned distinction +- `spectracleanse-testing-strategy` skill: test surface audit starting from current zero-test state +- `spectracleanse-founder-operating-review` skill: solo-founder shipping review replacing generic standups +- `spectracleanse-repo-researcher` agent: read-only repo 
exploration with citation discipline +- `spectracleanse-incident-commander` agent: production triage with hypothesis ranking +- `docs/product-context.md`: full stack reference with risk model and known assumptions +- `docs/render-deploy-checklist.md`: Render/Hyperlift-specific deploy checklist +- `docs/mcp-roadmap.md`: 3-phase MCP integration plan with Admin MCP tool spec +- `docs/supported-formats-and-processing-boundaries.md`: format matrix with verification instructions +- `docs/env-and-secrets-reference.md`: all env vars with types, roles, and Render notes +- `docs/plugin-validation.md`: local install and manual validation checklist +- `.mcp.json.example`: safe placeholder MCP config (no real secrets) +- `README.md`: plugin overview, stack reference, install instructions +- Documented VITE_API_URL vs VITE_BACKEND_URL discrepancy between app.tsx and .env.example +- Documented Node 18 in ci.yml vs Node 20.20.2 in .nvmrc — noted as known conflict + +### Notes +- Initial version created from live repo inspection (May 2026) +- All stack facts verified from: `package.json`, `server.js`, `server/cleansePolicy.js`, + `server/processor.js`, `app.tsx`, `.env.example`, `.nvmrc`, `.github/workflows/ci.yml`, + `docs/manual-qa-checklist.md`, `PIPELINE.md`, `deploy.md` diff --git a/spectracleanse-engineering/README.md b/spectracleanse-engineering/README.md new file mode 100644 index 0000000..4145104 --- /dev/null +++ b/spectracleanse-engineering/README.md @@ -0,0 +1,105 @@ +# SpectraCleanse Engineering Plugin + +A Claude plugin built specifically for engineering work on [SpectraCleanse](https://spectracleanse.com) — the AI-provenance-marker cleansing and SEO metadata injection tool for audio and video files. + +This plugin is **not** a generic engineering assistant. Every skill, checklist, and doc in here is calibrated to the actual SpectraCleanse repo: its stack, endpoints, deployment model, risk surfaces, and founder workflow. 
+ +--- + +## What this plugin helps with + +| Skill | When to use it | +|---|---| +| `spectracleanse-code-review` | Reviewing changes to server.js, app.tsx, cleansePolicy.js, processor.js | +| `spectracleanse-architecture` | Making architectural decisions: storage, auth upgrades, queue, API split | +| `spectracleanse-deploy-readiness` | Pre-deploy gate checks for Render/Hyperlift releases | +| `spectracleanse-processing-pipeline` | Reviewing or designing file-processing and ExifTool behavior | +| `spectracleanse-auth-billing` | Debugging JWT, /api/me, 402 upgrade flow, Stripe checkout | +| `spectracleanse-incident-response` | Production triage: broken deploy, CORS failure, Gemini outage | +| `spectracleanse-documentation` | Writing accurate README, env, API, or user-facing docs | +| `spectracleanse-testing-strategy` | Designing smoke tests, CI gates, and integration test fixtures | +| `spectracleanse-founder-operating-review` | Solo-founder shipping review: what shipped, risks, next move | + +--- + +## What this plugin does NOT do + +- It does not auto-deploy to Render or Hyperlift. +- It does not store or handle real API keys, secrets, or webhook secrets. +- It does not claim format support without verifying from `server/cleansePolicy.js`. +- It does not execute live Stripe or Gemini calls. +- It does not replace the manual QA checklist in `docs/manual-qa-checklist.md`. 
+ +--- + +## Stack reference (confirmed from repo) + +- **Frontend**: React 18 + Vite 4, TypeScript 5, Tailwind CSS 3, `lucide-react` +- **Backend**: Node.js 20.20.2, Express 4, `server.js` as entrypoint +- **Database**: SQLite via `better-sqlite3` 9, WAL mode, tables: `users`, `jobs` +- **Media processing**: `exiftool-vendored` 28, `music-metadata` 11, `browser-id3-writer` (browser-side MP3) +- **AI**: Google Gemini `gemini-2.5-flash` via REST (`/api/generate-seo`) +- **Billing**: Stripe 16, subscription checkout, webhook at `/api/stripe-webhook` +- **Auth**: email/password + bcrypt, JWT 7-day expiry, Bearer token +- **Frontend env var**: `VITE_API_URL` (used in `app.tsx`) — note: `.env.example` lists `VITE_BACKEND_URL` but the frontend reads `VITE_API_URL` +- **Deployment**: Render / Spaceship Hyperlift (both documented); Docker support present + +--- + +## Installation / local testing + +See `docs/plugin-validation.md` for full instructions. + +Quick start: +```bash +# From repo root, point Claude at this plugin directory +claude --plugin-dir ./spectracleanse-engineering +``` + +Or reference it in your `.claude/settings.json`: +```json +{ + "pluginDirs": ["./spectracleanse-engineering"] +} +``` + +--- + +## Agents + +- **spectracleanse-repo-researcher** — read-only repo exploration; finds actual files, endpoints, env vars before any claim is made +- **spectracleanse-incident-commander** — production triage; ranks hypotheses, separates facts from guesses, focuses on user impact first + +--- + +## MCP roadmap + +See `docs/mcp-roadmap.md`. Short version: +1. Remote SpectraCleanse Admin MCP (deploy health, env validation, smoke checks, usage stats) +2. GitHub MCP (issues, PRs, commit context) +3. Sentry MCP (if/when error monitoring is configured) +4. Playwright MCP (UI smoke tests) + +--- + +## Safety / security notes + +- No real secrets are stored in this plugin. `.mcp.json.example` uses placeholders only. 
+- Never commit a real `.mcp.json` (containing live tokens) to version control. +- All documented endpoints and env vars are verified from the repo. If the repo changes, update this plugin. +- Skills marked `conservative` will not suggest auto-running risky operations. + +--- + +## Keeping this plugin current + +When you make significant changes to SpectraCleanse, update: + +| Change | Update | +|---|---| +| New API endpoint | `docs/product-context.md`, `spectracleanse-code-review/SKILL.md` | +| New env var | `docs/env-and-secrets-reference.md`, `spectracleanse-deploy-readiness/SKILL.md` | +| Format support change | `docs/supported-formats-and-processing-boundaries.md`, `spectracleanse-processing-pipeline/SKILL.md` | +| New plan tier | `spectracleanse-auth-billing/SKILL.md`, `docs/product-context.md` | +| New deployment platform | `docs/render-deploy-checklist.md`, `spectracleanse-deploy-readiness/SKILL.md` | +| Gemini model change | `spectracleanse-processing-pipeline/SKILL.md`, `docs/product-context.md` | diff --git a/spectracleanse-engineering/agents/spectracleanse-incident-commander.md b/spectracleanse-engineering/agents/spectracleanse-incident-commander.md new file mode 100644 index 0000000..014719c --- /dev/null +++ b/spectracleanse-engineering/agents/spectracleanse-incident-commander.md @@ -0,0 +1,93 @@ +# spectracleanse-incident-commander + +## Role +Production triage agent for SpectraCleanse incidents. Ranks hypotheses, separates confirmed facts from guesses, focuses on user impact and containment first, then root cause. Does not pretend to have checked a live service unless a tool confirmed it. + +## Behavior rules +- Start with user impact, not root cause. Answer "who is affected and what can't they do?" before "why is this happening?" +- Rank hypotheses by likelihood, not by familiarity. The most familiar-sounding cause is not always the most likely. +- Clearly distinguish: `[CONFIRMED]` (directly observed via tool, log, or response) vs. 
`[HYPOTHESIS]` (inferred, not yet verified). +- Propose containment actions before investigation deep-dives. Rollback beats debugging. +- Never say "the DB is fine" or "Stripe is configured" without evidence from an actual check. +- If no live tooling is connected, say so and ask for log output, curl responses, or Render/Hyperlift dashboard screenshots. +- After an incident is resolved, produce a prevention item — one checklist or monitoring addition that would catch this earlier next time. + +## Incident severity model + +| Severity | Criteria | Time to respond | +|---|---|---| +| P0 | All users blocked (site down, auth broken, no uploads possible) | Immediate | +| P1 | Core feature down for some users (processing failures, Stripe checkout broken) | Within 1 hour | +| P2 | Degraded experience (Gemini SEO unavailable, slow processing) | Within 4 hours | +| P3 | Minor issue (incorrect error message, cosmetic bug) | Next working session | + +## Fast triage checklist + +Before forming any hypothesis, run: + +1. `curl -sf https://api.spectracleanse.com/api/health` + - `{"status":"ok"}` → backend running; issue is route-specific or env-specific + - Timeout / 502 / 504 → backend not running; check Render/Hyperlift deploy status + - `{"error":"API route not found"}` or HTML → something is wrong with the routing setup + +2. Check Render/Hyperlift deployment logs for FATAL lines: + - `FATAL: missing required environment variable: JWT_SECRET` → server exited at startup; all requests fail + - `FATAL: Stripe is not fully configured in production.` → Stripe env vars missing + - `FATAL: set FRONTEND_URL or ALLOWED_ORIGINS for production CORS configuration.` → CORS config missing; server exited + +3. 
Check browser console / network tab: + - CORS error → `FRONTEND_URL` or `ALLOWED_ORIGINS` misconfiguration + - 401 on all requests → JWT_SECRET rotated or requireAuth regression + - Requests going to `undefined` or `localhost:3001` in production → `VITE_API_URL` missing from build env + +## Containment actions by class + +| Class | Containment | +|---|---| +| Broken deploy | Roll back to previous deploy SHA in Render/Hyperlift | +| JWT_SECRET rotated | If the rotation was accidental, restore the previous secret so existing tokens verify again; otherwise all users must log in again | +| CORS misconfiguration | Correct the `FRONTEND_URL` / `ALLOWED_ORIGINS` env vars, restart/redeploy service | +| Stripe env missing | Add missing env vars, redeploy (server exits if Stripe vars missing in prod) | +| Gemini outage | Communicate that AI metadata generation is temporarily unavailable; processing still works | +| DB on ephemeral disk | If data was lost: restore from backup; add persistent disk; critical P0 | +| ExifTool failure spike | Check if a new file format is being uploaded that ExifTool cannot process | +| VITE_API_URL missing | Trigger a rebuild with `VITE_API_URL` set in build environment | + +## Output format + +``` +## Incident title +[Short, specific: "SpectraCleanse: [class] – [date/time if known]"] + +## Severity +[P0 / P1 / P2 / P3 and reason] + +## User impact +[CONFIRMED or HYPOTHESIS: who is affected, what they cannot do] + +## Timeline +[Time first noticed, last known-good state, events since] + +## Known facts [CONFIRMED] +[Only things directly observed: API response, log line, env var value, Stripe event] + +## Hypotheses [HYPOTHESIS — ranked by likelihood] +[1. Most likely cause with reasoning + 2. Next most likely + ...] 
+ +## Immediate containment +[What to do right now — before root cause is confirmed] + +## Investigation steps +[Ordered: what to check next, with expected output for each step] + +## Fix +[Specific code or config change, once root cause is confirmed] + +## Verification +[How to confirm the fix worked] + +## Prevention items +[One or two additions to the deploy checklist, monitoring, or test suite] +``` diff --git a/spectracleanse-engineering/agents/spectracleanse-repo-researcher.md b/spectracleanse-engineering/agents/spectracleanse-repo-researcher.md new file mode 100644 index 0000000..3fb680a --- /dev/null +++ b/spectracleanse-engineering/agents/spectracleanse-repo-researcher.md @@ -0,0 +1,61 @@ +# spectracleanse-repo-researcher + +## Role +Read-only SpectraCleanse repo exploration agent. Finds actual files, confirms real endpoint names, env var names, supported formats, scripts, and code behavior before any claim is made. Does not edit files unless explicitly instructed. + +## Behavior rules +- Read before claiming. Never assert that a file exists, an endpoint exists, or a format is supported without reading the relevant source file first. +- Cite sources as `filename:line` or `filename:function` wherever possible. +- If a file does not exist, say so explicitly rather than guessing its contents. +- Return a structured evidence summary, not a narrative. +- Prefer `cat`, `grep`, and `find` over broad assumptions. +- If two sources conflict (e.g., `.env.example` vs. `app.tsx` on env var names), report the conflict and cite both sources. +- Do not make architecture recommendations — that is the `spectracleanse-architecture` skill's job. + +## Key files to know +- `server.js` — backend entrypoint, 637 lines. All routes, middleware, CORS, auth, Stripe, Gemini, upload handling. +- `server/cleansePolicy.js` — authoritative format support. `CLEANSE_POLICY.server.supportedExtensions` = `['.mp4', '.m4a']`. `CLEANSE_POLICY.quick.supportedExtensions` = `['.mp3']`. 
+- `server/processor.js` — ExifTool orchestration. `processMediaFile()` is the main entry point. +- `server/metadataRules.js` — `MARKER_RULES`, `isBenign()`, `isAllowedInjected()`, `detectMarkers()`. +- `server/cleanup.js` — file deletion management. +- `server/downloadTokens.js` — one-time download token management for batch outputs. +- `app.tsx` — entire React frontend. Single file. API base URL: `import.meta.env.VITE_API_URL`. +- `package.json` — dependencies, scripts, Node engine range (`20.x`). No test runner. +- `.env.example` — env var documentation. Note: lists `VITE_BACKEND_URL` but `app.tsx` reads `VITE_API_URL` — confirmed conflict. +- `.nvmrc` / `.node-version` — both pin `20.20.2`. +- `.github/workflows/ci.yml` — smoke test on Node 18. Confirmed mismatch with production pin. +- `docs/manual-qa-checklist.md` — manual QA process. +- `deploy.md` — Spaceship Hyperlift deployment guide. +- `PIPELINE.md` — GitHub Actions CI/CD documentation. + +## Known facts (pre-loaded from repo inspection) +- Backend API port: `PORT` env var, defaults to `3001` +- Confirmed API endpoints: `/api/health` (GET), `/api/register` (POST), `/api/login` (POST), `/api/me` (GET, auth), `/api/create-checkout-session` (POST, auth), `/api/stripe-webhook` (POST), `/api/process` (POST, auth, multipart), `/api/process-batch` (POST, auth, multipart), `/api/generate-seo` (POST, auth), `/api/download/:token` (GET, auth) +- Free tier limit: `FREE_MONTHLY_LIMIT = 3` (server.js) +- Plans: `free`, `creator`, `studio` +- JWT expiry: `JWT_EXPIRES = '7d'` +- Max upload size: `MAX_FILE_SIZE = 500 * 1024 * 1024` (500 MB) +- Max batch files: 20 +- Max batch total: 2 GB +- Gemini model: `gemini-2.5-flash` (REST API, not SDK) +- DB tables: `users`, `jobs` +- ExifTool package: `exiftool-vendored` v28.3.1 + +## Output format for research tasks + +``` +## Research question +[What was asked] + +## Files inspected +[List of files read, with line ranges if relevant] + +## Evidence found +[Specific facts with 
citations: filename:line] + +## Conflicts or uncertainties +[Any place where two sources disagree, or where a claim could not be verified] + +## Recommended follow-up +[Any additional file or line that should be read to fully answer the question] +``` diff --git a/spectracleanse-engineering/docs/env-and-secrets-reference.md b/spectracleanse-engineering/docs/env-and-secrets-reference.md new file mode 100644 index 0000000..9b2fdae --- /dev/null +++ b/spectracleanse-engineering/docs/env-and-secrets-reference.md @@ -0,0 +1,114 @@ +# SpectraCleanse – Environment Variables and Secrets Reference + +All variables confirmed from `server.js` and `app.tsx` as of May 2026. +Do not store real values in this file. Use Render/Hyperlift dashboard secrets management. + +--- + +## Backend variables (Node.js runtime — `server.js`) + +### Core server + +| Variable | Type | Required | Default (non-prod) | Description | +|---|---|---|---|---| +| `PORT` | number | No | `3001` | Port the Express server listens on. Render/Hyperlift typically assigns this. | +| `NODE_ENV` | string | Yes (prod) | — | Set to `production` in all production deployments. Controls dev-friendly fallbacks. | + +### Auth + +| Variable | Type | Required | Default (non-prod) | Description | +|---|---|---|---|---| +| `JWT_SECRET` | string | **Yes — server exits if missing in production** | `dev_jwt_secret_change_me` (dev only) | Signs and verifies JWT tokens. Must be a long random string (≥32 chars). Generate: `node -e "console.log(require('crypto').randomBytes(64).toString('hex'))"`. Never reuse across environments. | + +⚠️ **Rotating `JWT_SECRET`** invalidates all existing tokens — all logged-in users must re-login. Only rotate deliberately, never accidentally via deploy. + +### Database + +| Variable | Type | Required | Default (non-prod) | Description | +|---|---|---|---|---| +| `DB_PATH` | string | Yes | `spectra.db` (working dir) | Absolute path to the SQLite database file. 
In production on Render, must point to a persistent disk mount (e.g. `/data/spectra.db`). If missing or pointing to ephemeral storage, all user data is lost on redeploy. | + +⚠️ **Render-specific**: Render web services have ephemeral local storage. You must add a persistent disk in the Render dashboard and set `DB_PATH` to a path within that disk's mount point. Hyperlift uses similar persistent volume configuration (`/data` per `deploy.md`). + +### CORS and origins + +| Variable | Type | Required | Default (non-prod) | Description | +|---|---|---|---|---| +| `FRONTEND_URL` | string | **Yes in production** | `''` | Full URL of the frontend (e.g. `https://spectracleanse.com`). Used to configure CORS allowed origins AND as the base URL for Stripe checkout `success_url`/`cancel_url`. No trailing slash. | +| `ALLOWED_ORIGINS` | string | No | `''` | Comma-separated list of additional allowed CORS origins. Useful if the frontend and backend are on different subdomains. Combined with `FRONTEND_URL`. | + +⚠️ If both `FRONTEND_URL` and `ALLOWED_ORIGINS` are empty in production, the server calls `process.exit(1)` at startup — the service never comes up, and the only signal is a `FATAL:` line in the deploy logs. + +### Stripe + +| Variable | Type | Required | Default (non-prod) | Description | +|---|---|---|---|---| +| `STRIPE_SECRET_KEY` | string | **Yes in production** | — | Stripe secret key. Starts with `sk_live_` in production, `sk_test_` in test mode. Found in Stripe Dashboard → Developers → API keys. | +| `STRIPE_WEBHOOK_SECRET` | string | **Yes in production** | — | Stripe webhook signing secret. Starts with `whsec_`. Found in Stripe Dashboard → Developers → Webhooks → your endpoint → Signing secret. Each webhook endpoint has its own signing secret; use the one for the endpoint whose URL targets this deployment's `/api/stripe-webhook`. | +| `STRIPE_CREATOR_PRICE_ID` | string | **Yes in production** | — | Stripe Price ID for the Creator plan ($9.99/mo). Starts with `price_`. Found in Stripe Dashboard → Products. 
| +| `STRIPE_STUDIO_PRICE_ID` | string | **Yes in production** | — | Stripe Price ID for the Studio plan ($29.99/mo). Starts with `price_`. Must be different from the Creator price ID. | +| `ENABLE_MOCK_CHECKOUT` | boolean | No | `false` | If `true`, `/api/create-checkout-session` returns a mock redirect without calling Stripe. Useful for local development without Stripe credentials. **Must never be `true` in production.** | + +⚠️ If any of the four Stripe vars are missing in production (`NODE_ENV=production`), the server calls `process.exit(1)` at startup. + +⚠️ **Stripe webhook ordering**: The `/api/stripe-webhook` route uses `express.raw({ type: 'application/json' })` and must be registered **before** `app.use(express.json())` in `server.js`. This is already correct in the current codebase — do not change this ordering. + +### AI + +| Variable | Type | Required | Default (non-prod) | Description | +|---|---|---|---|---| +| `GEMINI_API_KEY` | string | Yes (for SEO generation) | — | Google Gemini API key. Used by `/api/generate-seo` to call `gemini-2.5-flash`. Create at https://aistudio.google.com/app/apikey. If missing, `/api/generate-seo` returns HTTP 500. Processing (`/api/process`) does not depend on this key. | + +--- + +## Frontend variables (Vite build-time — `app.tsx`) + +⚠️ **These variables are embedded at build time by Vite.** They must be set in the build environment (Render build settings or CI env), not just as runtime env vars. Setting them only in Render's runtime env will have no effect. + +| Variable | Type | Required | Build or Runtime | Description | +|---|---|---|---|---| +| `VITE_API_URL` | string | **Yes** | **Build time** | Base URL for all API calls from the frontend. Example: `https://api.spectracleanse.com`. If missing at build time, the frontend throws "Missing VITE_API_URL in production build" on first API call. | + +⚠️ **Known discrepancy**: `.env.example` lists `VITE_BACKEND_URL` but `app.tsx` line 11 reads `VITE_API_URL`. 
The correct variable name is `VITE_API_URL`. The `.env.example` entry is incorrect and should be updated. + +--- + +## Variable classification + +| Variable | Secret? | Commit to repo? | Notes | +|---|---|---|---| +| `JWT_SECRET` | ✅ Secret | Never | Treat like a password | +| `STRIPE_SECRET_KEY` | ✅ Secret | Never | Live key gives billing access | +| `STRIPE_WEBHOOK_SECRET` | ✅ Secret | Never | Used to verify Stripe requests | +| `STRIPE_CREATOR_PRICE_ID` | ⚠️ Semi-sensitive | No | Price IDs are not strictly secret but should not be public | +| `STRIPE_STUDIO_PRICE_ID` | ⚠️ Semi-sensitive | No | Same as above | +| `GEMINI_API_KEY` | ✅ Secret | Never | API billing access | +| `DB_PATH` | ℹ️ Config | Safe in docs | Not secret but path leaks server layout | +| `FRONTEND_URL` | ℹ️ Config | Safe in docs | Public URL | +| `ALLOWED_ORIGINS` | ℹ️ Config | Safe in docs | Public URLs | +| `PORT` | ℹ️ Config | Safe in docs | | +| `NODE_ENV` | ℹ️ Config | Safe in docs | | +| `ENABLE_MOCK_CHECKOUT` | ℹ️ Config | Safe in docs | Must be false in prod | +| `VITE_API_URL` | ℹ️ Config | Safe in docs | Public URL embedded in bundle | + +--- + +## Render-specific notes + +- Set all secrets under **Environment → Secret Files or Environment Variables** in the Render dashboard. +- `VITE_API_URL` must be set under **Build** environment variables, not just runtime variables. +- `DB_PATH` must point to a path inside the persistent disk mount (e.g. `/data/spectra.db`). Add the disk under **Disks** in the Render service settings. +- `NODE_VERSION=20.20.2` should be set as a build environment variable to override Render's default Node version. +- `ENABLE_MOCK_CHECKOUT` must not be set (or must be `false`) on the production service. + +--- + +## Local development (.env file) + +Copy `.env.example` to `.env` and fill in values. 
Notes for local dev: +- `JWT_SECRET`: any non-empty string works locally +- Stripe: leave blank and set `ENABLE_MOCK_CHECKOUT=true` to use mock checkout +- `GEMINI_API_KEY`: required for SEO generation; use a real key or leave blank (SEO calls will return 500) +- `DB_PATH`: `./spectra.db` or `/tmp/spectra.db` for local development +- `FRONTEND_URL`: `http://localhost:5173` +- `VITE_API_URL`: set in your terminal or `.env` for Vite — e.g. `VITE_API_URL=http://localhost:3001` diff --git a/spectracleanse-engineering/docs/mcp-roadmap.md b/spectracleanse-engineering/docs/mcp-roadmap.md new file mode 100644 index 0000000..7f403b9 --- /dev/null +++ b/spectracleanse-engineering/docs/mcp-roadmap.md @@ -0,0 +1,193 @@ +# SpectraCleanse MCP Integration Roadmap + +This document describes the recommended MCP (Model Context Protocol) architecture for SpectraCleanse and the engineering plugin. It separates the three distinct MCP concerns and provides a phased implementation plan. + +--- + +## The three MCP concerns for SpectraCleanse + +| # | MCP | Purpose | Priority | +|---|---|---|---| +| 1 | **SpectraCleanse Admin MCP** | Remote MCP server backed by the Render deployment. Exposes deploy health, env validation, usage stats, smoke checks, and release note generation. | High — build first | +| 2 | **SpectraCleanse Engineering Plugin** | This plugin. Installs locally as a Claude plugin; provides skills for code review, deploy readiness, incident response, etc. | Already built | +| 3 | **Local ExifTool / Media MCP** (optional) | Local MCP for running ExifTool operations and inspecting real media files during development. Only useful if working with local test fixtures. | Low priority | + +--- + +## Phase 1: SpectraCleanse Admin MCP (build this first) + +### What it is +A remote MCP server that runs as a service (can be a separate Express route on the SpectraCleanse backend, or a separate service) and exposes structured tools for Claude to query the live deployment state. 
+ +### Why remote, not local +- The production DB, upload state, and deploy health are on Render — not on the developer's machine. +- A remote MCP can query the live DB, run health checks, and read Render env state from wherever Claude is running. +- No secrets are stored in the plugin — the MCP server handles authentication server-side. + +### Tool surface (spec — not yet implemented) + +```typescript +// get_deploy_health +// Returns: { status: 'ok'|'degraded', nodeVersion, dbConnected, exiftoolRunning, uptime } +get_deploy_health(): DeployHealth + +// validate_env +// Returns: { missing: string[], misconfigured: string[], ok: string[] } +// Checks all required env vars are present and correctly formatted +validate_env(): EnvValidation + +// run_smoke_checks +// Returns: { checks: { name: string, passed: boolean, detail: string }[] } +// Runs /api/health, /api/me auth check, and a minimal format rejection check +run_smoke_checks(): SmokeCheckResults + +// list_recent_processing_failures +// Returns: last N failed processing attempts (from a future error log table) +// Current: not tracked in DB — would require adding an error_log table +list_recent_processing_failures(limit?: number): ProcessingFailure[] + +// get_checkout_failures +// Returns: Stripe checkout sessions where userId was missing or plan update failed +// Current: not tracked — would require a billing_events table +get_checkout_failures(since?: string): CheckoutFailure[] + +// get_usage_and_plan_stats +// Returns: { totalUsers, freeUsers, creatorUsers, studioUsers, jobsThisMonth, jobsAllTime } +get_usage_and_plan_stats(): UsageStats + +// list_supported_formats +// Returns: the current CLEANSE_POLICY from cleansePolicy.js as structured data +list_supported_formats(): FormatPolicy + +// draft_release_notes_from_commits +// Returns: AI-drafted release notes from recent git commits +// Requires: GitHub MCP or git access +draft_release_notes_from_commits(since?: string): string +``` + +### 
Implementation notes +- The Admin MCP should require authentication (Bearer token or API key) — store this as `ADMIN_MCP_SECRET` in the Render env, never in the plugin. +- `get_usage_and_plan_stats` can query the existing `users` and `jobs` tables immediately. +- `get_checkout_failures` and `list_recent_processing_failures` need new DB tables to be useful — this is future work. +- Use the `@modelcontextprotocol/sdk` Node.js package or expose via an HTTP endpoint that Claude can query. + +### Minimal first implementation +1. Add `/admin/health` route to `server.js` (auth-gated) +2. Return `{ nodeVersion, dbConnected, uptime, planStats, jobsThisMonth }` +3. Register this as an MCP tool in `.mcp.json` +4. Test that Claude can query it during an incident response session + +--- + +## Phase 2: GitHub MCP (connect when repo is on GitHub) + +### What it provides +- Read commits, PRs, issues from `github.com/ChrisAdamsdevelopment/SpectraCleanseAI` +- Draft release notes from commit history +- Surface open issues during founder operating review +- Check CI status for the last push to `main` + +### Setup +```json +// .mcp.json (never commit with real token) +{ + "mcpServers": { + "github": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-github"], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "REPLACE" + } + } + } +} +``` + +### Tools to use during skills +- `get_file_contents` — verify actual endpoint names / env vars before making claims +- `list_commits` — populate "Shipped" section of founder operating review +- `list_pull_requests` — check for open PRs during deploy readiness gate +- `get_issue` — surface known bugs during incident triage + +--- + +## Phase 3: Sentry MCP (add if/when Sentry is configured) + +### What it provides +- Production error rates and stack traces during incident response +- Alert history for /api/process, /api/generate-seo failures +- Top errors by volume + +### Setup +```json +{ + 
"mcpServers": { + "sentry": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-sentry"], + "env": { + "SENTRY_AUTH_TOKEN": "REPLACE", + "SENTRY_ORG": "REPLACE", + "SENTRY_PROJECT": "spectracleanse" + } + } + } +} +``` + +Note: Sentry is not currently configured in SpectraCleanse. This is Phase 3 — do not set up until Phase 1 Admin MCP provides baseline observability. + +--- + +## Phase 4: Playwright MCP (optional, for UI smoke test automation) + +### What it provides +- End-to-end browser automation for the upload → process → download flow +- Automated upgrade modal trigger testing +- Cross-browser format rejection verification + +### Setup +```json +{ + "mcpServers": { + "playwright": { + "command": "npx", + "args": ["-y", "@playwright/mcp"], + "env": { + "PLAYWRIGHT_BASE_URL": "https://spectracleanse.com" + } + } + } +} +``` + +--- + +## Security notes + +- Never store real tokens or API keys in this plugin directory or any committed `.mcp.json`. +- The Admin MCP secret (`ADMIN_MCP_SECRET`) must be rotatable independently from `JWT_SECRET`. +- The Admin MCP must enforce auth on every tool call — do not expose usage stats or processing failure data without authentication. +- For local development: copy `.mcp.json.example` to `.mcp.json`, fill in test credentials, add `.mcp.json` to `.gitignore`. 
+ +--- + +## Implementation sequence + +``` +Week 1: Add /admin/health route to server.js (auth-gated) + → Test manually via curl + → Connect to Claude via .mcp.json + +Week 2: Add get_usage_and_plan_stats tool + → Useful for founder operating review + +Week 3: Connect GitHub MCP + → Use list_commits in founder operating review + +Month 2: Add Sentry (if error monitoring is configured) + Add Playwright MCP (if UI test automation is prioritized) + +Month 3: Implement full Admin MCP tool surface + (list_recent_processing_failures requires new DB table) +``` diff --git a/spectracleanse-engineering/docs/plugin-validation.md b/spectracleanse-engineering/docs/plugin-validation.md new file mode 100644 index 0000000..76b9a01 --- /dev/null +++ b/spectracleanse-engineering/docs/plugin-validation.md @@ -0,0 +1,201 @@ +# SpectraCleanse Engineering Plugin – Installation and Validation + +This document explains how to install and validate the `spectracleanse-engineering` plugin locally before relying on it in engineering workflows. + +--- + +## Installation + +### Option 1: Point Claude at the plugin directory + +From the SpectraCleanse repo root: + +```bash +claude --plugin-dir ./spectracleanse-engineering +``` + +This loads the plugin for a single Claude session. + +### Option 2: Add to Claude settings + +Add to your `.claude/settings.json` (create if it doesn't exist): + +```json +{ + "pluginDirs": [ + "./spectracleanse-engineering" + ] +} +``` + +This loads the plugin for all Claude sessions in the SpectraCleanse repo directory. 
+ +### Option 3: Global plugin registration + +If you want the plugin available from any directory: + +```json +// ~/.claude/settings.json +{ + "pluginDirs": [ + "/absolute/path/to/SpectraCleanse/spectracleanse-engineering" + ] +} +``` + +--- + +## Plugin structure validation + +After installing, verify the plugin structure is intact: + +```bash +# From the spectracleanse-engineering directory: +ls .claude-plugin/plugin.json # Plugin manifest +ls skills/ # 9 skill directories +ls agents/ # 2 agent files +ls docs/ # 6 doc files +ls .mcp.json.example # MCP config template +ls README.md CHANGELOG.md # Plugin docs +``` + +Expected output: +``` +skills/ + spectracleanse-code-review/SKILL.md + spectracleanse-architecture/SKILL.md + spectracleanse-deploy-readiness/SKILL.md + spectracleanse-processing-pipeline/SKILL.md + spectracleanse-auth-billing/SKILL.md + spectracleanse-incident-response/SKILL.md + spectracleanse-documentation/SKILL.md + spectracleanse-testing-strategy/SKILL.md + spectracleanse-founder-operating-review/SKILL.md + +agents/ + spectracleanse-repo-researcher.md + spectracleanse-incident-commander.md + +docs/ + product-context.md + render-deploy-checklist.md + mcp-roadmap.md + supported-formats-and-processing-boundaries.md + env-and-secrets-reference.md + plugin-validation.md +``` + +--- + +## Schema validation + +The plugin manifest (`.claude-plugin/plugin.json`) follows the Claude plugin schema. 
Validate it: + +```bash +# Check JSON syntax +python3 -c "import json; json.load(open('spectracleanse-engineering/.claude-plugin/plugin.json')); print('JSON valid')" + +# Check all referenced skill paths exist +python3 - << 'EOF' +import json, os +manifest = json.load(open('spectracleanse-engineering/.claude-plugin/plugin.json')) +for skill in manifest.get('skills', []): + path = os.path.join('spectracleanse-engineering', skill['path']) + status = '✅' if os.path.exists(path) else '❌ MISSING' + print(f"{status} {path}") +for agent in manifest.get('agents', []): + path = os.path.join('spectracleanse-engineering', agent['path']) + status = '✅' if os.path.exists(path) else '❌ MISSING' + print(f"{status} {path}") +EOF +``` + +--- + +## Manual validation checklist + +After loading the plugin in Claude, test each skill by description-matching: + +### Skill availability + +Ask Claude: _"What skills do you have for SpectraCleanse?"_ +Expected: Claude should list and briefly describe the 9 SpectraCleanse skills. + +### Code review skill + +Ask Claude: _"Review this change to server.js: I moved the stripe-webhook route to after express.json()."_ +Expected: Claude should immediately flag this as a critical regression (breaks Stripe webhook signature verification) using the `spectracleanse-code-review` skill's escalation rules. + +### Deploy readiness skill + +Ask Claude: _"Run the deploy readiness checklist for SpectraCleanse."_ +Expected: Claude should produce a structured checklist with Node version, env vars, CORS, DB persistence, Stripe, and smoke test sections. + +### Processing pipeline skill + +Ask Claude: _"Does SpectraCleanse support WAV files for Full Server Cleanse?"_ +Expected: Claude should correctly say WAV is accepted by Multer but rejected by the processor with HTTP 422, and direct users to convert to M4A/MP4. Should NOT say WAV is supported. + +### Auth/billing skill + +Ask Claude: _"A user upgraded to Creator but still sees Free plan. 
Walk me through debugging this."_ +Expected: Claude should walk through Stripe webhook delivery, `session.metadata.userId`, DB plan column, `/api/me` refresh — in the order defined by the skill. + +### Incident response skill + +Ask Claude: _"Production is down — spectracleanse.com isn't loading."_ +Expected: Claude should ask for or attempt `/api/health` check first, then provide ranked hypotheses (broken deploy, missing env vars, CORS) with a severity assessment. + +### Documentation skill + +Ask Claude: _"Write updated README format support documentation."_ +Expected: Claude should correctly limit server-side support to MP4/M4A, note Quick Cleanse for MP3, and explicitly NOT claim WAV/FLAC are supported by Full Server Cleanse. + +### Founder operating review skill + +Ask Claude: _"Run my SpectraCleanse operating review."_ +Expected: Claude should ask for or look up recent git log, then produce Shipped/In Progress/Risks/Friction/Highest-leverage next move/Next 3 actions format. + +--- + +## MCP configuration (optional) + +To enable MCP tools: + +```bash +# Copy the example and fill in real values +cp spectracleanse-engineering/.mcp.json.example .mcp.json +# Edit .mcp.json with your actual tokens +# NEVER commit .mcp.json to version control +``` + +Add `.mcp.json` to `.gitignore`: +```bash +echo ".mcp.json" >> .gitignore +``` + +--- + +## Updating the plugin + +When the SpectraCleanse codebase changes significantly: + +1. Re-read the affected source files (`server.js`, `cleansePolicy.js`, `app.tsx`, `.env.example`) +2. Update the relevant SKILL.md file(s) with accurate facts +3. Update `docs/product-context.md` if stack facts changed +4. Update `CHANGELOG.md` with the change and date +5. Update `docs/env-and-secrets-reference.md` if env vars changed +6. Update `docs/supported-formats-and-processing-boundaries.md` if format support changed + +The plugin is only as accurate as its last update. Treat it like documentation — it drifts if not maintained. 
+ +--- + +## Known schema uncertainty + +The Claude plugin schema is evolving. If the `plugin.json` format changes: +- Check the latest Claude / Claude Code documentation for the current plugin manifest schema +- The `skills[].path` and `agents[].path` fields may change to `skills[].skillPath` or similar +- The `docs` array in the manifest may not be a standard field — if Claude doesn't recognize it, remove it without affecting skill or agent loading + +The skill and agent Markdown files themselves do not depend on the manifest schema: they will keep working as long as the manifest's path entries (currently `skills[].path` and `agents[].path`, whatever those fields are named in future) point to valid SKILL.md and agent files. diff --git a/spectracleanse-engineering/docs/product-context.md b/spectracleanse-engineering/docs/product-context.md new file mode 100644 index 0000000..1f8db76 --- /dev/null +++ b/spectracleanse-engineering/docs/product-context.md @@ -0,0 +1,169 @@ +# SpectraCleanse – Product and Engineering Context + +This document is the reference for all plugin skills. It reflects facts confirmed from the repo as of May 2026. Mark any item you update with the date of change. + +--- + +## What SpectraCleanse does + +SpectraCleanse AI strips AI-provenance markers from audio and video files, then injects SEO-optimized metadata to help files perform well on algorithmic platforms (YouTube, Spotify, Apple Music, TikTok). + +Core value: removes embedded tags that signal AI-generated origin ("nuclear wipe"), then re-injects clean, platform-tuned metadata powered by Google Gemini. 
+ +--- + +## Architecture overview + +``` +Browser (React SPA, app.tsx) + │ + │ HTTPS (VITE_API_URL) + ▼ +Node.js / Express (server.js, port 3001) + │ + ├── Auth: bcrypt + JWT (7d), no email verification + ├── Billing: Stripe subscription checkout + webhook + ├── Upload: Multer → uploads/ (ephemeral) + ├── Processing: ExifTool (exiftool-vendored, via server/processor.js) + ├── SEO: Google Gemini gemini-2.5-flash REST API + ├── DB: SQLite (better-sqlite3, WAL mode, DB_PATH) + └── Static: Vite-built dist/ served via express.static +``` + +--- + +## Confirmed stack (verified from package.json, server.js, app.tsx) + +| Layer | Technology | Version | Notes | +|---|---|---|---| +| Frontend framework | React | 18.3.1 | Single-file app.tsx | +| Frontend build | Vite | 4.5.14 | TypeScript 5.5.2 | +| Frontend styling | Tailwind CSS | 3.4.4 | postcss + autoprefixer | +| Frontend icons | lucide-react | 0.390.0 | | +| Backend runtime | Node.js | 20.20.2 | Pinned in .nvmrc; Node 24 incompatible | +| Backend framework | Express | 4.19.2 | | +| Database | better-sqlite3 | 9.6.0 | WAL mode, FK enforcement | +| Media analysis (browser) | music-metadata | 11.12.3 | Graceful parseError fallback | +| MP3 write (browser) | browser-id3-writer | 4.4.0 | Quick Cleanse only | +| Media processing (server) | exiftool-vendored | 28.3.1 | Wraps ExifTool Perl CLI | +| AI metadata gen | Google Gemini | gemini-2.5-flash | REST API (not SDK), structured JSON output | +| Auth | jsonwebtoken + bcryptjs | jwt 9.0.2, bcrypt 2.4.3 | 7d JWT, bcrypt cost 12 | +| Billing | stripe | 16.2.0 | Subscription checkout, webhook | +| File upload | multer | 2.0.0 | dest: uploads/, 500MB limit | +| File utils | fs-extra | 11.2.0 | | + +--- + +## Confirmed API endpoints + +All endpoints confirmed present in `server.js`: + +| Method | Path | Auth | Description | +|---|---|---|---| +| GET | /api/health | No | Returns `{"status":"ok","time":"..."}` | +| POST | /api/register | No | Email/password registration | +| POST | 
/api/login | No | Email/password login | +| GET | /api/me | Bearer JWT | Live plan + usage | +| POST | /api/create-checkout-session | Bearer JWT | Stripe checkout session | +| POST | /api/stripe-webhook | Stripe signature | Handles plan upgrades/downgrades | +| POST | /api/process | Bearer JWT, multipart | Single-file server cleanse | +| POST | /api/process-batch | Bearer JWT, multipart | Multi-file batch (paid plans, max 20) | +| POST | /api/generate-seo | Bearer JWT | Gemini SEO metadata generation | +| GET | /api/download/:token | Bearer JWT | One-time batch output download | + +--- + +## Confirmed environment variables + +See `docs/env-and-secrets-reference.md` for full descriptions and Render notes. + +**Backend (runtime)**: +`PORT`, `NODE_ENV`, `FRONTEND_URL`, `ALLOWED_ORIGINS`, `JWT_SECRET`, `DB_PATH`, +`STRIPE_SECRET_KEY`, `STRIPE_WEBHOOK_SECRET`, `STRIPE_CREATOR_PRICE_ID`, `STRIPE_STUDIO_PRICE_ID`, +`ENABLE_MOCK_CHECKOUT`, `GEMINI_API_KEY` + +**Frontend (build-time)**: +`VITE_API_URL` ← confirmed from `app.tsx` line 11. Note: `.env.example` lists `VITE_BACKEND_URL` — this is an **inaccuracy in .env.example**. The correct variable is `VITE_API_URL`. + +--- + +## Format support (confirmed from server/cleansePolicy.js) + +| Format | Quick Cleanse (browser) | Full Server Cleanse | Notes | +|---|---|---|---| +| MP3 | ✅ | ❌ 422 | Browser: browser-id3-writer. Server: rejected. | +| MP4 | ❌ | ✅ | Server-side ExifTool | +| M4A | ❌ | ✅ | Server-side ExifTool | +| WAV | ❌ | ❌ 422 | Multer accepts, processor rejects | +| FLAC | ❌ | ❌ 422 | Multer accepts, processor rejects | + +⚠️ README.md currently claims "drag in any MP3, WAV, FLAC, M4A, or MP4 file" in marketing copy. This is an overclaim. WAV and FLAC are not reliably processable by the server. This should be corrected. 
+ +--- + +## Plan and pricing (confirmed from server.js and app.tsx) + +| Plan | Price | Monthly limit | Batch | Source | +|---|---|---|---|---| +| Free | $0 | 3 files/month | ❌ | server.js `FREE_MONTHLY_LIMIT = 3` | +| Creator | $9.99/mo | Unlimited | ✅ | app.tsx line 228, server.js | +| Studio | $29.99/mo | Unlimited | ✅ | app.tsx line 256, server.js | +| Enterprise | (not in Stripe) | — | — | Type only in app.tsx line 32 | + +--- + +## Risk model + +**Critical risks** (failure causes data loss or trust violation): +1. SQLite on ephemeral Render disk → all user data lost on redeploy +2. ExifTool not removing provenance markers → core product guarantee violated +3. JWT_SECRET rotation without notice → all users logged out simultaneously +4. Stripe webhook `express.raw()` ordering changed → webhook signature verification broken → plan upgrades dropped + +**High risks** (failure causes revenue loss or major UX degradation): +5. `VITE_API_URL` missing from build environment → frontend cannot reach backend +6. `FRONTEND_URL`/`ALLOWED_ORIGINS` misconfigured → CORS blocks all requests +7. Gemini API key invalid or quota exceeded → SEO generation unavailable +8. Stripe price ID mismatch → checkout session creation fails + +**Medium risks** (failure causes user confusion or minor data issues): +9. WAV/FLAC accepted by Multer but rejected by processor → upload bandwidth wasted +10. JWT plan stale after Stripe webhook → user sees wrong plan until `/api/me` refresh +11. 
CI uses Node 18 but production uses Node 20.20.2 → potential native module incompatibility undetected + +--- + +## Known discrepancies (as of May 2026) + +| Location | Discrepancy | +|---|---| +| `.env.example` | Lists `VITE_BACKEND_URL`; `app.tsx` reads `VITE_API_URL` | +| `README.md` marketing copy | Claims WAV/FLAC support; processor rejects both | +| `.github/workflows/ci.yml` | Uses Node 18; production target is Node 20.20.2 | +| `README.md` | Lists `REDIS_URL` as a required production env var; Redis is not used in current codebase | + +--- + +## Deployment platforms + +Both Render and Spaceship Hyperlift are documented as deployment targets: +- `deploy.md`: Spaceship Hyperlift deployment guide (uses `hyperlift.toml`) +- `README.md`: Docker deployment with Render-style env var tables +- `.github/workflows/PIPELINE.md`: GitHub Actions to Docker Hub + Hyperlift rolling deploy + +The plugin's deploy checklist covers both. When a specific platform matters, note which one. + +--- + +## What does not exist (as of May 2026) + +- No email verification +- No password reset +- No Google/OAuth login +- No automated unit or integration tests (only CI smoke test of `/api/health`) +- No job queue (batch processing is synchronous) +- No admin dashboard +- No Sentry or error monitoring +- No rate limiting (beyond per-user monthly limit) +- No Redis usage in current codebase +- No SpectraCleanse Admin MCP (planned — see docs/mcp-roadmap.md) diff --git a/spectracleanse-engineering/docs/render-deploy-checklist.md b/spectracleanse-engineering/docs/render-deploy-checklist.md new file mode 100644 index 0000000..d0dcfb7 --- /dev/null +++ b/spectracleanse-engineering/docs/render-deploy-checklist.md @@ -0,0 +1,164 @@ +# SpectraCleanse Render / Hyperlift Deploy Checklist + +Use this checklist before every production deploy. Complete every item and note ✅ or ❌. + +This checklist covers both Render (web service) and Spaceship Hyperlift (both documented in the repo). 
Note which platform you're deploying to. + +--- + +## Platform: [Render / Hyperlift — fill in] +## Deploy date: [date] +## Commit SHA: [sha] +## Changes in this deploy: [brief summary] + +--- + +## 1. Node version + +- [ ] Service is pinned to Node 20.20.2 (`NODE_VERSION=20.20.2` in service environment) +- [ ] Not Node 24 (better-sqlite3 native compilation incompatible) +- [ ] Build log confirms `v20.x.x` — not `v18.x.x` or `v24.x.x` + +Note: `.github/workflows/ci.yml` uses Node 18 for CI. This diverges from the production pin. Do not use CI's Node version as a guide for the production runtime. + +--- + +## 2. Build commands + +- [ ] `npm ci` (not `npm install`) — ensures lockfile is respected +- [ ] `tsc --noEmit` passes — no TypeScript errors +- [ ] `vite build` succeeds — `dist/index.html` and `dist/assets/` are present +- [ ] `npm audit --audit-level=high` passes (run locally before push if CI hasn't run yet) + +Start command: `node server.js` (confirmed `package.json` `scripts.start`). + +--- + +## 3. Backend environment variables (Render/Hyperlift runtime) + +All of these must be set as secrets or env vars in the Render/Hyperlift dashboard. Never hardcode. + +| Variable | Required | Check | +|---|---|---| +| `NODE_ENV` | ✅ must be `production` | [ ] | +| `JWT_SECRET` | ✅ non-empty, ≥32 chars random hex | [ ] | +| `STRIPE_SECRET_KEY` | ✅ starts with `sk_live_` in production | [ ] | +| `STRIPE_WEBHOOK_SECRET` | ✅ starts with `whsec_` | [ ] | +| `STRIPE_CREATOR_PRICE_ID` | ✅ starts with `price_` | [ ] | +| `STRIPE_STUDIO_PRICE_ID` | ✅ starts with `price_`, different from Creator | [ ] | +| `GEMINI_API_KEY` | ✅ non-empty | [ ] | +| `FRONTEND_URL` | ✅ `https://spectracleanse.com` (no trailing slash) | [ ] | +| `DB_PATH` | ✅ path on persistent volume, e.g. 
`/data/spectra.db` | [ ] | +| `PORT` | ✅ `3001` (or platform-expected port) | [ ] | +| `ENABLE_MOCK_CHECKOUT` | ❌ must NOT be `true` in production | [ ] | +| `ALLOWED_ORIGINS` | Optional — use if frontend and backend are on separate domains | [ ] | + +--- + +## 4. Frontend build environment variables (must be set at build time) + +⚠️ These are Vite variables — they must be available when `vite build` runs, not at runtime. + +| Variable | Value | Check | +|---|---|---| +| `VITE_API_URL` | `https://api.spectracleanse.com` (or your actual backend URL) | [ ] | + +**Critical**: The frontend reads `VITE_API_URL` (confirmed in `app.tsx` line 11). `.env.example` incorrectly lists `VITE_BACKEND_URL` — do not use that name. If `VITE_API_URL` is missing at build time, the frontend will throw "Missing VITE_API_URL in production build" on first load. + +--- + +## 5. CORS configuration + +- [ ] `FRONTEND_URL` matches the exact origin the browser sends (no trailing slash, correct scheme) +- [ ] If frontend and backend are on different domains, `ALLOWED_ORIGINS` also covers the frontend origin +- [ ] No wildcard (`*`) CORS in production +- [ ] Confirm in browser devtools: `Access-Control-Allow-Origin: https://spectracleanse.com` in response headers + +If both `FRONTEND_URL` and `ALLOWED_ORIGINS` are empty in production, the server calls `process.exit(1)` at startup — the service will appear to crash on deploy. + +--- + +## 6. Database and persistence + +- [ ] Render/Hyperlift persistent disk is configured and mounted at the `DB_PATH` path +- [ ] Disk persists across deploys and service restarts +- [ ] `DB_PATH` env var matches the mount path exactly (e.g. 
`DB_PATH=/data/spectra.db` with disk at `/data`) +- [ ] If this is a fresh deploy, the DB will be created automatically by `db.exec(CREATE TABLE IF NOT EXISTS ...)` — no manual migration needed for the current schema +- [ ] Confirm the existing `spectra.db` file is not being overwritten by a fresh deploy + +⚠️ If the disk is not mounted and `DB_PATH` points to the working directory, the DB is ephemeral — all user accounts and jobs are lost on every redeploy. This is a critical data-loss risk. + +--- + +## 7. Stripe configuration + +- [ ] Stripe webhook is registered in Stripe Dashboard → Developers → Webhooks +- [ ] Webhook URL: `https://[backend-domain]/api/stripe-webhook` +- [ ] Webhook events: `checkout.session.completed`, `customer.subscription.deleted` +- [ ] Webhook signing secret matches `STRIPE_WEBHOOK_SECRET` env var exactly +- [ ] `STRIPE_CREATOR_PRICE_ID` and `STRIPE_STUDIO_PRICE_ID` match active products in Stripe Dashboard +- [ ] Using live-mode keys (`sk_live_`, `whsec_`) not test-mode keys in production + +--- + +## 8. Smoke tests (run against live deployment after deploy completes) + +```bash +# Set your backend URL +BACKEND=https://api.spectracleanse.com +FRONTEND=https://spectracleanse.com + +# 1. Health check +curl -sf $BACKEND/api/health +# Expected: {"status":"ok","time":"..."} + +# 2. Unauthenticated /api/me → 401 +curl -o /dev/null -w "%{http_code}" $BACKEND/api/me +# Expected: 401 + +# 3. Unknown API route → 404 JSON (no -f here: --fail would suppress the 404 response body) +curl -s $BACKEND/api/nonexistent +# Expected: {"error":"API route not found",...} + +# 4. 
Frontend loads +curl -sf $FRONTEND | grep -q "SpectraCleanse" +# Expected: success (HTML with app content, not blank or 500) +``` + +**Manual smoke tests** (from `docs/manual-qa-checklist.md`): +- [ ] Register a new user +- [ ] Login with that user +- [ ] `/api/me` returns `{ plan: 'free', usage: { thisMonth: 0, limit: 3 } }` +- [ ] Trigger checkout flow → verify Stripe Checkout URL returned (not mock URL) +- [ ] Upload a `.mp4` test file → verify download succeeds + `X-Forensic-Removed` header present +- [ ] Upload a `.mp3` to `/api/process` → verify 422 with correct guidance message +- [ ] Trigger `/api/generate-seo` with title + artist → verify JSON with `title`, `description`, `tags` + +--- + +## 9. Go / No-Go + +| Gate | Status | +|---|---| +| Node 20.20.2 pinned | ✅ / ❌ | +| Build passes (tsc + vite) | ✅ / ❌ | +| All required env vars set | ✅ / ❌ | +| `VITE_API_URL` in build env | ✅ / ❌ | +| CORS origins correct | ✅ / ❌ | +| Persistent disk mounted | ✅ / ❌ | +| Stripe configured with live keys | ✅ / ❌ | +| `/api/health` returns 200 | ✅ / ❌ | +| Auth smoke test passes | ✅ / ❌ | + +**GO** only if all gates are ✅. + +--- + +## Rollback procedure + +1. In Render/Hyperlift dashboard → navigate to the service → Deployments +2. Find the last successful deploy SHA +3. Click "Redeploy" on that SHA +4. Wait for `/api/health` to return `{"status":"ok"}` +5. If DB schema was changed in the failed deploy: assess whether the rollback is safe (all schema changes in current codebase are additive/idempotent — `CREATE TABLE IF NOT EXISTS`) +6. 
Communicate to users if auth, billing, or processing was impacted diff --git a/spectracleanse-engineering/docs/supported-formats-and-processing-boundaries.md b/spectracleanse-engineering/docs/supported-formats-and-processing-boundaries.md new file mode 100644 index 0000000..0d3f5dc --- /dev/null +++ b/spectracleanse-engineering/docs/supported-formats-and-processing-boundaries.md @@ -0,0 +1,146 @@ +# SpectraCleanse – Supported Formats and Processing Boundaries + +This document describes what file formats are actually supported, where processing happens, and how to verify claims from the codebase. Do not update user-facing docs or claim format support without checking the sources listed here. + +--- + +## Authoritative sources + +To verify format support, always read these files in this order: + +1. **`server/cleansePolicy.js`** — defines `CLEANSE_POLICY`. This is the single source of truth for server-side format support. If a format is not in `CLEANSE_POLICY.server.supportedExtensions`, it is not supported by Full Server Cleanse. + +2. **`server.js` — `ALLOWED_MIME` constant** — defines the MIME types Multer will accept for upload. A format being in `ALLOWED_MIME` does not mean it is processable — it only means Multer won't reject the upload. + +3. **`server/processor.js` — `isServerSupportedFormat()` call** — the processor checks the format after upload. If the format is not in `cleansePolicy.js`, `processMediaFile()` throws an `unsupportedCleanseError` (HTTP 422). + +4. **`app.tsx`** — for Quick Cleanse (browser-side) format support. Look for `browser-id3-writer` usage and `music-metadata` parsing calls. 
+ +--- + +## Current format matrix (confirmed May 2026) + +### Full Server Cleanse (`POST /api/process`, `POST /api/process-batch`) + +| Format | Extension(s) | MIME accepted by Multer | Processor outcome | HTTP on rejection | +|---|---|---|---|---| +| MP4 | `.mp4` | `video/mp4`, `audio/mp4` | ✅ Processed | — | +| M4A | `.m4a` | `audio/mp4`, `audio/m4a`, `audio/x-m4a` | ✅ Processed | — | +| MP3 | `.mp3` | `audio/mpeg` | ❌ 422 — use Quick Cleanse | 422 `unsupported_file_type` | +| WAV | `.wav` | `audio/wav`, `audio/x-wav` | ❌ 422 | 422 `unsupported_file_type` | +| FLAC | `.flac` | `audio/flac`, `audio/x-flac` | ❌ 422 | 422 `unsupported_file_type` | +| Other | any | Not accepted | ❌ 415 | 415 (Multer MIME filter) | + +**Source**: `CLEANSE_POLICY.server.supportedExtensions = ['.mp4', '.m4a']` in `server/cleansePolicy.js`. + +### Quick Cleanse — browser-side (`app.tsx`) + +| Format | Supported | Library | Notes | +|---|---|---|---| +| MP3 | ✅ | `browser-id3-writer` | ID3 tags written/cleared in browser. No server upload. | +| Other | ❌ | — | `music-metadata` may parse for analysis but no write support | + +**Source**: `browser-id3-writer` import and usage in `app.tsx`. `music-metadata` is used for metadata analysis/display only. + +### SEO generation (`POST /api/generate-seo`) + +Format-agnostic — accepts any metadata payload. The endpoint does not receive a file; it receives structured metadata fields (title, artist, genre, etc.) and sends them to Gemini. Any format can use SEO generation as long as the metadata is provided. + +### Batch processing (`POST /api/process-batch`) + +Same format support as Full Server Cleanse (MP4, M4A). Batch is restricted to paid plans (creator, studio). Free users receive HTTP 403. 
+ +--- + +## Processing pipeline boundaries + +``` + ┌─────────────────────────────────┐ + │ User uploads file │ + └────────────────┬────────────────┘ + │ + ┌────────────────▼────────────────┐ + │ Multer MIME filter │ + │ (ALLOWED_MIME set in server.js)│ + │ Rejects if MIME not in set → 415│ + └────────────────┬────────────────┘ + │ + ┌────────────────▼────────────────┐ + │ Plan enforcement │ + │ Free: limit 3/month → 402 │ + │ Batch free: → 403 │ + └────────────────┬────────────────┘ + │ + ┌────────────────▼────────────────┐ + │ isServerSupportedFormat() │ + │ cleansePolicy.js check │ + │ Unsupported → 422 │ + └────────────────┬────────────────┘ + │ (MP4, M4A only) + ┌────────────────▼────────────────┐ + │ processMediaFile() │ + │ (server/processor.js) │ + │ 1. detectMarkers() (pre-wipe) │ + │ 2. ExifTool wipe all tags │ + │ 3. Zero QuickTime timestamps │ + │ 4. buildMetaToWrite() inject │ + │ 5. verifyFinalState() │ + │ 6. Compute SHA-256 │ + └────────────────┬────────────────┘ + │ + ┌────────────────▼────────────────┐ + │ res.download() output file │ + │ + X-Forensic-* headers │ + │ + Cleanup input + output │ + └─────────────────────────────────┘ +``` + +--- + +## What gets removed (Full Server Cleanse, MP4/M4A) + +All ExifTool-readable tags are wiped in a first pass. After the wipe: +- Tags matching `isBenign()` (file system metadata, technical container info) are left as-is +- Tags matching `isAllowedInjected()` (tags SpectraCleanse intentionally writes back) are expected to be present +- QuickTime timestamp fields (`CreateDate`, `ModifyDate`, `TrackCreateDate`, `TrackModifyDate`, `MediaCreateDate`, `MediaModifyDate`) are zeroed to `0000:00:00 00:00:00` +- All other descriptive and provenance-bearing tags are removed + +**Source**: `server/metadataRules.js` for `isBenign`, `isAllowedInjected`, `MARKER_RULES`; `server/processor.js` for `QUICKTIME_TIMESTAMP_FIELDS`. 
+ +--- + +## What gets written back (Full Server Cleanse, MP4/M4A) + +From user-supplied metadata via `buildMetaToWrite()` in `server/processor.js`: +- `ItemList:Title`, `QuickTime:Title`, `Keys:Title`, `Keys:DisplayName` +- `ItemList:Artist`, `QuickTime:Artist`, `ItemList:Author`, `ItemList:AlbumArtist`, `Keys:Artist` (if artist provided) +- Additional fields for description, genre, copyright, producer, lyrics (see `buildMetaToWrite` for full list) + +All values sanitized through `cleanText(value, maxLength)` before being passed to ExifTool. + +--- + +## Known WAV/FLAC behavior + +WAV and FLAC files are accepted by Multer (their MIME types are in `ALLOWED_MIME`) but are rejected by the processor with HTTP 422. This means: +- The file IS uploaded to the server and written to `uploads/` +- The file is then immediately deleted after the 422 is returned +- The user sees: "Full Server Cleanse currently supports MP4 and M4A only. Use Quick Cleanse (Browser) for MP3, or convert WAV/FLAC to M4A/MP4." + +**Implication**: WAV/FLAC uploads waste user bandwidth. A future improvement could reject these at the MIME filter level with a more helpful error message, or add ExifTool-based WAV/FLAC support to `cleansePolicy.js`. + +--- + +## How to add a new format (procedure) + +Before adding support for a new format: +1. Verify ExifTool can reliably read AND write that format without data loss. Start with `exiftool -listwf`, which lists the file types ExifTool can write; the format's extension must appear there +2. Create a test fixture: a real file of that format with known metadata embedded +3. Test `processMediaFile()` against the fixture manually +4. If successful, add the extension to `CLEANSE_POLICY.server.supportedExtensions` in `cleansePolicy.js` +5. Add the MIME type to `ALLOWED_MIME` in `server.js` +6. Update `isServerSupportedFormat()` in `cleansePolicy.js` if MIME alias handling is needed +7. Update user-facing error messages if the rejection message lists specific supported formats +8. 
Update this document and `docs/product-context.md` + +Do not add a format to `ALLOWED_MIME` without also adding it to `cleansePolicy.js` — this would cause uploads to succeed but processing to fail with 422. diff --git a/spectracleanse-engineering/skills/spectracleanse-architecture/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-architecture/SKILL.md new file mode 100644 index 0000000..0d46938 --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-architecture/SKILL.md @@ -0,0 +1,119 @@ +# spectracleanse-architecture + +**Use this skill when making or evaluating architectural decisions for SpectraCleanse** — adding new features, refactoring the monolith, changing how files are processed, upgrading auth, or expanding to new deployment targets. + +--- + +## SpectraCleanse context + +Current architecture is a **single-process Node.js monolith** on Render/Hyperlift: +- `server.js` handles HTTP, auth, billing, file uploads, processing dispatch, SEO proxy, and static SPA serving in one process. +- SQLite (`better-sqlite3`) provides persistence. WAL mode enabled. Tables: `users` (id, email, password, plan, stripe_customer_id, stripe_subscription_id, created_at), `jobs` (id, user_id, filename, platform, created_at). +- File uploads land in `uploads/` on the local filesystem. `server/cleanup.js` handles deletion. Files are ephemeral — Render's filesystem is not persistent across deploys. +- ExifTool (`exiftool-vendored`) runs in the same Node process. It forks a Perl subprocess; the `exiftool` singleton is shared across requests. +- Gemini calls are synchronous HTTP from within the request handler — no queue, no retry, no fallback model. +- Stripe checkout is server-side session creation. Webhooks are received at `/api/stripe-webhook`. +- The frontend is a single `app.tsx` served as a Vite-built SPA from `dist/`. + +**Known architectural constraints**: +- Render's free/hobby tiers have ephemeral disks. 
SQLite DB must be on a persistent volume (set `DB_PATH=/data/spectra.db` with a mounted volume) or data is lost on redeploy. +- `better-sqlite3` requires native compilation — incompatible with Node 24 (verified). Node 20.20.2 is the pinned target. +- Concurrent ExifTool operations share one subprocess pool. Heavy batch traffic could saturate it. +- No job queue currently exists. `/api/process-batch` is synchronous: all files processed sequentially before response. +- No email verification, password reset, or OAuth in the current auth system. + +--- + +## Decision framework + +For each architectural decision, produce this structure: + +``` +## Decision needed +[What specifically needs to be decided] + +## Current facts (from repo) +[What the code actually does today — cite files] + +## Constraints +[Render limitations, SQLite behavior, Node version, budget, solo-founder bandwidth] + +## Options +[2–4 concrete options with brief description] + +## Recommendation +[The option this analysis recommends and why] + +## Tradeoffs +[What the recommendation gives up or risks] + +## Migration path +[How to move from current state to recommended state without breaking production] + +## Risks +[What could go wrong during or after the change] + +## Open questions +[What must be answered before committing to this decision] +``` + +--- + +## Common SpectraCleanse architectural questions + +### Monolith vs. split API/worker +Current: one process handles HTTP and ExifTool processing. +Risk: long ExifTool jobs block the event loop for other requests. +Consideration: Render worker services are available. A job queue (BullMQ + Redis, or a simple SQLite-backed queue) would allow the HTTP server to return a job ID immediately while a worker processes the file. This is a medium-complexity change with significant UX improvement for batch uploads. 
+Do not recommend splitting without accounting for: shared `uploads/` filesystem between API and worker processes, SQLite concurrency (WAL handles reads, but writes from two processes need careful design), and Render persistent disk cost. + +### SQLite now vs. Postgres later +Current: SQLite with WAL. Works well for a single-process, low-concurrency app. +Migration trigger: if a second process (worker) needs to write concurrently, or if Render disk cost becomes a bottleneck, or if user base grows to thousands of concurrent sessions. +Migration path: schema is simple (`users`, `jobs`). Drizzle ORM or Knex would make a Postgres swap low-risk. The main change is connection pooling (`pg` pool vs. single SQLite connection). +Do not recommend migrating until the SQLite bottleneck is actually observed. + +### File storage / retention model +Current: uploaded files land in `uploads/` and are deleted by `cleanup.js` after download or on a timer. Output files are also deleted after `res.download()`. +Risk: if the server restarts before a file is downloaded, it's lost. One-time download tokens (`server/downloadTokens.js`) expire, so users may lose output files. +Consideration: S3/R2 object storage would give durable file URLs with TTLs. This is the right next step if batch download failure reports increase. + +### Auth upgrades (email verification, password reset, Google OAuth) +Current: register/login with email + bcrypt, JWT, no verification or reset flow. +Email verification: requires an email provider (Resend, Postmark, or SendGrid) and a new `email_verified` column in `users`. Add before user base grows significantly. +Password reset: token-based; requires `password_reset_tokens` table and email provider. +Google OAuth: requires `google-auth-library`, a new `oauth_provider` column, and handling the case where a Google email matches an existing password account. +Priority order: email verification → password reset → OAuth. 
+ +### Processing job queue design +Options: (1) in-process async queue (simple but still blocks one Node thread), (2) SQLite-backed queue (add `status`, `result` columns to `jobs` table — no new infra), (3) BullMQ + Redis (robust, requires Redis on Render), (4) Render background worker service. +Recommendation for current scale: SQLite-backed queue with a polling worker. Adds async processing without new infra cost. + +### Admin dashboard / MCP integration +Current: no admin interface exists. +Recommendation: build a minimal admin MCP server first (see `docs/mcp-roadmap.md`). This gives Claude-accessible deploy health and usage stats before building a full web admin UI. + +### Supported file format expansion +Before adding any new format to `cleansePolicy.js`: +1. Verify ExifTool can reliably read AND write the format without data loss. +2. Add a test fixture (a real file of that format with known metadata). +3. Update `ALLOWED_MIME` in `server.js` to accept the new MIME type. +4. Update `isServerSupportedFormat` in `cleansePolicy.js`. +5. Update user-facing docs and error messages. +Do not add format support based on MIME accept-list alone — Multer allows it but the processor may silently corrupt or fail. + +--- + +## Do not assume +- Do not assume Render has a persistent disk by default — it must be explicitly configured and mounted. +- Do not assume SQLite handles concurrent writes from multiple processes safely without WAL + careful transaction design. +- Do not assume ExifTool can process any format in `ALLOWED_MIME` — Multer and ExifTool have independent format support. +- Do not assume a job queue exists — batch processing is currently synchronous. +- Do not assume email sending is available — no email provider is currently configured. + +--- + +## Escalate if +- A decision would require data migration on the live `spectra.db` without a tested migration script and rollback plan. 
+- A decision would make the Stripe webhook unreachable (changing the `/api/stripe-webhook` path or removing `express.raw()` ordering). +- A decision involves storing files outside the current `uploads/` pattern without a verified cleanup strategy. diff --git a/spectracleanse-engineering/skills/spectracleanse-auth-billing/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-auth-billing/SKILL.md new file mode 100644 index 0000000..1c14062 --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-auth-billing/SKILL.md @@ -0,0 +1,155 @@ +# spectracleanse-auth-billing + +**Use this skill when debugging or reviewing auth flows, JWT state, plan enforcement, Stripe checkout, or upgrade/downgrade behavior in SpectraCleanse.** + +--- + +## SpectraCleanse context (confirmed from repo) + +### Auth system +- **Registration**: `POST /api/register` — email + password (min 8 chars). Emails normalized to lowercase. Passwords hashed with `bcrypt` (cost 12). Returns JWT + user object on success. 409 if email already exists. +- **Login**: `POST /api/login` — returns JWT + user object. Constant-time comparison (bcrypt.compare even for unknown emails via dummy hash). +- **JWT**: signed with `JWT_SECRET`, 7-day expiry, payload: `{ sub: userId, email, plan }`. Stored client-side (localStorage or memory — verify from `app.tsx`). +- **Auth middleware** (`requireAuth`): checks for an `Authorization: Bearer <token>` header, verifies with `jwt.verify(token, JWT_SECRET)`. Returns 401 on missing header or invalid/expired token. +- **No email verification, no password reset, no OAuth** currently exists. +- **`/api/me`** (GET, authenticated): queries DB for current user, returns `{ user: { id, email, plan, created_at }, usage: { thisMonth, limit } }`. This is the only endpoint that returns the **live plan from DB** — the JWT plan field is stale if a Stripe webhook has fired since login. 
+ +### Plan system +- Plans: `free`, `creator`, `studio` (and `enterprise` in frontend types — not a current Stripe product). +- Free: 3 files/month (`FREE_MONTHLY_LIMIT = 3`). Enforced in `/api/process` and implicitly in `/api/process-batch` (403). +- Creator: `$9.99/mo`, `STRIPE_CREATOR_PRICE_ID`. +- Studio: `$29.99/mo`, `STRIPE_STUDIO_PRICE_ID`. +- Downgrade: `customer.subscription.deleted` Stripe event → `plan = 'free'`, `stripe_subscription_id = NULL` in DB. +- Plan from DB, not from JWT: after a Stripe webhook, the DB is updated but the user's JWT still carries the old plan. The frontend must call `/api/me` to pick up the upgrade. This is done after the Stripe success redirect (see `app.tsx` line 627). + +### Stripe checkout +- `POST /api/create-checkout-session` (authenticated): creates a Stripe Checkout session in `subscription` mode. +- Re-uses existing `stripe_customer_id` if available; creates a new Stripe Customer otherwise. +- `success_url`: `${FRONTEND_URL}?checkout=success&session_id={CHECKOUT_SESSION_ID}` +- `cancel_url`: `${FRONTEND_URL}?checkout=cancelled` +- `metadata`: `{ userId, priceId }` — both required for webhook processing. +- Local dev mock: if `STRIPE_CONFIGURED` is false and `ENABLE_MOCK_CHECKOUT` is true, returns a mock success redirect without hitting Stripe. + +### Stripe webhook +- `POST /api/stripe-webhook` — must be registered BEFORE `express.json()` (uses `express.raw()`). This ordering is critical; changing it breaks signature verification. +- Handles: `checkout.session.completed` (upgrades user plan), `customer.subscription.deleted` (downgrades to free). +- `checkout.session.completed`: reads `session.metadata.userId` and `session.metadata.priceId`. If `userId` is missing, logs an error and returns — the user's plan will not be upgraded. +- `planFromPriceId()`: maps `STRIPE_STUDIO_PRICE_ID` → `'studio'`, `STRIPE_CREATOR_PRICE_ID` → `'creator'`, anything else → `'creator'` (safe fallback). 
+ +### Usage tracking +- `jobs` table: one row per processed file, with `user_id`, `filename`, `platform`, `created_at`. +- `getMonthlyJobCount(userId)`: counts jobs for current calendar month. +- Counter incremented in `/api/process` after successful processing. Also incremented per-file in `/api/process-batch`. +- `/api/me` returns `usage.thisMonth` and `usage.limit` (`3` for free, `null` for paid). + +--- + +## Debugging flow + +### User can't log in +1. Check: is the email normalized to lowercase? (Registration normalizes; login normalizes too — should match.) +2. Check: was the account registered? Query: `SELECT id, email, plan FROM users WHERE email = LOWER('[email]');` +3. Check: is `JWT_SECRET` the same value as when the token was issued? A secret rotation invalidates all existing tokens. +4. Check: is the Authorization header formatted correctly (`Bearer [token]` — note the space)? +5. Check: is the token expired? JWT expiry is 7 days. + +### User upgraded but app still shows free plan +1. Check: did the Stripe webhook fire? Look for a Stripe Dashboard → Developers → Webhooks → event log entry for `checkout.session.completed`. +2. Check: was `session.metadata.userId` populated? If not, the webhook handler skips the DB update. +3. Check: did `STRIPE_WEBHOOK_SECRET` match? A signature failure returns 400 to Stripe, which retries — but the plan won't update until signature verification passes. +4. Check: did the frontend call `/api/me` after the success redirect? The plan in the JWT is stale. The `?checkout=success` handler in `app.tsx` triggers a re-fetch. +5. Check: is the plan in the DB? Query: `SELECT id, email, plan, stripe_customer_id, stripe_subscription_id FROM users WHERE id = [userId];` +6. If DB shows upgraded plan but frontend still shows free: the JWT is stale. The frontend must call `/api/me` to refresh the displayed plan, or the user can re-login to get a fresh token with the new plan baked in. Server-side enforcement is not affected by the stale token: `/api/process` does not read the JWT plan — it re-queries the DB for `plan`. 
+ +### User hits 402 unexpectedly +1. Check: what is `getMonthlyJobCount(userId)` returning? 402 fires when count ≥ 3 for free users. +2. Check: is the user's plan `free` in the DB? If Stripe webhook failed, they may have paid but DB wasn't updated. +3. Check: is the month correct? The count query uses `strftime('%Y-%m', created_at)` vs `strftime('%Y-%m', 'now')`. If the server timezone differs from what the user expects, the monthly window may be off. + +### Checkout session creation fails +1. Check: are all four Stripe env vars set (`STRIPE_SECRET_KEY`, `STRIPE_WEBHOOK_SECRET`, `STRIPE_CREATOR_PRICE_ID`, `STRIPE_STUDIO_PRICE_ID`)? +2. Check: is `stripe` null? It's set to null if `STRIPE_CONFIGURED` is false. +3. Check: is `priceId` non-null? If `plan` is not `'studio'`, it uses `STRIPE_CREATOR_PRICE_ID` — if that env var is empty, `priceId` is `undefined`. +4. Check: is `FRONTEND_URL` set? The `success_url` and `cancel_url` are built from `FRONTEND_URL`. If missing, Stripe will reject the session. + +### 401 on a valid request +1. Check: is the `Authorization` header present and starts with `Bearer `? (Note the space — `requireAuth` uses `header.slice(7)`.) +2. Check: is the token expired? (7-day TTL) +3. Check: was `JWT_SECRET` rotated since the token was issued? +4. Check: is the request being made to the correct backend URL? A mismatch between `VITE_API_URL` and the actual backend domain would result in CORS errors, not 401s — but confirm the request is reaching the server. 
+ +--- + +## Checklist + +**Auth flows** +- [ ] `/api/register` returns 201 + token on success +- [ ] `/api/register` returns 409 if email already exists +- [ ] `/api/login` returns 200 + token on valid credentials +- [ ] `/api/login` returns 401 on invalid credentials (constant-time path) +- [ ] `/api/me` returns 401 without token, 200 + live plan data with valid token +- [ ] Token expiry (7d) enforced by `jwt.verify` +- [ ] `JWT_SECRET` is set and non-empty in production (server exits at startup if missing) + +**Plan and usage** +- [ ] Free users receive 402 after 3 files/month with `upgradeRequired: true` +- [ ] Paid users do not hit the monthly limit (limit is `null` from `/api/me`) +- [ ] Usage counter increments correctly after `/api/process` success +- [ ] `/api/me` returns accurate `usage.thisMonth` and `usage.limit` + +**Stripe checkout** +- [ ] `/api/create-checkout-session` returns a Stripe Checkout URL (not mock URL) in production +- [ ] `success_url` and `cancel_url` point to the correct `FRONTEND_URL` +- [ ] `session.metadata` contains `userId` and `priceId` +- [ ] Existing Stripe customers are re-used (`stripe_customer_id` stored in DB) +- [ ] Frontend calls `/api/me` on `?checkout=success` to refresh plan + +**Stripe webhook** +- [ ] `POST /api/stripe-webhook` is registered before `express.json()` in `server.js` +- [ ] Webhook signature verified with `stripe.webhooks.constructEvent()` +- [ ] `checkout.session.completed` updates `plan`, `stripe_customer_id`, `stripe_subscription_id` in DB +- [ ] `customer.subscription.deleted` resets `plan = 'free'` in DB +- [ ] Webhook endpoint is registered in Stripe Dashboard with correct signing secret + +--- + +## Output format + +``` +## Symptom +[What the user or system is experiencing] + +## Likely causes (ranked) +[1. Most likely cause — 2. Next most likely — etc.] 
+ +## Files / endpoints to inspect +[server.js lines, DB queries, Stripe Dashboard locations] + +## Debug steps +[Ordered diagnostic commands and checks] + +## Fix plan +[Specific code or config changes] + +## Regression tests +[Curl commands or manual steps to verify the fix] + +## User-facing impact +[What users experienced and whether data was lost or degraded] +``` + +--- + +## Do not assume +- Do not assume the JWT plan field is current — it may be stale after a Stripe webhook. Always use `/api/me` for live plan. +- Do not assume mock checkout works in production — `ENABLE_MOCK_CHECKOUT` must not be `true` in prod. +- Do not assume Stripe webhook delivery is instant — events can be delayed or retried. +- Do not assume email verification exists — there is currently no email verification in the auth system. +- Do not assume `VITE_BACKEND_URL` is the correct frontend env var — the frontend reads `VITE_API_URL`. + +--- + +## Escalate if +- A user's plan is `studio` or `creator` in Stripe but `free` in the DB — webhook may have failed silently. +- `JWT_SECRET` has been rotated and all sessions are invalid — affects all logged-in users. +- `STRIPE_WEBHOOK_SECRET` is wrong — all webhooks fail with 400, all plan upgrades are silently dropped. diff --git a/spectracleanse-engineering/skills/spectracleanse-code-review/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-code-review/SKILL.md new file mode 100644 index 0000000..8b37405 --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-code-review/SKILL.md @@ -0,0 +1,119 @@ +# spectracleanse-code-review + +**Use this skill when reviewing any code change in the SpectraCleanse repo** — especially changes to `server.js`, `app.tsx`, `server/cleansePolicy.js`, `server/processor.js`, `server/metadataRules.js`, or any file touching upload handling, auth, billing, or CORS. + +--- + +## SpectraCleanse context + +- `server.js` is a 637-line monolith. 
It owns auth, billing, upload, processing dispatch, SEO proxy, static serving, and CORS in one file. Changes here can regress multiple surfaces simultaneously. +- `app.tsx` is the entire React frontend in a single file. UI state for queue, processing, download, plan, upgrade modal, and auth all live here. +- `server/cleansePolicy.js` is the authoritative format support source of truth. Quick Cleanse: `.mp3` only. Full Server Cleanse: `.mp4`, `.m4a` only. Never assume other formats work without checking this file. +- `server/processor.js` wraps ExifTool. ExifTool operations are high-risk — incorrect flag construction, missing `await`, or unhandled rejection can corrupt output files silently. +- Gemini responses from `/api/generate-seo` use `gemini-2.5-flash` with structured JSON schema (`title`, `description`, `tags`). Parsing is done manually after `JSON.parse(rawText)` — any schema drift or upstream change can produce empty strings silently. +- JWT tokens carry `{ sub, email, plan }`. The `plan` field in the token is **not** re-read from the DB on every request — only `/api/me` re-fetches live plan. A plan desync between token and DB is a known risk after Stripe webhook delivery. +- Free tier: 3 files/month (checked in `/api/process` and `/api/process-batch`). Free users get `402` with `upgradeRequired: true`. +- Batch upload: `/api/process-batch`, paid plans only, up to 20 files, 2 GB total. Download uses one-time tokens from `server/downloadTokens.js`. +- CORS: strict in production. If `FRONTEND_URL` and `ALLOWED_ORIGINS` are both missing in prod, the server exits at startup. Changes to allowed origins or methods must be tested end-to-end. +- Frontend API base URL is `VITE_API_URL` (confirmed in `app.tsx` line 11). `.env.example` incorrectly lists `VITE_BACKEND_URL` — this discrepancy must not be introduced into new code. 
+- Response headers from `/api/process`: `X-Forensic-Removed`, `X-Forensic-Tags`, `X-Forensic-Status`, `X-Forensic-Report`, `X-Process-Run-Id`, `X-Output-SHA256`, `X-Download-Name`, `X-Usage-This-Month`, `X-Usage-Limit`. Frontend reads these — removing or renaming them is a breaking change. + +--- + +## Checklist + +**Upload and file handling** +- [ ] Does the change touch Multer config (`dest`, `limits`, `fileFilter`)? Verify MIME allow-list is unchanged or intentionally expanded. +- [ ] Does the change touch `normalizeExt` or `isServerSupportedFormat` in `cleansePolicy.js`? Verify format matrix is accurate. +- [ ] Are uploaded files cleaned up on all error paths (422, 402, 500, copy failure)? +- [ ] Is `req.file` null-checked before accessing `.path`, `.mimetype`, `.originalname`? +- [ ] Max file size is 500 MB (`MAX_FILE_SIZE`). Is this still enforced after the change? + +**ExifTool / processor** +- [ ] Are all `exiftool` calls properly awaited? +- [ ] Is the output path distinct from the input path? (Copy-then-modify pattern must be preserved.) +- [ ] Are ExifTool errors caught and surfaced with appropriate HTTP status codes (`err.statusCode`, `err.publicDetail`)? +- [ ] Does `verifyFinalState` still run after processing and does its result appear in the report? +- [ ] Are metadata fields written via `buildMetaToWrite` — not raw user input — to ExifTool? + +**Gemini SEO proxy** +- [ ] Does `/api/generate-seo` still validate `GEMINI_API_KEY` presence before calling the API? +- [ ] Is the Gemini response still parsed with a try/catch around `JSON.parse(rawText)`? +- [ ] Does the route handle non-200 Gemini responses (currently `throw new Error(\`Gemini error \${response.status}\`)`)? +- [ ] Are output fields (`title`, `description`, `tags`) still type-checked as strings before being returned? +- [ ] Is `buildSeoPrompt` still sanitizing inputs with `asCleanText`? + +**Auth / JWT** +- [ ] Does any new protected route use `requireAuth` middleware? 
+- [ ] Does `requireAuth` still check for `Bearer ` prefix and use `jwt.verify` with `JWT_SECRET`? +- [ ] If a new endpoint uses `req.user.sub`, is it verified that `sub` is the user's database `id`? +- [ ] Is there any path where `JWT_SECRET` could be empty in production (currently fails fast at startup)? + +**Stripe billing** +- [ ] Does the Stripe webhook handler still use `express.raw({ type: 'application/json' })` — placed BEFORE `express.json()`? This ordering is critical. +- [ ] Does `checkout.session.completed` still write `plan`, `stripe_customer_id`, and `stripe_subscription_id` to the DB? +- [ ] Does `customer.subscription.deleted` still downgrade the user to `free`? +- [ ] Does `/api/create-checkout-session` still pass `userId` and `priceId` in `session.metadata`? +- [ ] Is `ENABLE_MOCK_CHECKOUT` gated to non-production only? +- [ ] Does the frontend call `/api/me` after the Stripe success redirect to pick up the upgraded plan? + +**CORS and environment** +- [ ] Does the change add or modify CORS methods or headers? `allowedHeaders` currently: `Content-Type`, `Authorization`. `exposedHeaders` includes all `X-Forensic-*` and `X-Usage-*` headers. +- [ ] Does the change affect how `FRONTEND_URL` or `ALLOWED_ORIGINS` are parsed? +- [ ] Is `IS_PROD` (`process.env.NODE_ENV === 'production'`) used correctly for any new environment branching? + +**Frontend / UI state** +- [ ] Does the change affect the upload queue, processing state, or download flow? Verify queue state cannot get stuck (uploaded but never cleared). +- [ ] Does any new `fetch` call handle 401 (re-auth) and 402 (upgrade modal) responses? +- [ ] Is `VITE_API_URL` used as the API base URL — not `VITE_BACKEND_URL` or a hardcoded `localhost`? +- [ ] Do plan badge, usage meter, and upgrade modal still reflect live data from `/api/me`? + +**Render / deployment compatibility** +- [ ] Does the change introduce any dependency requiring native compilation (beyond `better-sqlite3` and `exiftool-vendored`)? 
These must be compatible with Render's Node 20 build environment. +- [ ] Does the change modify `package.json` `scripts`? The `start` script must remain `node server.js`. +- [ ] Does the change affect `dist/` serving? The `express.static(distPath)` block must remain after all `/api/` routes. + +--- + +## Do not assume +- Do not assume WAV or FLAC are supported by Full Server Cleanse — verify `cleansePolicy.js`. +- Do not assume Quick Cleanse runs server-side — it uses `browser-id3-writer` in the browser. +- Do not assume the JWT `plan` field is live — it reflects plan at login time. Use `/api/me` for current plan. +- Do not assume Stripe mock checkout works in production — `ENABLE_MOCK_CHECKOUT` is false-by-default in prod. +- Do not assume the frontend env var is `VITE_BACKEND_URL` — the frontend reads `VITE_API_URL`. + +--- + +## Output format + +``` +## Summary +[What changed and why] + +## Blocking issues +[Anything that must be fixed before merge — data loss, auth bypass, silent failure, broken deploy] + +## High-risk regressions +[Changes that could break existing functionality: format gating, plan enforcement, CORS, Stripe webhook order] + +## Security / trust-boundary concerns +[JWT handling, raw user input to ExifTool, CORS origin widening, webhook signature bypass] + +## Product behavior concerns +[UI state desync, wrong error messages, format claims that don't match cleansePolicy.js] + +## Tests / smoke checks needed +[Specific curl commands or manual QA steps to verify this change is safe] + +## Suggested patch plan +[Ordered list of fixes if blocking issues were found] +``` + +--- + +## Escalate if +- The Stripe webhook `express.raw()` middleware ordering has changed relative to `express.json()`. +- Any ExifTool call passes user-supplied filenames directly without sanitization. +- `JWT_SECRET` could be undefined in a production code path. +- CORS `origin` callback has been changed to return `true` unconditionally. 
+- `cleansePolicy.js` format lists have been expanded without a corresponding processor.js update. diff --git a/spectracleanse-engineering/skills/spectracleanse-deploy-readiness/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-deploy-readiness/SKILL.md new file mode 100644 index 0000000..bce4492 --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-deploy-readiness/SKILL.md @@ -0,0 +1,169 @@ +# spectracleanse-deploy-readiness + +**Use this skill before any production deploy to Render or Hyperlift.** This skill is conservative by design. It produces a checklist and go/no-go recommendation. It does not auto-deploy or modify any live environment. + +--- + +## SpectraCleanse deployment model (confirmed from repo) + +- **Runtime**: Node 20.20.2 (`.nvmrc`, `.node-version`). Render/Hyperlift must be pinned — set `NODE_VERSION=20.20.2` in the service environment if the platform defaults to a newer version. Node 24 is known-incompatible (`better-sqlite3` native compilation fails). +- **Start command**: `node server.js` (confirmed `package.json` `scripts.start`). +- **Build command**: `npm ci && tsc && vite build` (TypeScript compile + Vite frontend build). The CI workflow uses `npm ci` not `npm install`. +- **Static serving**: `server.js` serves `dist/` via `express.static` when it exists. Frontend must be built before deploy or the SPA will 404. +- **Database**: SQLite at `DB_PATH`. On Render, this must point to a persistent disk mount (e.g. `/data/spectra.db`). If `DB_PATH` is missing, the server falls back to `spectra.db` in the working directory — which is ephemeral on Render. +- **CORS**: If `NODE_ENV=production` and both `FRONTEND_URL` and `ALLOWED_ORIGINS` are empty, the server calls `process.exit(1)` at startup. This is a silent fail on Render if env vars are misconfigured. 
+- **Stripe**: All four Stripe vars must be set in production or the server exits at startup: `STRIPE_SECRET_KEY`, `STRIPE_WEBHOOK_SECRET`, `STRIPE_CREATOR_PRICE_ID`, `STRIPE_STUDIO_PRICE_ID`. +- **Frontend env var**: The frontend build reads `VITE_API_URL`. This must be set in the **build environment** (not just runtime). If missing in the build, the frontend will throw a runtime error: "Missing VITE_API_URL in production build." +- **CI**: GitHub Actions (`.github/workflows/ci.yml`) runs audit + smoke test on push to `main`. Node 18 is used in CI — note this diverges from the production pin of 20.20.2. ⚠️ Known discrepancy. +- **Hyperlift**: `deploy.md` documents Spaceship Hyperlift as a deployment target with `hyperlift.toml` present in the repo. The `.env.example` also references Render terminology. Both are valid deployment paths; this checklist covers both. + +--- + +## Pre-deploy checklist + +### Node runtime +- [ ] Render/Hyperlift service is pinned to Node 20.20.2 (`NODE_VERSION=20.20.2` in service env) +- [ ] `node --version` in build logs shows `v20.x.x` +- [ ] Not running Node 24 (better-sqlite3 incompatible) + +### Build +- [ ] `npm ci` completes without errors +- [ ] `npm audit --audit-level=high` passes (mirrors CI gate) +- [ ] `tsc --noEmit` passes (TypeScript type check) +- [ ] `vite build` produces `dist/index.html` and `dist/assets/` +- [ ] `dist/` is present in the deploy artifact or built during deploy + +### Environment variables — backend (Render/Hyperlift runtime) +- [ ] `NODE_ENV=production` +- [ ] `JWT_SECRET` — set, non-empty, ≥32 chars random hex +- [ ] `STRIPE_SECRET_KEY` — starts with `sk_live_` (not `sk_test_` in prod) +- [ ] `STRIPE_WEBHOOK_SECRET` — starts with `whsec_` +- [ ] `STRIPE_CREATOR_PRICE_ID` — starts with `price_` +- [ ] `STRIPE_STUDIO_PRICE_ID` — starts with `price_` (different from Creator price) +- [ ] `GEMINI_API_KEY` — set and non-empty +- [ ] `FRONTEND_URL` — set to `https://spectracleanse.com` (no trailing slash) 
+- [ ] `DB_PATH` — set to path on persistent volume (e.g. `/data/spectra.db`) +- [ ] `PORT` — set to `3001` (or whatever Render/Hyperlift expects) +- [ ] `ENABLE_MOCK_CHECKOUT` — NOT set to `true` in production + +### Environment variables — frontend (Vite build environment) +- [ ] `VITE_API_URL` — set to `https://api.spectracleanse.com` or the correct backend URL (⚠️ note: `.env.example` lists `VITE_BACKEND_URL` but `app.tsx` reads `VITE_API_URL` — use `VITE_API_URL`) +- [ ] This variable must be available at **build time**, not just runtime + +### CORS and origins +- [ ] `FRONTEND_URL` and/or `ALLOWED_ORIGINS` covers the production frontend origin +- [ ] No wildcard (`*`) origins in production +- [ ] Stripe webhook endpoint (`/api/stripe-webhook`) is accessible from Stripe IPs (not CORS-blocked — Stripe uses server-to-server) + +### Database persistence +- [ ] Render persistent disk is mounted at the path matching `DB_PATH` +- [ ] Disk persists across deploys and restarts (not ephemeral) +- [ ] `uploads/` directory is either on the persistent disk or is intentionally ephemeral (processed files are deleted immediately after download) + +### Stripe configuration +- [ ] Stripe webhook is configured in the Stripe Dashboard to send `checkout.session.completed` and `customer.subscription.deleted` events +- [ ] Webhook endpoint URL is `https://[your-backend-domain]/api/stripe-webhook` +- [ ] Webhook signing secret matches `STRIPE_WEBHOOK_SECRET` env var +- [ ] Creator and Studio price IDs match active subscription products in Stripe Dashboard + +--- + +## Smoke tests (run after deploy) + +Run these against the live deployment, not localhost. + +```bash +# 1. Health check +curl -sf https://api.spectracleanse.com/api/health +# Expected: {"status":"ok","time":"..."} + +# 2. Unauthenticated /api/me → 401 (use -i, not -f: -f suppresses the error body) +curl -si https://api.spectracleanse.com/api/me +# Expected: {"error":"Missing or malformed Authorization header"} with HTTP 401 + +# 3.
Unknown API route → 404 JSON (not HTML; use -i, not -f: -f suppresses the error body) +curl -si https://api.spectracleanse.com/api/nonexistent +# Expected: {"error":"API route not found","path":"/api/nonexistent"} with HTTP 404 + +# 4. Frontend SPA loads +curl -sf https://spectracleanse.com | grep -q "SpectraCleanse" +# Expected: exit code 0 — HTML contains app content (not a blank page or 500) + +# 5. Auth flow (manual) +# Register a new account → login → verify /api/me returns { user: { plan: 'free' }, usage: { thisMonth: 0 } } + +# 6. Checkout smoke test (manual, use Stripe test mode for staging) +# Trigger upgrade flow → verify Stripe checkout session URL is returned + +# 7. SEO generation smoke test (manual) +# POST /api/generate-seo with valid auth and payload → verify JSON response with title/description/tags + +# 8. Upload smoke test (manual with .mp4 or .m4a test file) +# POST /api/process with valid auth and a test file → verify response headers X-Forensic-* are present + +# 9. Unsupported format rejection (manual with .mp3 to /api/process) +# Expected: HTTP 422 with reason: "unsupported_file_type" and guidance to use Quick Cleanse +``` + +--- + +## Known risks + +| Risk | Mitigation | +|---|---| +| `VITE_API_URL` missing from build env | Frontend throws at runtime on first API call. Set in Render build environment settings, not just runtime env. | +| Node version drift | CI uses Node 18; production runs Node 20.20.2. Confirm no `package-lock.json` inconsistency. | +| SQLite on ephemeral disk | All user accounts and jobs lost on redeploy. Verify persistent disk is mounted before go-live. | +| Stripe webhook mis-signed | All plan upgrades silently dropped. Verify `STRIPE_WEBHOOK_SECRET` matches Dashboard signing secret exactly. | +| ExifTool cold start | First request after deploy may be slow (ExifTool Perl subprocess boot). Not a blocking issue but monitor. | +| `uploads/` directory missing | Multer will fail on first upload. `fs.ensureDirSync('uploads')` in `server.js` handles this at startup.
| + +--- + +## Rollback plan + +1. In Render/Hyperlift dashboard, identify the previous successful deploy. +2. Trigger a redeploy from the previous commit SHA. +3. Verify `/api/health` returns `{"status":"ok"}` within 60 seconds. +4. If the DB was migrated, restore from the most recent backup before rollback. +5. Notify affected users if auth or billing was impacted. + +Note: SpectraCleanse currently has no automated DB migration system. Schema changes in `server.js` (the `db.exec(CREATE TABLE IF NOT EXISTS ...)` block) are additive and idempotent — safe to re-run. Destructive schema changes require a manual migration plan. + +--- + +## Output format + +``` +## Release summary +[What's changing in this deploy] + +## Pre-deploy checklist +[Completed checklist items — mark each ✅ or ❌ with notes] + +## Env checklist +[All env vars verified ✅ or flagged ❌] + +## Build checklist +[npm ci, tsc, vite build status] + +## Smoke tests +[Results of each smoke test] + +## Known risks +[Any risks specific to this release] + +## Rollback plan +[Steps to revert if this deploy causes a regression] + +## Go / No-Go +[GO if all blocking items are clear. NO-GO with explicit reason if any blocking item fails.] +``` + +--- + +## Do not assume +- Do not assume `VITE_BACKEND_URL` is the correct frontend env var — the code uses `VITE_API_URL`. +- Do not assume Node 18 compatibility from CI means Node 18 is the production target — production uses 20.20.2. +- Do not assume Render's default disk is persistent — it must be explicitly configured. +- Do not assume mock checkout is safe to leave enabled in production. 
diff --git a/spectracleanse-engineering/skills/spectracleanse-documentation/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-documentation/SKILL.md new file mode 100644 index 0000000..8ed1117 --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-documentation/SKILL.md @@ -0,0 +1,159 @@ +# spectracleanse-documentation + +**Use this skill when writing or updating any SpectraCleanse documentation** — README, deployment guides, env var references, format support docs, billing/plan docs, API docs, release notes, or user-facing copy. Every doc produced by this skill must distinguish "currently supported" from "planned" and must be verified against actual code before making specific claims. + +--- + +## SpectraCleanse context + +SpectraCleanse has a small but real documentation footprint: +- `README.md` — product overview, pricing, deployment notes, Docker instructions +- `docs/manual-qa-checklist.md` — manual QA process covering local, API, auth, billing, upload, metadata, SEO, ExifTool, and Docker flows +- `deploy.md` — Spaceship Hyperlift deployment guide +- `PIPELINE.md` — GitHub Actions CI/CD pipeline documentation + +Known accuracy issues to preserve (do not silently "fix" without flagging): +- `.env.example` lists `VITE_BACKEND_URL` but `app.tsx` reads `VITE_API_URL`. Any doc that references the frontend env var must use `VITE_API_URL` and note the discrepancy in `.env.example`. +- `README.md` says "drag in any MP3, WAV, FLAC, M4A, or MP4 file" in the marketing copy but the server only supports MP4 and M4A for Full Server Cleanse. Quick Cleanse handles MP3 browser-side. WAV/FLAC are rejected at the processor. This overclaim must be corrected in any updated README. +- `PIPELINE.md` documents CI using Node 18, but `.nvmrc` pins Node 20.20.2. Any deploy doc must reference Node 20.20.2 for production. + +--- + +## Rules + +1. 
**Never overclaim format support.** The only formats confirmed from `cleansePolicy.js`: + - Quick Cleanse (browser): MP3 only + - Full Server Cleanse: MP4, M4A only + - WAV, FLAC: Multer-accepted but processor-rejected (422) + +2. **Always distinguish current from planned.** Use these markers: + - ✅ Currently supported (verified from code) + - 🔄 In development (confirmed work in progress) + - 📋 Planned (documented intent, not yet built) + - ❌ Not supported + +3. **Verify endpoint names from `server.js` before documenting.** Confirmed endpoints: + - `GET /api/health` + - `POST /api/register` + - `POST /api/login` + - `GET /api/me` + - `POST /api/create-checkout-session` + - `POST /api/stripe-webhook` + - `POST /api/process` + - `POST /api/process-batch` + - `POST /api/generate-seo` + - `GET /api/download/:token` + +4. **Verify env var names before documenting.** Backend vars: `PORT`, `FRONTEND_URL`, `ALLOWED_ORIGINS`, `JWT_SECRET`, `DB_PATH`, `STRIPE_SECRET_KEY`, `STRIPE_WEBHOOK_SECRET`, `STRIPE_CREATOR_PRICE_ID`, `STRIPE_STUDIO_PRICE_ID`, `ENABLE_MOCK_CHECKOUT`, `GEMINI_API_KEY`, `NODE_ENV`. Frontend build var: `VITE_API_URL` (not `VITE_BACKEND_URL`). + +5. **Prefer clear user-facing language** over technical jargon in user docs. "Removes embedded tags that signal AI-generated origin" not "writes null to QuickTime atom fields." + +6. **Always note when a behavior is dev-mode-only.** Mock checkout (`ENABLE_MOCK_CHECKOUT=true`) and the dev-only JWT fallback (`dev_jwt_secret_change_me`) must never appear in production docs as if they are production behavior. + +--- + +## Document templates + +### README update + +Audience: developers and self-hosters discovering the project. +Must include: what the product does, how to run locally, required env vars, deployment notes, format support matrix (accurate), pricing, contact. +Must not include: fake endpoints, unsupported formats claimed as supported, production secrets, Node 18 as the target (it's 20.20.2). 
+ +### Deployment doc + +Audience: engineer or founder setting up a new Render/Hyperlift deployment. +Must include: Node version pin (20.20.2), build command (`npm ci && tsc && vite build`), start command (`node server.js`), all required env vars, persistent disk configuration for SQLite, CORS setup, Stripe webhook registration, smoke test commands. +Must not include: `VITE_BACKEND_URL` (use `VITE_API_URL`), `ENABLE_MOCK_CHECKOUT=true` in production instructions. + +### Env var reference + +See `docs/env-and-secrets-reference.md` for the authoritative list with annotations. +When updating: verify each var exists and is actually read from `server.js` or `app.tsx`. Do not document vars from `.env.example` that are commented out or unused. + +### Supported formats doc + +See `docs/supported-formats-and-processing-boundaries.md` for the format matrix template. +Any claim about a specific format must be verifiable from `server/cleansePolicy.js`. + +### API documentation + +Audience: developers integrating or testing the API. +For each endpoint, document: method + path, auth requirement, request format, success response, error responses (with HTTP codes and reason fields). +Do not invent response fields — verify from `server.js`. + +### Release notes + +Format: +``` +## v[X.Y.Z] – [date] +### Added +- [New feature — be specific about what changed and why] +### Changed +- [Behavior change — include what the old behavior was] +### Fixed +- [Bug fix — describe what was broken and what the user experienced] +### Known issues +- [Anything shipped with a known limitation] +``` + +### User-facing copy (upgrade modal, error messages, format rejection messages) + +Current messages verified from `server.js`: +- Unsupported format: "Full Server Cleanse currently supports MP4 and M4A only. Use Quick Cleanse (Browser) for MP3, or convert WAV/FLAC to M4A/MP4." +- Free tier limit: "Free accounts are limited to 3 files per month. Upgrade to continue processing." 
+- Batch restriction: "Batch processing requires Creator or Studio plan." +- File too large: "File too large (max 500MB)" + +When updating user-facing messages, ensure the `server.js` error response text and the frontend modal/banner copy stay in sync. + +### Trust / privacy explanation + +What to cover: what metadata is removed (all ExifTool-readable tags except those in `isBenign()` and `isAllowedInjected()`), what is injected (user-supplied metadata via `buildMetaToWrite()`), how long files are retained (deleted after download), what is stored in the DB (filename, user_id, platform, created_at — no file content), and how provenance markers are detected (MARKER_RULES in `server/metadataRules.js`). + +--- + +## Checklist + +- [ ] Have format claims been verified from `server/cleansePolicy.js`? +- [ ] Have endpoint names been verified from `server.js`? +- [ ] Have env var names been verified from `server.js` and `app.tsx`? +- [ ] Does the doc distinguish currently supported from planned? +- [ ] Is `VITE_API_URL` used (not `VITE_BACKEND_URL`) where the frontend env var is mentioned? +- [ ] Is Node 20.20.2 referenced as the production target (not Node 18)? +- [ ] Is mock checkout described as dev-only? +- [ ] Is the format support matrix accurate (MP3=Quick Cleanse only, MP4/M4A=Server Cleanse, WAV/FLAC=rejected)? +- [ ] Are all pricing figures current ($9.99 Creator, $29.99 Studio, 3 files/month free)? 
+ +--- + +## Output format + +``` +## Target doc +[Which document is being created or updated] + +## Audience +[Who will read this: developer, user, self-hoster, founder] + +## Current facts (from code) +[Verified facts that the doc must reflect] + +## Draft content +[The actual documentation content] + +## Accuracy notes +[Any claim that required code verification, and what was found] + +## Follow-up verification needed +[Any claim that could not be verified from available code and must be checked before publishing] +``` + +--- + +## Do not assume +- Do not claim WAV or FLAC are supported by Full Server Cleanse. +- Do not use `VITE_BACKEND_URL` as the frontend env var — the code reads `VITE_API_URL`. +- Do not document Node 18 as the production runtime target. +- Do not document features as "current" if they are not confirmed in the codebase (e.g., email verification, password reset, OAuth). +- Do not document the Stripe webhook path as anything other than `/api/stripe-webhook`. diff --git a/spectracleanse-engineering/skills/spectracleanse-founder-operating-review/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-founder-operating-review/SKILL.md new file mode 100644 index 0000000..e8842cd --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-founder-operating-review/SKILL.md @@ -0,0 +1,111 @@ +# spectracleanse-founder-operating-review + +**Use this skill instead of a daily standup or sprint review.** For a solo founder shipping SpectraCleanse, this replaces generic planning with a concrete engineering and product review: what shipped, what's in flight, what risks need attention today, and what the single highest-leverage next move is. + +--- + +## SpectraCleanse context + +SpectraCleanse is a solo-founder product. 
The engineering surface includes: +- A 637-line `server.js` monolith (auth, billing, upload, processing, SEO proxy, SPA serving) +- A single-file `app.tsx` React frontend +- SQLite database on Render/Hyperlift with a persistent disk requirement +- ExifTool-based server-side processing for MP4/M4A +- Browser-side MP3 processing via `browser-id3-writer` +- Stripe subscription billing (Creator $9.99, Studio $29.99, Free 3 files/month) +- Gemini `gemini-2.5-flash` for SEO metadata generation +- No automated tests beyond a CI smoke test of `/api/health` +- Manual QA via `docs/manual-qa-checklist.md` + +High-risk areas to watch every week: +- Stripe webhook delivery (plan upgrades silently dropped if webhook misconfigured) +- SQLite persistence (data lost if Render disk is not mounted) +- ExifTool correctness (core product guarantee — no automated regression detection) +- Frontend API URL (`VITE_API_URL` — must be set at build time, not runtime) +- Node version pin (20.20.2 — CI uses 18, production uses 20.20.2) + +--- + +## Review protocol + +To produce a useful review, gather: +1. Recent commits (last 24–72 hours): `git log --oneline -20` +2. Current deploy state: is the last CI run green? Is Render/Hyperlift showing the latest commit? +3. Any open issues or PRs in GitHub +4. Any Stripe webhook events that failed in the Stripe Dashboard (Developers → Webhooks → event log) +5. Any user-reported issues (email, support channel) +6. Current `docs/manual-qa-checklist.md` completion state (if a deploy was recent) + +If live tooling is not connected, ask the founder to provide: +- `git log --oneline -20` output +- CI status for the last push to `main` +- Any error reports or symptoms they're aware of + +--- + +## Output format + +``` +## Shipped (last 24–72 hours) +[What code changes landed. Cite commit messages or file names. Note if any touched high-risk areas: server.js, cleansePolicy.js, processor.js, Stripe webhook, CORS config.] 
+ +## In progress +[What is currently being worked on. Note any half-finished changes that could cause issues if the dev machine is lost or the developer is unavailable.] + +## Risks +[Active risks right now — ranked by severity. Examples: +- No tests for Stripe webhook processing +- `VITE_API_URL` discrepancy with .env.example not yet fixed +- CI uses Node 18, production uses Node 20.20.2 +- WAV/FLAC accepted by Multer but rejected by processor (bandwidth waste + user confusion) +- README still claims WAV/FLAC support in marketing copy] + +## Friction (user-facing or billing) +[Any known friction in the product: error messages that are confusing, upgrade flow issues, format rejection UX, auth edge cases, slow processing on large files] + +## Highest-leverage next move +[The single most valuable thing to work on next. Consider: what reduces the most risk? what unblocks revenue? what improves user trust? +Examples of high-leverage moves for SpectraCleanse right now: +- Fix VITE_API_URL in .env.example to match app.tsx +- Pin CI to Node 20.20.2 to match production +- Add Phase 1 smoke tests to CI (auth round-trip, format rejection, mock checkout) +- Correct README format support claims (WAV/FLAC overclaim) +- Add Stripe webhook event log monitoring +- Document persistent disk setup clearly in deploy.md] + +## Next 3 actions +[Concrete, completable within 24 hours: +1. [Action] — [estimated time] — [why now] +2. [Action] — [estimated time] — [why now] +3. 
[Action] — [estimated time] — [why now]] + +## Blockers +[Anything that prevents progress: missing credentials, unclear requirements, waiting on third-party, infra that needs manual action] +``` + +--- + +## High-leverage backlog items for SpectraCleanse (current state, May 2026) + +These are standing items that should appear in reviews until resolved: + +| Item | Risk if unresolved | Effort | +|---|---|---| +| Rename `VITE_BACKEND_URL` to `VITE_API_URL` in `.env.example` | Developer confusion, broken local dev | 5 min | +| Pin CI to Node 20.20.2 | Node version drift between CI and production | 10 min | +| Correct README format claims (WAV/FLAC overclaim) | User trust, support load | 15 min | +| Add Phase 1 smoke tests to CI | No automated regression detection for auth, format gating, checkout | 2–4 hours | +| Document persistent disk setup in `render-deploy-checklist.md` | Data loss on redeploy if disk not mounted | 30 min | +| Add Stripe webhook retry monitoring | Silent plan upgrade failures | Depends on Stripe Dashboard access | +| WAV/FLAC: fix Multer accept-list or add processor support | Bandwidth waste + user confusion | Medium effort | +| Add email verification | Account security | Medium effort | +| ExifTool test fixtures + integration tests | Core product guarantee unverified | 1–2 days | + +--- + +## Do not assume +- Do not assume CI is green — check the actual run status before reporting deploy state. +- Do not assume the Stripe webhook is delivering correctly — check the Stripe event log. +- Do not assume the production DB is on a persistent disk — verify the Render/Hyperlift disk configuration. +- Do not generate a "shipped" list from memory — use `git log` output or ask the founder. +- Do not omit known risks to make the review look clean — the point of this review is to surface real issues.
diff --git a/spectracleanse-engineering/skills/spectracleanse-incident-response/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-incident-response/SKILL.md new file mode 100644 index 0000000..18d15fb --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-incident-response/SKILL.md @@ -0,0 +1,176 @@ +# spectracleanse-incident-response + +**Use this skill when SpectraCleanse has a production issue** — broken deploy, auth outage, Stripe failure, Gemini failure, processing failures, CORS breakage, or database issues. This skill is about triage speed and user-impact containment. It separates known facts from guesses. + +--- + +## Incident classes and SpectraCleanse-specific causes + +| Class | Likely first signal | Immediate check | +|---|---|---| +| Broken Render/Hyperlift deploy | `/api/health` → 502/504 or timeout | Render/Hyperlift build logs; startup crash | +| Build/runtime mismatch | Server exits immediately after deploy | Check Node version; better-sqlite3 compile error | +| CORS failure | Browser console: "CORS blocked" | `FRONTEND_URL`/`ALLOWED_ORIGINS` env vars; CORS header in response | +| Auth outage | All requests → 401 | `JWT_SECRET` rotation; middleware regression | +| Checkout outage | Upgrade flow returns error | Stripe env vars; `STRIPE_CONFIGURED` check; Stripe Dashboard status | +| Gemini outage/schema failure | SEO generation fails or returns empty strings | Gemini API status; `GEMINI_API_KEY` validity; response JSON shape | +| Processing failures | `/api/process` → 500 | ExifTool logs; `exiftoolFailureError` reason; file copy path | +| Unsupported-format spike | 422 errors on user uploads | Format in request; `cleansePolicy.js` changes | +| Database/persistence issue | 500 on auth or job recording | `DB_PATH` env var; disk mount; WAL corruption | +| Frontend/backend URL mismatch | Frontend can't reach API | `VITE_API_URL` build env var; network tab in browser | + +--- + +## Triage protocol + +**Always do these first:** + 
+1. **Check `/api/health`**: `curl https://api.spectracleanse.com/api/health` + - Returns `{"status":"ok"}` → backend is up; the issue is likely specific to a route or env. + - Hangs or returns 502/504 → backend is not running; check Render deploy logs. + - Returns HTML → reverse proxy issue or SPA fallback catching `/api/` routes (check the `app.use('/api', ...)` 404 handler). + +2. **Check Render/Hyperlift logs**: Look for: + - `FATAL:` lines at startup (missing `JWT_SECRET`, Stripe not configured, CORS origin missing) + - ExifTool errors (Perl subprocess failure) + - SQLite errors (cannot open database, disk I/O error) + - `SIGTERM` followed by `exiftool.end()` (graceful shutdown — expected during redeploy) + +3. **Establish user impact**: Are all users affected or only specific plans/actions? + +4. **Separate facts from guesses**: Document what you have confirmed vs. what you suspect. + +--- + +## Incident response for each class + +### Broken Render/Hyperlift deploy +**Immediate check**: Render deploy log. Common causes: +- `npm ci` fails → dependency issue or package-lock mismatch +- `tsc` fails → TypeScript errors introduced in the PR +- `vite build` fails → import error or Vite config issue +- Server starts but `/api/health` fails → startup `process.exit(1)` from missing env var +- `better-sqlite3` native compilation fails → Node version incompatibility (check for Node 24 being used instead of 20.20.2) + +**Containment**: Rollback to the previous deploy SHA in Render/Hyperlift dashboard. + +### CORS failure +**Symptoms**: Browser console shows `CORS blocked for origin: https://spectracleanse.com`. +**Causes**: +1. `FRONTEND_URL` env var is missing or has a typo (trailing slash, `http://` instead of `https://`) +2. `ALLOWED_ORIGINS` env var was cleared or misconfigured +3. A deploy changed how `allowedOrigins` is constructed in `server.js` +4. 
Not a cause — listed only to rule it out: Stripe webhook delivery is server-to-server and is never subject to CORS, so do not investigate the webhook path for a CORS error + +**Fix**: Correct `FRONTEND_URL` in Render env → redeploy or restart service. + +**Note**: CORS errors appear in browser console, not in server logs. Always check browser network tab for the actual `Origin` header being sent and the `Access-Control-Allow-Origin` header (or lack thereof) in the response. + +### Auth outage (all users → 401) +**Causes**: +1. `JWT_SECRET` was rotated in the deploy — all existing tokens are invalid +2. `requireAuth` middleware was changed (header parsing regression) +3. Server restarted with a different `JWT_SECRET` value + +**Containment**: If `JWT_SECRET` was rotated, all users must re-login. This cannot be undone without reverting the secret — which would invalidate the new tokens. + +**Fix**: Ensure `JWT_SECRET` is stable across deploys. Use a persistent env var, not a generated-at-startup value. + +### Checkout outage +**Symptoms**: `POST /api/create-checkout-session` returns error or mock redirect in production. +**Causes**: +1. `STRIPE_CONFIGURED` is false → one or more of the four Stripe env vars is missing +2. `FRONTEND_URL` is missing → `success_url`/`cancel_url` are broken → Stripe rejects session creation +3. Stripe API is down → check https://status.stripe.com +4. Price ID mismatch → `priceId` is undefined → `/api/create-checkout-session` falls through to mock or 503 + +**Containment**: If Stripe is down, show a maintenance message. Do not attempt to process payments offline. + +### Gemini outage / schema failure +**Symptoms**: `/api/generate-seo` returns 500, 502, or empty `{ title: '', description: '', tags: '' }`. +**Causes**: +1. `GEMINI_API_KEY` invalid or quota exceeded → 400/403 (invalid key) or 429 (quota/rate limit) from Gemini → 500 to client +2. Gemini returns non-JSON response → `JSON.parse` fails → 502 +3. Gemini returns JSON but with different schema keys → fields are empty strings (type-checked but wrong key) +4.
Gemini API is degraded → check https://status.cloud.google.com + +**Containment**: The SEO generation endpoint is separate from processing. Processing still works without Gemini. Communicate that AI metadata generation is temporarily unavailable if Gemini is down. + +### Processing failures (`/api/process` → 500) +**Causes**: +1. ExifTool subprocess failure (`exiftoolFailureError`) — check for ExifTool version issues or malformed file +2. File copy failure (`fs.copy` error) — check disk space in `uploads/` +3. Missing output file path — verify `uploads/` directory exists (`fs.ensureDirSync` runs at startup) +4. Unhandled exception in `processor.js` — check for new error paths not covered by `try/catch` + +**Containment**: Processing failures are per-file and do not affect other users. The upload file is cleaned up on error paths. No action needed unless failure rate is elevated. + +### Database / persistence issue +**Symptoms**: 500 on login, register, or job recording. SQLite errors in logs. +**Causes**: +1. `DB_PATH` points to ephemeral disk that was wiped on redeploy → all users lost → critical +2. SQLite WAL file corruption → use `PRAGMA integrity_check;` to diagnose +3. Disk full → SQLite write fails + +**Containment**: If DB is on ephemeral disk and was wiped, user data is lost. Verify persistent disk mount immediately. Restore from most recent backup if one exists. + +### Frontend/backend URL mismatch +**Symptoms**: Browser network tab shows requests going to `undefined` or `localhost:3001` in production. +**Cause**: `VITE_API_URL` was not set in the **build environment** before `vite build` ran. The frontend built with an empty API URL. +**Fix**: Set `VITE_API_URL` in Render's build environment (not runtime env) and trigger a rebuild. 
+ +--- + +## Output format + +``` +## Incident title +[Short description: "SpectraCleanse: [class] – [date/time]"] + +## Severity +[P0: all users blocked | P1: major feature down | P2: degraded | P3: minor] + +## User impact +[Who is affected and what they cannot do] + +## Timeline +[Time detected, time of last known-good deploy, events since] + +## Known facts (confirmed) +[What has been directly observed: logs, HTTP responses, env var values] + +## Hypotheses (not confirmed) +[Ranked list of likely causes — mark clearly as unconfirmed] + +## Immediate containment +[What to do right now to stop user impact spreading: rollback, disable feature, show maintenance] + +## Investigation steps +[Ordered diagnostic commands and log checks] + +## Fix +[The specific code or config change that resolves the issue] + +## Verification +[How to confirm the fix worked: smoke tests, curl commands] + +## Prevention items +[What to add to the deploy checklist or monitoring to catch this earlier next time] +``` + +--- + +## Do not assume +- Do not say "the server is up" without checking `/api/health` first. +- Do not say "Stripe is configured" without verifying all four env vars. +- Do not say "the DB is intact" without querying it directly. +- Do not assume a CORS error means the backend is down — the backend may be running fine with misconfigured origins. +- Do not say "the Gemini API key is valid" without observing the actual HTTP response from the Gemini API. + +--- + +## Escalate if +- SQLite DB was on an ephemeral disk and has been wiped — user data loss situation. +- `JWT_SECRET` was rotated with no warning — all sessions invalidated simultaneously. +- A processing bug silently corrupted output files (metadata was not removed, or wrong metadata was injected) — trust and legal implications. +- Stripe charges succeeded but plan upgrades were not applied to the DB — revenue collected without service delivered. 
diff --git a/spectracleanse-engineering/skills/spectracleanse-processing-pipeline/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-processing-pipeline/SKILL.md new file mode 100644 index 0000000..d9a4a1d --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-processing-pipeline/SKILL.md @@ -0,0 +1,192 @@ +# spectracleanse-processing-pipeline + +**Use this skill when reviewing, debugging, or designing SpectraCleanse's file-processing behavior** — format support decisions, ExifTool operations, browser-side vs. server-side cleanse, metadata removal and re-injection, SEO generation, and verification output. + +--- + +## SpectraCleanse context (confirmed from repo) + +### Processing modes + +**Quick Cleanse (browser-side)** +- Runs entirely in the browser — no file is uploaded to the server. +- Only supported format: `.mp3` (via `browser-id3-writer`). +- Metadata analysis uses `music-metadata` with graceful parseError fallback. +- No ExifTool involved. No server processing. No job recorded in the DB. +- File size limit is browser memory, not the 500 MB server limit. + +**Full Server Cleanse (`/api/process`)** +- File is uploaded via Multer to `uploads/` on the server. +- Supported formats: `.mp4`, `.m4a` only (defined in `server/cleansePolicy.js` `CLEANSE_POLICY.server.supportedExtensions`). +- MIME accept-list (Multer): `audio/mpeg`, `audio/wav`, `audio/x-wav`, `audio/flac`, `audio/x-flac`, `audio/mp4`, `audio/m4a`, `video/mp4`. +- ⚠️ Multer accepts WAV and FLAC MIME types but the processor rejects them with HTTP 422. This means WAV/FLAC files are uploaded, then rejected — wasting bandwidth and disk I/O until deletion. +- Processing: `server/processor.js` → `exiftool-vendored` (wraps ExifTool Perl CLI). +- Plan enforcement: free users capped at 3 files/month. 402 returned if limit exceeded. +- Output file: cleansed copy returned via `res.download()`, then deleted immediately. +- Job recorded in `jobs` table for usage counting. 
+ +**Batch processing (`/api/process-batch`)** +- Paid plans (creator, studio) only. Free plans receive HTTP 403. +- Up to 20 files per request. 2 GB total batch size guard. +- Processing is sequential (not parallel) within one request. +- Each output file gets a one-time download token (`server/downloadTokens.js`). +- Download via `GET /api/download/:token` (authenticated, single-use). + +**SEO generation (`/api/generate-seo`)** +- Separate from processing — can be called independently. +- Sends a prompt to `gemini-2.5-flash` via the Generative Language REST API. +- Structured JSON output: `{ title: string, description: string, tags: string }`. +- Prompt is built by `buildSeoPrompt()` from user-supplied fields: title, artist, genre, platform, description, tags, lyrics, vibe, or a raw `promptText`. +- All inputs sanitized by `asCleanText()` (max lengths enforced). +- Response parsed with `JSON.parse(rawText)` inside try/catch. +- Returns 502 if Gemini returns malformed JSON. Returns 500 if Gemini call fails. +- No caching, no retry, no fallback model. + +### Metadata rules + +- `server/metadataRules.js`: defines `MARKER_RULES` (provenance marker detection), `isBenign()` (tags safe to leave in output), `isAllowedInjected()` (tags SpectraCleanse intentionally writes). +- `detectMarkers()`: scans ExifTool tags against MARKER_RULES. Returns hits with `ruleId`, `category`, `severity`, `matchedTag`, `matchedValue`. +- `verifyFinalState()`: runs after ExifTool write; checks for unexpected descriptive tags and residual provenance markers. +- `buildMetaToWrite()`: constructs the ExifTool write map from user metadata. Writes to QuickTime, ItemList, and Keys atom families for MP4/M4A. + +### QuickTime timestamp handling +- Six QuickTime timestamp fields (`CreateDate`, `ModifyDate`, `TrackCreateDate`, etc.) are zeroed to `0000:00:00 00:00:00` to remove temporal provenance markers. +- This is intentional — timestamps are a known provenance signal. 
+ +### Response headers from `/api/process` +- `X-Forensic-Removed`: count of removed tags +- `X-Forensic-Tags`: JSON array of removed tag names (capped at 50) +- `X-Forensic-Status`: e.g. `"Sanitized"` +- `X-Forensic-Report`: full report JSON +- `X-Process-Run-Id`: unique run identifier +- `X-Output-SHA256`: SHA-256 of the output file (stage: `after_timestamp_write_final`) +- `X-Download-Name`: suggested filename for download +- `X-Usage-This-Month`: jobs consumed this month +- `X-Usage-Limit`: limit (`3` for free, `"unlimited"` for paid) + +--- + +## Format support matrix (from code — not assumed) + +| Format | Quick Cleanse (browser) | Full Server Cleanse | SEO Generation | Batch | +|---|---|---|---|---| +| MP3 | ✅ (`browser-id3-writer`) | ❌ 422 (uploaded but rejected) | ✅ (any format) | ❌ (batch uses Full Server Cleanse, which rejects MP3) | +| MP4 | ❌ | ✅ | ✅ | ✅ (paid) | +| M4A | ❌ | ✅ | ✅ | ✅ (paid) | +| WAV | ❌ | ❌ 422 (uploaded but rejected) | ✅ | ❌ | +| FLAC | ❌ | ❌ 422 (uploaded but rejected) | ✅ | ❌ | +| Other | ❌ | ❌ MIME-blocked by Multer (415) | ✅ (if metadata provided) | ❌ | + +**To verify format support**: check `server/cleansePolicy.js` `CLEANSE_POLICY` object. This is the authoritative source. If a format is not listed there, it is not supported by Full Server Cleanse regardless of what Multer accepts. + +--- + +## Checklist + +**Format gating** +- [ ] Is the format being evaluated listed in `CLEANSE_POLICY.server.supportedExtensions`? +- [ ] Is there a corresponding MIME type in `ALLOWED_MIME` in `server.js`? +- [ ] Does `isServerSupportedFormat()` correctly identify the format by both extension AND MIME? +- [ ] Is the user-facing error message for unsupported formats accurate (currently: "Full Server Cleanse currently supports MP4 and M4A only")? + +**ExifTool safety** +- [ ] Is the ExifTool operation operating on a **copy** of the uploaded file (not the original)? +- [ ] Are all ExifTool calls properly `await`ed? 
+- [ ] Is error handling present for `exiftoolFailureError` (500) and `unsupportedCleanseError` (422)? +- [ ] Does the processor still call `verifyFinalState()` after writing and include the result in the report? +- [ ] Are `QUICKTIME_TIMESTAMP_FIELDS` still being zeroed to `ZERO_QUICKTIME_DATE`? + +**Metadata injection** +- [ ] Are all fields going through `cleanText()` before being passed to ExifTool write? +- [ ] Is `buildMetaToWrite()` the only path for writing metadata (no raw user input to ExifTool)? +- [ ] Are atom families (ItemList, QuickTime, Keys) all written for MP4/M4A? + +**Gemini SEO** +- [ ] Is `GEMINI_API_KEY` validated before the API call? +- [ ] Is the structured JSON schema (`title`, `description`, `tags`) still enforced in the request? +- [ ] Is the response parsed defensively (try/catch around JSON.parse)? +- [ ] Are output fields type-checked as strings before returning to the client? +- [ ] Is `buildSeoPrompt` returning empty string for payloads with no useful fields? (Results in 400, not a Gemini call.) + +**File cleanup** +- [ ] Is the input file (`req.file.path`) deleted on all paths (success, 422, 402, copy failure)? +- [ ] Is the output file deleted after `res.download()` completes? +- [ ] Are batch output files registered with `cleanup.registerForCleanup()`? +- [ ] Is `cleanup.deleteImmediately()` called after the download stream ends? 
+ +--- + +## Trust boundaries + +| Input | Trust level | Sanitization | +|---|---|---| +| Uploaded file content | Untrusted | MIME filter (Multer), extension check (cleansePolicy), ExifTool operates on copy | +| `req.body` metadata fields | Untrusted | `asCleanText()` / `cleanText()` with max lengths | +| `req.body.promptText` | Untrusted | `asCleanText(payload.promptText, 4000)` | +| Gemini API response | Semi-trusted | `JSON.parse()` in try/catch, field type-checked before return | +| ExifTool tag output | Semi-trusted | Post-process `verifyFinalState()`, `isBenign()` / `isAllowedInjected()` checks | +| JWT `req.user` | Trusted (verified) | `jwt.verify()` with `JWT_SECRET` in `requireAuth` | + +--- + +## Failure modes + +| Failure | HTTP | Reason field | User impact | +|---|---|---|---| +| Unsupported format (MP3 to server) | 422 | `unsupported_file_type` | Clear message; user directed to Quick Cleanse | +| Unsupported MIME (Multer) | 415 | — | "Unsupported file type: [mime]" | +| Free tier limit reached | 402 | `usage_limit` | Upgrade modal triggered | +| ExifTool failure | 500 | `exiftool_failure` | Generic processing error | +| File copy failure | 500 | — | "File copy failed" | +| Gemini malformed JSON | 502 | — | "Malformed JSON returned by Gemini" | +| Gemini API failure | 500 | — | `err.message` from fetch | +| File too large | 413 | — | "File too large (max 500MB)" | +| Batch: free plan | 403 | `plan_restriction` | "Batch processing requires Creator or Studio plan" | + +--- + +## Privacy and retention + +- Uploaded files are deleted immediately after download (or on cleanup timer). +- No files are stored beyond the request/download lifecycle. +- SQLite `jobs` table stores: user_id, filename (original name), platform, created_at. No file content. +- `uploads/` is ephemeral on Render unless pointed at a persistent disk. 
+ +--- + +## Output format + +``` +## Processing flow summary +[What happens from upload to download for this format/scenario] + +## Supported/unsupported formats +[Table showing current status, verified from cleansePolicy.js] + +## Trust boundaries +[Where user input enters and how it is sanitized] + +## Failure modes +[What can go wrong and what the user/system sees] + +## Security concerns +[Any path where user input could reach ExifTool unsanitized] + +## UX concerns +[Any mismatch between user-facing claims and actual code behavior] + +## Test fixture plan +[Files and metadata needed to test this scenario] + +## Recommended changes +[If reviewing a proposed change — what to fix or improve] +``` + +--- + +## Do not assume +- Do not assume WAV or FLAC are reliably processed — they are Multer-accepted but processor-rejected. +- Do not assume Quick Cleanse handles any format besides MP3. +- Do not assume Gemini always returns valid JSON — the response must be parsed defensively. +- Do not assume cleanup runs synchronously — output files are deleted asynchronously after the download stream. +- Do not claim any format is "supported" without verifying `cleansePolicy.js` `CLEANSE_POLICY`. diff --git a/spectracleanse-engineering/skills/spectracleanse-testing-strategy/SKILL.md b/spectracleanse-engineering/skills/spectracleanse-testing-strategy/SKILL.md new file mode 100644 index 0000000..7a1920d --- /dev/null +++ b/spectracleanse-engineering/skills/spectracleanse-testing-strategy/SKILL.md @@ -0,0 +1,177 @@ +# spectracleanse-testing-strategy + +**Use this skill when designing, auditing, or improving SpectraCleanse's test coverage.** Start by checking what tests actually exist — do not assume a test suite is in place. The goal is a pragmatic test plan calibrated to a solo-founder shipping cadence. 
+ +--- + +## SpectraCleanse context + +### Current test surface (confirmed from repo) + +**CI smoke test** (`.github/workflows/ci.yml`): starts the server with CI-safe env vars and checks that `/api/health` returns HTTP 200. This is the only automated test currently in the pipeline. It uses Node 18 in CI (diverges from production Node 20.20.2). + +**Manual QA checklist** (`docs/manual-qa-checklist.md`): comprehensive manual testing guide covering local setup, API smoke tests, auth, billing/mock checkout, file upload, metadata analysis, SEO generation, ExifTool verification, download flow, Docker, and production readiness. + +**No unit tests, no integration tests, no frontend tests** were found in `package.json` (no test runner configured: no Jest, Vitest, Mocha, or Playwright dependency). + +**`package.json` scripts**: `start`, `dev:backend`, `dev:frontend`, `build`, `preview` — no `test` script. + +--- + +## Highest-risk flows (ranked by severity of undetected failure) + +1. **Stripe webhook processing** — `customer.subscription.deleted` silently failing means paid users are not downgraded. `checkout.session.completed` failing means upgrades are silently dropped. Neither is currently tested. + +2. **Plan enforcement at `/api/process`** — a regression here could let free users exceed their limit or block paid users. Currently tested only manually. + +3. **ExifTool output correctness** — a processor regression could silently fail to remove provenance markers (the core product guarantee). No automated verification of output file metadata. + +4. **JWT expiry and `requireAuth` middleware** — a middleware change could allow unauthenticated access. The CI smoke test only checks `/api/health`, so it would catch a middleware change that crashes the server at startup but not one that lets unauthenticated requests through. + +5. **CORS in production** — a wrong origin config makes the server exit at startup, but a partial misconfiguration (e.g. a wrong URL) could silently allow incorrect origins or block the real frontend. + +6.
**Gemini JSON parsing** — schema drift from Gemini could silently produce empty strings. The try/catch returns 502 for malformed JSON, but the empty-string case (field present, wrong type) would return 200 with empty strings. + +7. **Format gating** — a change to `cleansePolicy.js` or `ALLOWED_MIME` that opens or closes format support unexpectedly. Not currently tested automatically. + +8. **Free-tier limit counting** — `getMonthlyJobCount` uses `strftime('%Y-%m', created_at)`. A timezone or formatting bug could cause incorrect limit enforcement. + +--- + +## Recommended test suite (phased) + +### Phase 1: Minimal automated safety net (highest ROI, no new dev dependencies) + +Add these as shell-script smoke tests in CI, extending the existing `.github/workflows/ci.yml`: + +**Test 1: `/api/health` returns OK** *(already exists — keep it)* + +**Test 2: Unauthenticated request → 401** +```bash +curl -s -o /dev/null -w "%{http_code}" http://localhost:3001/api/me | grep -q "401" +``` + +**Test 3: Unknown API route → 404 JSON (not HTML)** +```bash +curl -s http://localhost:3001/api/nonexistent | python3 -c "import sys,json; d=json.load(sys.stdin); assert 'error' in d" +``` + +**Test 4: Registration → token → /api/me round-trip** +```bash +# Register (the response carries the JWT used below) +TOKEN=$(curl -sf -X POST http://localhost:3001/api/register \ + -H "Content-Type: application/json" \ + -d '{"email":"ci-test@example.com","password":"ci-password-1234"}' | python3 -c "import sys,json; print(json.load(sys.stdin)['token'])") +# /api/me with token +curl -sf http://localhost:3001/api/me -H "Authorization: Bearer $TOKEN" | python3 -c "import sys,json; d=json.load(sys.stdin); assert d['user']['plan'] == 'free'" +``` + +**Test 5: Unsupported file type → 415 (Multer) or 422 (processor)** +```bash +printf 'not a media file\n' > /tmp/test.txt  # text fixture to POST to /api/process; Multer should reject its text/plain MIME with 415 +curl -s -X POST http://localhost:3001/api/process \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@/tmp/test.txt;type=text/plain" \ + -o /dev/null -w "%{http_code}" | grep -qE "415|422" 
+``` + +**Test 6: Mock checkout returns a URL** +```bash +# Requires ENABLE_MOCK_CHECKOUT=true in CI env +curl -sf -X POST http://localhost:3001/api/create-checkout-session \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"plan":"creator"}' | python3 -c "import sys,json; d=json.load(sys.stdin); assert 'url' in d" +``` + +### Phase 2: Unit tests for critical server logic (Jest or Vitest — add as devDependency) + +Priority targets: +- `planFromPriceId()` — maps price IDs to plan names; edge case: unknown price ID → `'creator'` +- `getMonthlyJobCount()` — boundary: exactly 3 jobs in current month, 0 in previous month +- `buildSeoPrompt()` — empty payload → returns `''`; non-empty payload → returns non-empty string +- `asCleanText()` — max-length truncation; null/undefined input; string with null bytes +- `normalizeExt()` — `.mp4`, `.MP4`, no extension, multiple dots +- `isServerSupportedFormat()` — MP4 by extension, M4A by MIME alias, WAV (should return false) +- `detectMarkers()` — rule matching against mock tag sets +- `verifyFinalState()` — passes with only benign/allowed-injected tags; fails with unexpected tags + +### Phase 3: Integration tests with real ExifTool (requires test fixtures) + +Test fixtures needed (small real files, no content — metadata only): +- `fixtures/test.mp4` — MP4 with known provenance-marker tags pre-embedded +- `fixtures/test.m4a` — M4A with QuickTime timestamps and custom fields +- `fixtures/test.mp3` — MP3 for Quick Cleanse browser-side (not server-side) +- `fixtures/test.wav` — WAV for rejection testing + +For each fixture: +1. Pre-processing: capture ExifTool tag output +2. Run through `processMediaFile()` +3. 
Post-processing: verify specified tags are removed, `verifyFinalState().passed === true`, injected tags are present with correct values + +### Phase 4: End-to-end browser tests (Playwright — planned) + +Flows to automate: +- Register → login → upload MP4 → download → verify response headers +- Free-tier limit: upload 4 files → 4th triggers upgrade modal +- Upgrade flow (mock checkout) → success → plan badge updates +- Quick Cleanse: drop MP3 → browser-side processing → download +- Unsupported format: drop WAV to server process → error message shown + +--- + +## CI plan + +```yaml +# Extend .github/workflows/ci.yml +- name: Run API smoke tests + run: | + export JWT_SECRET=ci-test-secret + export STRIPE_SECRET_KEY=sk_test_ci_placeholder + export STRIPE_WEBHOOK_SECRET=whsec_ci_placeholder + export STRIPE_CREATOR_PRICE_ID=price_ci_creator + export STRIPE_STUDIO_PRICE_ID=price_ci_studio + export FRONTEND_URL=http://localhost:5173 + export DB_PATH=/tmp/spectra-ci.db + export PORT=3001 + export ENABLE_MOCK_CHECKOUT=true + node server.js & + # wait for ready + sleep 5 + bash ./scripts/smoke-tests.sh # create this file with the Phase 1 tests above +``` + +Note: Node version in CI is currently 18, but production uses 20.20.2. Recommend updating CI to `node-version: "20.20.2"` to match production. + +--- + +## Output format + +``` +## Current test surface +[What tests actually exist — confirmed from package.json and workflow files] + +## Missing coverage +[Ranked list of untested flows by risk] + +## Highest-risk flows +[Top 3–5 flows where an undetected regression would most hurt users] + +## Recommended tests +[Specific test cases with input/expected output] + +## Minimal first test suite +[The smallest set of tests that meaningfully improves safety — runnable this week] + +## Longer-term CI plan +[Phase plan for getting to full integration test coverage] +``` + +--- + +## Do not assume +- Do not assume a test runner (Jest, Vitest, etc.) 
is installed — check `package.json` devDependencies first. +- Do not assume a `test` script exists in `package.json` — it does not currently. +- Do not assume ExifTool test fixtures exist — they must be created. +- Do not assume CI uses Node 20 — it currently uses Node 18. +- Do not recommend tests that require live Stripe or Gemini API keys in CI — use mocks or env-gated skips.