JSONbored · JSONbored · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026
@@ -156,8 +156,15 @@ GITTENSORY_REVIEW_DRAFT=false
 #                                            #   instance can't be driven through App creation by a random visitor.
 # PORT=8787
 # DATABASE_PATH=/data/gittensory.sqlite     # SQLite file on the mounted data volume; all migrations auto-apply
+# POSTGRES_PASSWORD=change-this-long-random-value  # used by the --profile postgres / --profile pgbouncer services
 # DATABASE_URL=                              # set to postgres://user:pw@host:5432/db to use Postgres instead of
 #                                            #   SQLite (shared DB → multi-instance). Overrides DATABASE_PATH.
+#                                            #   Compose examples:
+#                                            #   postgres://gittensory:<POSTGRES_PASSWORD>@postgres:5432/gittensory
+#                                            #   postgres://gittensory:<POSTGRES_PASSWORD>@pgbouncer:5432/gittensory
+# PGVECTOR_ENABLED=false                     # set true only when using the Postgres pgvector table for RAG.
+#                                            #   Leave false when QDRANT_URL is set; Qdrant remains the preferred
+#                                            #   dedicated vector store for review context at scale.
 REDIS_URL=redis://redis:6379                # REQUIRED for the self-host review runtime. The default compose stack
 #                                            #   starts Redis automatically; override for an external Redis.
 # GITTENSORY_IMAGE=ghcr.io/jsonbored/gittensory-selfhost:latest  # image used by scripts/deploy-selfhost-image.sh;
@@ -176,6 +183,7 @@ REDIS_URL=redis://redis:6379                # REQUIRED for the self-host review
 # QDRANT_DIM=768                             # vector dimension of the collection (768 = nomic-embed-text:latest;
 #                                            #   1024 = bge-m3/mxbai-embed-large). Must match AI_EMBED_MODEL;
 #                                            #   recreate the Qdrant collection when changing this after startup.
+# GITTENSORY_REPORTING_SOURCE_DATABASE_URL=  # optional Postgres reporting reader URL. Defaults to DATABASE_URL.
 # MIGRATIONS_DIR=/app/migrations
 # CRON_INTERVAL_MS=120000                    # maintain/sweep + sync cadence (default ~2 min)
 

@@ -74,9 +74,22 @@ LITESTREAM_REGION=us-east-1`}
       />
       <CodeBlock
         filename=".env"
-        code={`DATABASE_URL=postgres://gittensory:<password>@postgres:5432/gittensory
+        code={`POSTGRES_PASSWORD=<password>
+DATABASE_URL=postgres://gittensory:<password>@pgbouncer:5432/gittensory
 REDIS_URL=redis://redis:6379
-PGVECTOR_ENABLED=true`}
+QDRANT_URL=http://qdrant:6333`}
+      />
+      <CodeBlock lang="bash" code={`docker compose --profile pgbouncer --profile qdrant up -d`} />
+
+      <h2>One-time SQLite to Postgres copy</h2>
+      <p>
+        Existing SQLite installs can copy state into a fresh Postgres database with the bundled
+        migrator. It dry-runs by default and only commits when <code>--execute</code> is present.
+      </p>
+      <CodeBlock
+        lang="bash"
+        code={`npm run selfhost:postgres:migrate -- --sqlite /data/gittensory.sqlite --postgres-url "$DATABASE_URL"
+npm run selfhost:postgres:migrate -- --sqlite /data/gittensory.sqlite --postgres-url "$DATABASE_URL" --execute`}
       />
 
       <h2>Restore checks</h2>

@@ -331,17 +331,20 @@ services:
       - ./scripts/export-grafana-reporting-db.sh:/export-grafana-reporting-db.sh:ro
     environment:
       # Default SQLite app DB path maps app /data/gittensory.sqlite to exporter /appdb/gittensory.sqlite.
-      # If you override DATABASE_PATH, set this to the matching /appdb/<file> path. DATABASE_URL/Postgres
-      # deployments export an empty dashboard-safe DB so Grafana can still start until a SQL exporter is added.
+      # If you override DATABASE_PATH, set this to the matching /appdb/<file> path. When DATABASE_URL points at
+      # Postgres, the exporter reads Postgres with psql and still writes the same redacted SQLite reporting DB
+      # for Grafana. Set GITTENSORY_REPORTING_SOURCE_DATABASE_URL only if the reporting reader uses a different URL.
       GITTENSORY_REPORTING_SOURCE_DB: "${GITTENSORY_REPORTING_SOURCE_DB:-/appdb/gittensory.sqlite}"
+      GITTENSORY_REPORTING_SOURCE_DATABASE_URL: "${GITTENSORY_REPORTING_SOURCE_DATABASE_URL:-}"
+      DATABASE_URL: "${DATABASE_URL:-}"
       GITTENSORY_REPORTING_DIR: /reporting
       GITTENSORY_REPORTING_DB: "${GITTENSORY_REPORTING_DB:-/reporting/gittensory-reporting.sqlite}"
       GRAFANA_REPORTING_EXPORT_INTERVAL_SECONDS: "${GRAFANA_REPORTING_EXPORT_INTERVAL_SECONDS:-30}"
     command:
       - /bin/sh
       - -c
       - >-
-        apk add --no-cache sqlite >/dev/null 2>&1 &&
+        apk add --no-cache sqlite postgresql16-client >/dev/null 2>&1 &&
         while true; do
           sh /export-grafana-reporting-db.sh || echo '[reporting] export failed';
           interval="$${GRAFANA_REPORTING_EXPORT_INTERVAL_SECONDS:-30}";

@@ -14,6 +14,7 @@
     "dev": "wrangler dev",
     "deploy": "wrangler deploy",
     "deploy:api": "wrangler d1 migrations apply gittensory --remote && wrangler deploy",
+    "selfhost:postgres:migrate": "tsx scripts/migrate-selfhost-sqlite-to-postgres.ts",
     "cf-typegen": "wrangler types && perl -pi -e 's/[[:blank:]]+$//' worker-configuration.d.ts",
     "db:migrate:local": "wrangler d1 migrations apply gittensory --local",
     "db:migrate:remote": "wrangler d1 migrations apply gittensory --remote",

@@ -2,14 +2,29 @@
 set -eu
 
 # SQLite application database read by the source_* helpers in this script.
 APP_DB="${GITTENSORY_REPORTING_SOURCE_DB:-/appdb/gittensory.sqlite}"
+# Postgres source URL: the reporting-specific URL wins, then DATABASE_URL; empty when neither is set.
+PG_DB="${GITTENSORY_REPORTING_SOURCE_DATABASE_URL:-${DATABASE_URL:-}}"
 OUT_DIR="${GITTENSORY_REPORTING_DIR:-/reporting}"
 OUT_DB="${GITTENSORY_REPORTING_DB:-$OUT_DIR/gittensory-reporting.sqlite}"
 # The export is built at this temporary path next to OUT_DB.
 TMP_DB="${OUT_DB}.tmp"
+# Scratch directory for the intermediate CSV files produced by the Postgres export.
+CSV_TMP_DIR="$(mktemp -d)"
+
+# Remove the scratch directory that holds the intermediate CSV files.
+cleanup() {
+  rm -rf "$CSV_TMP_DIR"
+}
+# Clean up on every exit path. A signal trap whose action simply returns lets the
+# script resume after the interrupted command (with the scratch directory already
+# gone), so the signal traps exit explicitly with 128 + signal number; that exit
+# then runs the EXIT trap exactly once.
+trap cleanup EXIT
+trap 'exit 129' HUP
+trap 'exit 130' INT
+trap 'exit 143' TERM
 
 # Escape $1 for embedding inside a single-quoted SQL string literal by doubling each single quote.
 sql_string() {
   printf '%s' "$1" | sed -e "s/'/''/g"
 }
 
+# Render $1 as a double-quoted argument for a sqlite3 dot-command: backslashes and
+# double quotes are backslash-escaped first, then the text is wrapped in double quotes.
+sqlite_dot_string() {
+  printf '%s' "$1" | sed \
+    -e 's/\\/\\\\/g' \
+    -e 's/"/\\"/g' \
+    -e 's/^/"/' \
+    -e 's/$/"/'
+}
+
+# Create a uniquely named scratch file for export "$1" inside CSV_TMP_DIR and print its path.
+# The template has to end in XXXXXX: mktemp implementations built directly on mkstemp(3),
+# such as BusyBox on Alpine, reject a template that carries a suffix (e.g. ".csv") after
+# the X's, which would abort the export under `set -e`.
+csv_temp_file() {
+  mktemp "$CSV_TMP_DIR/$1.XXXXXX"
+}
+
 # Succeed when the SQLite source database ($APP_DB) has a column named $2 on table $1.
 # Both names are spliced into the SQL text unescaped; the callers visible here pass literals.
 source_column_exists() {
   sqlite3 "$APP_DB" "SELECT 1 FROM pragma_table_info('$1') WHERE name = '$2' LIMIT 1" | grep -q 1
 }
@@ -18,6 +33,51 @@ source_table_exists() {
   sqlite3 "$APP_DB" "SELECT 1 FROM sqlite_master WHERE type='table' AND name='$1' LIMIT 1" | grep -q 1
 }
 
+# Succeed only when PG_DB is a postgres:// or postgresql:// URL.
+pg_enabled() {
+  case "$PG_DB" in
+    postgres://* | postgresql://*) ;;
+    *) return 1 ;;
+  esac
+}
+
+# Run SQL statement $1 against PG_DB and print the bare result: no psqlrc, no banner,
+# tuples only, unaligned output, and a non-zero exit on the first SQL error.
+pg_scalar() {
+  psql "$PG_DB" \
+    --no-psqlrc \
+    --quiet \
+    --tuples-only \
+    --no-align \
+    --set ON_ERROR_STOP=1 \
+    --command "$1"
+}
+
+# Succeed when table $1 exists in the Postgres "public" schema.
+# A failed lookup (as opposed to "no such table") aborts the whole export.
+pg_table_exists() {
+  if ! value="$(pg_scalar "SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = '$1' LIMIT 1")"; then
+    echo "reporting export failed: could not inspect Postgres table $1" >&2
+    exit 1
+  fi
+  test "$value" = "1"
+}
+
+# Succeed when column $2 exists on table $1 in the Postgres "public" schema.
+# A failed lookup (as opposed to "no such column") aborts the whole export.
+pg_column_exists() {
+  if ! value="$(pg_scalar "SELECT 1 FROM information_schema.columns WHERE table_schema = 'public' AND table_name = '$1' AND column_name = '$2' LIMIT 1")"; then
+    echo "reporting export failed: could not inspect Postgres column $1.$2" >&2
+    exit 1
+  fi
+  test "$value" = "1"
+}
+
+# Stream the rows of SELECT statement $1 from PG_DB into file $2 as header-less CSV.
+# The statement is wrapped in COPY (...) TO STDOUT, so it must not end with a semicolon.
+pg_copy_csv() {
+  query="$1"
+  out="$2"
+  psql "$PG_DB" \
+    --no-psqlrc \
+    --quiet \
+    --set ON_ERROR_STOP=1 \
+    --command "COPY ($query) TO STDOUT WITH CSV" \
+    >"$out"
+}
+
+# Append the rows of CSV file $1 to table $2 of the temporary reporting database.
+# An empty (or missing) CSV is a no-op.
+# NOTE(review): sqlite3's CSV import presumably stores empty fields as '' rather than
+# NULL, so NULLs exported from Postgres may arrive as empty strings — confirm that the
+# dashboards tolerate this.
+sqlite_import_csv() {
+  csv="$1"
+  table="$2"
+  if [ ! -s "$csv" ]; then
+    return 0
+  fi
+  csv_arg="$(sqlite_dot_string "$csv")"
+  table_arg="$(sqlite_dot_string "$table")"
+  printf '.mode csv\n.import %s %s\n' "$csv_arg" "$table_arg" | sqlite3 "$TMP_DB"
+}
+
 # Make sure the output directory exists before anything is written into it.
 mkdir -p "$OUT_DIR"
 
 # Start from a clean slate: drop any temp DB (and its WAL/SHM sidecars) left by an earlier run.
 rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"
@@ -53,6 +113,143 @@ CREATE INDEX ai_usage_events_feature_created_idx ON ai_usage_events(feature, cre
 CREATE INDEX ai_usage_events_model_created_idx ON ai_usage_events(model, created_at);
 SQL
 
+# Postgres source: when PG_DB is a Postgres URL, fill the temp reporting DB from Postgres via
+# psql + CSV import, publish it, and exit before the SQLite-source handling further down.
+if pg_enabled; then
+  # Every query below goes through psql; without it, discard the half-built temp DB and fail.
+  if ! command -v psql >/dev/null 2>&1; then
+    rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"
+    echo "reporting export failed: DATABASE_URL is Postgres but psql is not installed" >&2
+    exit 1
+  fi
+
+  # None of the four source tables exist: keep the previously published DB (if there is one)
+  # instead of replacing it. When nothing has been published yet, fall through; the imports
+  # below are all skipped and the temp DB is published as-is.
+  if ! pg_table_exists "pull_requests" &&
+     ! pg_table_exists "advisories" &&
+     ! pg_table_exists "review_targets" &&
+     ! pg_table_exists "ai_usage_events"; then
+    if [ -s "$OUT_DB" ]; then
+      rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"
+      echo "reporting export skipped: no reporting source tables in Postgres; preserving last good $OUT_DB" >&2
+      exit 1
+    fi
+  fi
+
+  # Current pull requests, each joined to its most recent advisory (rn = 1) to derive the
+  # status and verdict columns. Runs only when both pull_requests and advisories exist.
+  if pg_table_exists "pull_requests" && pg_table_exists "advisories"; then
+    PR_CSV="$(csv_temp_file "pull-requests")"
+    pg_copy_csv "
+WITH latest_advisories AS (
+  SELECT
+    repo_full_name,
+    pull_number,
+    conclusion,
+    updated_at,
+    ROW_NUMBER() OVER (
+      PARTITION BY repo_full_name, pull_number
+      ORDER BY updated_at DESC, id DESC
+    ) AS rn
+  FROM advisories
+  WHERE pull_number IS NOT NULL
+),
+current_pull_requests AS (
+  SELECT
+    p.repo_full_name AS repo,
+    p.number AS number,
+    p.author_login AS submitter,
+    CASE
+      WHEN lower(p.state) = 'closed' AND p.merged_at IS NOT NULL THEN 'merged'
+      WHEN lower(p.state) = 'closed' THEN 'closed'
+      WHEN a.conclusion IN ('failure', 'action_required') THEN 'manual'
+      WHEN a.conclusion IS NOT NULL THEN 'commented'
+      ELSE 'manual'
+    END AS status,
+    CASE a.conclusion
+      WHEN 'success' THEN 'merge'
+      WHEN 'failure' THEN 'close'
+      WHEN 'action_required' THEN 'manual'
+      WHEN 'neutral' THEN 'comment'
+      WHEN 'skipped' THEN 'ignore'
+      ELSE NULL
+    END AS verdict,
+    p.title AS title,
+    p.created_at AS created_at,
+    CASE
+      WHEN a.updated_at IS NOT NULL AND a.updated_at > p.updated_at THEN a.updated_at
+      ELSE p.updated_at
+    END AS updated_at
+  FROM pull_requests p
+  LEFT JOIN latest_advisories a
+    ON a.repo_full_name = p.repo_full_name
+   AND a.pull_number = p.number
+   AND a.rn = 1
+)
+SELECT
+  repo,
+  number,
+  submitter,
+  status,
+  verdict,
+  title,
+  created_at,
+  updated_at
+FROM current_pull_requests
+" "$PR_CSV"
+    sqlite_import_csv "$PR_CSV" "review_targets"
+  fi
+
+  # Legacy review_targets rows of kind 'pull_request'. When pull_requests exists, rows that
+  # also appear there (same repo and number) are excluded by LEGACY_FILTER.
+  if pg_table_exists "review_targets"; then
+    LEGACY_CSV="$(csv_temp_file "legacy-review-targets")"
+    if pg_table_exists "pull_requests"; then
+      LEGACY_FILTER="AND NOT EXISTS (SELECT 1 FROM pull_requests p WHERE p.repo_full_name = t.repo AND p.number = t.number)"
+    else
+      LEGACY_FILTER=""
+    fi
+    pg_copy_csv "
+SELECT
+  t.repo,
+  t.number,
+  t.submitter,
+  t.status,
+  t.verdict,
+  t.title,
+  t.created_at,
+  t.updated_at
+FROM review_targets t
+WHERE t.kind = 'pull_request'
+  $LEGACY_FILTER
+" "$LEGACY_CSV"
+    sqlite_import_csv "$LEGACY_CSV" "review_targets"
+  fi
+
+  # AI usage events for feature 'ai_review_pr'. metadata_json is reduced to the repoFullName
+  # and pullNumber keys, and estimated_neurons falls back to 0 when the column is absent.
+  if pg_table_exists "ai_usage_events"; then
+    AI_CSV="$(csv_temp_file "ai-usage-events")"
+    if pg_column_exists "ai_usage_events" "estimated_neurons"; then
+      ESTIMATED_NEURONS_EXPR="COALESCE(estimated_neurons, 0)"
+    else
+      ESTIMATED_NEURONS_EXPR="0"
+    fi
+    pg_copy_csv "
+SELECT
+  feature,
+  model,
+  status,
+  $ESTIMATED_NEURONS_EXPR AS estimated_neurons,
+  detail,
+  json_build_object(
+    'repoFullName', metadata_json::jsonb ->> 'repoFullName',
+    'pullNumber', metadata_json::jsonb ->> 'pullNumber'
+  )::text AS metadata_json,
+  created_at
+FROM ai_usage_events
+WHERE feature = 'ai_review_pr'
+" "$AI_CSV"
+    sqlite_import_csv "$AI_CSV" "ai_usage_events"
+  fi
+
+  # Publish only when the integrity check prints exactly "ok"; under `set -e` a failed
+  # check aborts the script before the mv, leaving the previously published DB in place.
+  sqlite3 "$TMP_DB" "PRAGMA quick_check;" | grep -qx "ok"
+  mv "$TMP_DB" "$OUT_DB"
+  rm -f "$TMP_DB-wal" "$TMP_DB-shm"
+
+  echo "reporting export complete: $OUT_DB"
+  exit 0
+fi
+
 if [ ! -s "$APP_DB" ]; then
   if [ -s "$OUT_DB" ]; then
     rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"