Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,15 @@ GITTENSORY_REVIEW_DRAFT=false
# # instance can't be driven through App creation by a random visitor.
# PORT=8787
# DATABASE_PATH=/data/gittensory.sqlite # SQLite file on the mounted data volume; all migrations auto-apply
# POSTGRES_PASSWORD=change-this-long-random-value # used by the --profile postgres / --profile pgbouncer services
# DATABASE_URL= # set to postgres://user:pw@host:5432/db to use Postgres instead of
# # SQLite (shared DB → multi-instance). Overrides DATABASE_PATH.
# # Compose examples:
# # postgres://gittensory:<POSTGRES_PASSWORD>@postgres:5432/gittensory
# # postgres://gittensory:<POSTGRES_PASSWORD>@pgbouncer:5432/gittensory
# PGVECTOR_ENABLED=false # set true only when using the Postgres pgvector table for RAG.
# # Leave false when QDRANT_URL is set; Qdrant remains the preferred
# # dedicated vector store for review context at scale.
REDIS_URL=redis://redis:6379 # REQUIRED for the self-host review runtime. The default compose stack
# # starts Redis automatically; override for an external Redis.
# GITTENSORY_IMAGE=ghcr.io/jsonbored/gittensory-selfhost:latest # image used by scripts/deploy-selfhost-image.sh;
Expand All @@ -176,6 +183,7 @@ REDIS_URL=redis://redis:6379 # REQUIRED for the self-host review
# QDRANT_DIM=768 # vector dimension of the collection (768 = nomic-embed-text:latest;
# # 1024 = bge-m3/mxbai-embed-large). Must match AI_EMBED_MODEL;
# # recreate the Qdrant collection when changing this after startup.
# GITTENSORY_REPORTING_SOURCE_DATABASE_URL= # optional Postgres reporting reader URL. Defaults to DATABASE_URL.
# MIGRATIONS_DIR=/app/migrations
# CRON_INTERVAL_MS=120000 # maintain/sweep + sync cadence (default ~2 min)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,22 @@ LITESTREAM_REGION=us-east-1`}
/>
<CodeBlock
filename=".env"
code={`DATABASE_URL=postgres://gittensory:<password>@postgres:5432/gittensory
code={`POSTGRES_PASSWORD=<password>
DATABASE_URL=postgres://gittensory:<password>@pgbouncer:5432/gittensory
REDIS_URL=redis://redis:6379
PGVECTOR_ENABLED=true`}
QDRANT_URL=http://qdrant:6333`}
/>
<CodeBlock lang="bash" code={`docker compose --profile pgbouncer --profile qdrant up -d`} />

<h2>One-time SQLite to Postgres copy</h2>
<p>
Existing SQLite installs can copy state into a fresh Postgres database with the bundled
migrator. It dry-runs by default and only commits when <code>--execute</code> is present.
</p>
<CodeBlock
lang="bash"
code={`npm run selfhost:postgres:migrate -- --sqlite /data/gittensory.sqlite --postgres-url "$DATABASE_URL"
npm run selfhost:postgres:migrate -- --sqlite /data/gittensory.sqlite --postgres-url "$DATABASE_URL" --execute`}
/>

<h2>Restore checks</h2>
Expand Down
9 changes: 6 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -331,17 +331,20 @@ services:
- ./scripts/export-grafana-reporting-db.sh:/export-grafana-reporting-db.sh:ro
environment:
# Default SQLite app DB path maps app /data/gittensory.sqlite to exporter /appdb/gittensory.sqlite.
# If you override DATABASE_PATH, set this to the matching /appdb/<file> path. DATABASE_URL/Postgres
# deployments export an empty dashboard-safe DB so Grafana can still start until a SQL exporter is added.
# If you override DATABASE_PATH, set this to the matching /appdb/<file> path. When DATABASE_URL points at
# Postgres, the exporter reads Postgres with psql and still writes the same redacted SQLite reporting DB
# for Grafana. Set GITTENSORY_REPORTING_SOURCE_DATABASE_URL only if the reporting reader uses a different URL.
GITTENSORY_REPORTING_SOURCE_DB: "${GITTENSORY_REPORTING_SOURCE_DB:-/appdb/gittensory.sqlite}"
GITTENSORY_REPORTING_SOURCE_DATABASE_URL: "${GITTENSORY_REPORTING_SOURCE_DATABASE_URL:-}"
DATABASE_URL: "${DATABASE_URL:-}"
GITTENSORY_REPORTING_DIR: /reporting
GITTENSORY_REPORTING_DB: "${GITTENSORY_REPORTING_DB:-/reporting/gittensory-reporting.sqlite}"
GRAFANA_REPORTING_EXPORT_INTERVAL_SECONDS: "${GRAFANA_REPORTING_EXPORT_INTERVAL_SECONDS:-30}"
command:
- /bin/sh
- -c
- >-
apk add --no-cache sqlite >/dev/null 2>&1 &&
apk add --no-cache sqlite postgresql16-client >/dev/null 2>&1 &&
while true; do
sh /export-grafana-reporting-db.sh || echo '[reporting] export failed';
interval="$${GRAFANA_REPORTING_EXPORT_INTERVAL_SECONDS:-30}";
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"dev": "wrangler dev",
"deploy": "wrangler deploy",
"deploy:api": "wrangler d1 migrations apply gittensory --remote && wrangler deploy",
"selfhost:postgres:migrate": "tsx scripts/migrate-selfhost-sqlite-to-postgres.ts",
"cf-typegen": "wrangler types && perl -pi -e 's/[[:blank:]]+$//' worker-configuration.d.ts",
"db:migrate:local": "wrangler d1 migrations apply gittensory --local",
"db:migrate:remote": "wrangler d1 migrations apply gittensory --remote",
Expand Down
197 changes: 197 additions & 0 deletions scripts/export-grafana-reporting-db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,29 @@
# Abort on the first failing command (-e) and on any use of an unset variable (-u).
set -eu

# SQLite application database queried by the source_*_exists helpers.
APP_DB="${GITTENSORY_REPORTING_SOURCE_DB:-/appdb/gittensory.sqlite}"
# Postgres source URL: the reporting-specific override wins, then DATABASE_URL.
# Empty when neither is set, in which case pg_enabled reports false.
PG_DB="${GITTENSORY_REPORTING_SOURCE_DATABASE_URL:-${DATABASE_URL:-}}"
# Directory and file name of the exported reporting database.
OUT_DIR="${GITTENSORY_REPORTING_DIR:-/reporting}"
OUT_DB="${GITTENSORY_REPORTING_DB:-$OUT_DIR/gittensory-reporting.sqlite}"
# Scratch database that is built first and only renamed over OUT_DB at the end.
TMP_DB="${OUT_DB}.tmp"
# Private scratch directory for intermediate CSV files; removed by cleanup().
CSV_TMP_DIR="$(mktemp -d)"

# Remove the scratch directory that holds the intermediate CSV files.
cleanup() {
  rm -rf "$CSV_TMP_DIR"
}
# Run cleanup on every exit path. A signal trap whose action merely runs
# cleanup returns control to the script afterwards, so the export would keep
# going without its scratch directory. HUP/INT/TERM therefore exit with the
# conventional 128+signal status, and that exit fires the EXIT trap.
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Escape text for use inside a single-quoted SQL string literal by doubling
# every single quote. The escaped text is written to stdout.
#   $1 - raw text
sql_string() {
  printf '%s' "$1" |
    sed -e "s/'/''/g"
}

# Render text as a double-quoted argument for a sqlite3 dot-command:
# backslashes are doubled first, then double quotes are backslash-escaped,
# and finally the line is wrapped in double quotes. Output goes to stdout.
#   $1 - raw text (e.g. a file path or table name)
sqlite_dot_string() {
  printf '%s' "$1" |
    sed \
      -e 's/\\/\\\\/g' \
      -e 's/"/\\"/g' \
      -e 's/^/"/' \
      -e 's/$/"/'
}

# Create an empty scratch file inside CSV_TMP_DIR and print its path.
#   $1 - human-readable prefix for the file name
# The template deliberately ends in XXXXXX. mktemp implementations that hand
# the template straight to mkstemp(3) -- BusyBox, as found on Alpine images --
# reject a template with anything after the X's (such as a ".csv" suffix).
# Nothing downstream depends on the file extension.
csv_temp_file() {
  mktemp "$CSV_TMP_DIR/$1.XXXXXX"
}

# Succeed when the SQLite source database (APP_DB) reports a column named $2
# on table $1; the status is that of grep finding a "1" in the query output.
#   $1 - table name
#   $2 - column name
source_column_exists() {
  sqlite3 "$APP_DB" \
    "SELECT 1 FROM pragma_table_info('$1') WHERE name = '$2' LIMIT 1" |
    grep -q 1
}
Expand All @@ -18,6 +33,51 @@ source_table_exists() {
sqlite3 "$APP_DB" "SELECT 1 FROM sqlite_master WHERE type='table' AND name='$1' LIMIT 1" | grep -q 1
}

# Succeed when PG_DB holds a Postgres connection URL, i.e. it starts with
# postgres:// or postgresql://; fail for anything else, including empty.
pg_enabled() {
  case "$PG_DB" in
    postgres://*) return 0 ;;
    postgresql://*) return 0 ;;
  esac
  return 1
}

# Run one SQL statement against PG_DB and print the bare result: psqlrc is
# skipped, output is quiet, tuples-only and unaligned, and ON_ERROR_STOP makes
# psql stop on a failing statement.
#   $1 - SQL text
pg_scalar() {
  psql "$PG_DB" \
    --no-psqlrc \
    --quiet \
    --tuples-only \
    --no-align \
    --set ON_ERROR_STOP=1 \
    --command "$1"
}

# Succeed when table $1 exists in the Postgres "public" schema.
#   $1 - table name
# If the lookup itself fails, a message is printed to stderr and the shell
# exits with status 1 rather than reporting the table as missing.
pg_table_exists() {
  if ! value="$(pg_scalar "SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = '$1' LIMIT 1")"; then
    echo "reporting export failed: could not inspect Postgres table $1" >&2
    exit 1
  fi
  test "$value" = "1"
}

# Succeed when column $2 exists on table $1 in the Postgres "public" schema.
#   $1 - table name
#   $2 - column name
# If the lookup itself fails, a message is printed to stderr and the shell
# exits with status 1 rather than reporting the column as missing.
pg_column_exists() {
  if ! value="$(pg_scalar "SELECT 1 FROM information_schema.columns WHERE table_schema = 'public' AND table_name = '$1' AND column_name = '$2' LIMIT 1")"; then
    echo "reporting export failed: could not inspect Postgres column $1.$2" >&2
    exit 1
  fi
  test "$value" = "1"
}

# Stream the rows of a query out of PG_DB as CSV (no header row) into a file.
#   $1 - SELECT statement, placed inside COPY (...) TO STDOUT WITH CSV
#   $2 - destination file, truncated before writing
pg_copy_csv() {
  query="$1"
  out="$2"
  psql "$PG_DB" \
    --no-psqlrc \
    --quiet \
    --set ON_ERROR_STOP=1 \
    --command "COPY ($query) TO STDOUT WITH CSV" \
    >"$out"
}

# Load a CSV file into a table of the scratch database TMP_DB using the
# sqlite3 shell's ".import" in csv mode. A missing or empty file is a no-op.
#   $1 - CSV file path
#   $2 - destination table name
# Both arguments are quoted with sqlite_dot_string before being placed on the
# dot-command line.
sqlite_import_csv() {
  csv="$1"
  table="$2"
  if [ ! -s "$csv" ]; then
    return 0
  fi
  csv_arg="$(sqlite_dot_string "$csv")"
  table_arg="$(sqlite_dot_string "$table")"
  printf '.mode csv\n.import %s %s\n' "$csv_arg" "$table_arg" |
    sqlite3 "$TMP_DB"
}

mkdir -p "$OUT_DIR"

rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"
Expand Down Expand Up @@ -53,6 +113,143 @@ CREATE INDEX ai_usage_events_feature_created_idx ON ai_usage_events(feature, cre
CREATE INDEX ai_usage_events_model_created_idx ON ai_usage_events(model, created_at);
SQL

# Postgres export path, taken when PG_DB is a postgres:// or postgresql:// URL.
# Each section below copies rows out of Postgres as CSV and imports them into
# TMP_DB; the block then renames TMP_DB over OUT_DB and exits 0, so nothing
# after this block runs for a Postgres source.
if pg_enabled; then
# psql is required for every query below; fail early with a clear message.
if ! command -v psql >/dev/null 2>&1; then
rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"
echo "reporting export failed: DATABASE_URL is Postgres but psql is not installed" >&2
exit 1
fi

# None of the four source tables exist: if a previous non-empty export is
# present, keep it and stop with status 1. Without a previous export the
# block falls through and publishes TMP_DB as it stands.
if ! pg_table_exists "pull_requests" &&
! pg_table_exists "advisories" &&
! pg_table_exists "review_targets" &&
! pg_table_exists "ai_usage_events"; then
if [ -s "$OUT_DB" ]; then
rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"
echo "reporting export skipped: no reporting source tables in Postgres; preserving last good $OUT_DB" >&2
exit 1
fi
fi

# Current pull requests: every pull_requests row is joined to its most recent
# advisory (rn = 1, ordered by updated_at then id, newest first). status and
# verdict are derived from the PR state and the advisory conclusion, and
# updated_at is the later of the two timestamps. Rows land in review_targets.
if pg_table_exists "pull_requests" && pg_table_exists "advisories"; then
PR_CSV="$(csv_temp_file "pull-requests")"
pg_copy_csv "
WITH latest_advisories AS (
SELECT
repo_full_name,
pull_number,
conclusion,
updated_at,
ROW_NUMBER() OVER (
PARTITION BY repo_full_name, pull_number
ORDER BY updated_at DESC, id DESC
) AS rn
FROM advisories
WHERE pull_number IS NOT NULL
),
current_pull_requests AS (
SELECT
p.repo_full_name AS repo,
p.number AS number,
p.author_login AS submitter,
CASE
WHEN lower(p.state) = 'closed' AND p.merged_at IS NOT NULL THEN 'merged'
WHEN lower(p.state) = 'closed' THEN 'closed'
WHEN a.conclusion IN ('failure', 'action_required') THEN 'manual'
WHEN a.conclusion IS NOT NULL THEN 'commented'
ELSE 'manual'
END AS status,
CASE a.conclusion
WHEN 'success' THEN 'merge'
WHEN 'failure' THEN 'close'
WHEN 'action_required' THEN 'manual'
WHEN 'neutral' THEN 'comment'
WHEN 'skipped' THEN 'ignore'
ELSE NULL
END AS verdict,
p.title AS title,
p.created_at AS created_at,
CASE
WHEN a.updated_at IS NOT NULL AND a.updated_at > p.updated_at THEN a.updated_at
ELSE p.updated_at
END AS updated_at
FROM pull_requests p
LEFT JOIN latest_advisories a
ON a.repo_full_name = p.repo_full_name
AND a.pull_number = p.number
AND a.rn = 1
)
SELECT
repo,
number,
submitter,
status,
verdict,
title,
created_at,
updated_at
FROM current_pull_requests
" "$PR_CSV"
sqlite_import_csv "$PR_CSV" "review_targets"
fi

# Legacy review_targets rows of kind 'pull_request'. When pull_requests exists,
# rows that also appear there are filtered out so they are not imported twice.
# NOTE(review): the filter is applied whenever pull_requests exists, while the
# section above also requires advisories; with pull_requests present and
# advisories absent, such PRs appear to be exported by neither section --
# confirm whether that combination can occur.
if pg_table_exists "review_targets"; then
LEGACY_CSV="$(csv_temp_file "legacy-review-targets")"
if pg_table_exists "pull_requests"; then
LEGACY_FILTER="AND NOT EXISTS (SELECT 1 FROM pull_requests p WHERE p.repo_full_name = t.repo AND p.number = t.number)"
else
LEGACY_FILTER=""
fi
pg_copy_csv "
SELECT
t.repo,
t.number,
t.submitter,
t.status,
t.verdict,
t.title,
t.created_at,
t.updated_at
FROM review_targets t
WHERE t.kind = 'pull_request'
$LEGACY_FILTER
" "$LEGACY_CSV"
sqlite_import_csv "$LEGACY_CSV" "review_targets"
fi

# AI usage events, limited to feature 'ai_review_pr'. metadata_json is rebuilt
# with only the repoFullName and pullNumber keys, and estimated_neurons falls
# back to a constant 0 when the source table has no such column.
if pg_table_exists "ai_usage_events"; then
AI_CSV="$(csv_temp_file "ai-usage-events")"
if pg_column_exists "ai_usage_events" "estimated_neurons"; then
ESTIMATED_NEURONS_EXPR="COALESCE(estimated_neurons, 0)"
else
ESTIMATED_NEURONS_EXPR="0"
fi
pg_copy_csv "
SELECT
feature,
model,
status,
$ESTIMATED_NEURONS_EXPR AS estimated_neurons,
detail,
json_build_object(
'repoFullName', metadata_json::jsonb ->> 'repoFullName',
'pullNumber', metadata_json::jsonb ->> 'pullNumber'
)::text AS metadata_json,
created_at
FROM ai_usage_events
WHERE feature = 'ai_review_pr'
" "$AI_CSV"
sqlite_import_csv "$AI_CSV" "ai_usage_events"
fi

# NOTE(review): Postgres NULLs presumably reach SQLite as empty strings, since
# COPY ... WITH CSV writes NULL as an empty field and the imports above go
# through the sqlite3 shell's csv-mode .import -- confirm the dashboards
# tolerate '' where the SQLite export path would yield NULL (e.g. verdict).

# Publish only a database that passes quick_check: under set -e a non-"ok"
# result (grep's exit status) aborts the script before the rename.
sqlite3 "$TMP_DB" "PRAGMA quick_check;" | grep -qx "ok"
mv "$TMP_DB" "$OUT_DB"
rm -f "$TMP_DB-wal" "$TMP_DB-shm"

echo "reporting export complete: $OUT_DB"
exit 0
fi

if [ ! -s "$APP_DB" ]; then
if [ -s "$OUT_DB" ]; then
rm -f "$TMP_DB" "$TMP_DB-wal" "$TMP_DB-shm"
Expand Down
Loading
Loading