Skip to content

Commit 124a7f5

Browse files
committed
Various fixes to the ML pipeline, UI, models, and backend
1 parent 74ff8f0 commit 124a7f5

File tree

29 files changed

+2070
-478
lines changed

29 files changed

+2070
-478
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,6 @@ test-output/
128128
coverage/
129129
.coverage
130130
htmlcov/
131+
132+
# Persistent data (local dev)
133+
/data/

Makefile

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,33 +26,42 @@ help: ## Show this help
2626
@echo ''
2727
@echo '${GREEN}AWARE Fund${RESET} - The Vanguard of Prediction Markets'
2828
@echo ''
29-
@echo '${YELLOW}Local Development:${RESET}'
30-
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | grep -E 'local|up|down|build|logs|status|clean' | awk 'BEGIN {FS = ":.*?## "}; {printf " ${BLUE}%-15s${RESET} %s\n", $$1, $$2}'
29+
@echo '${YELLOW}Quick Start:${RESET}'
30+
@echo ' ${BLUE}make build${RESET} Rebuild Docker images (after code changes)'
31+
@echo ' ${BLUE}make local${RESET} Start everything (services + analytics)'
32+
@echo ' ${BLUE}make down${RESET} Stop everything'
3133
@echo ''
32-
@echo '${YELLOW}Server Deployment:${RESET}'
33-
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | grep -E 'deploy|server' | awk 'BEGIN {FS = ":.*?## "}; {printf " ${BLUE}%-15s${RESET} %s\n", $$1, $$2}'
34+
@echo '${YELLOW}ML Training:${RESET}'
35+
@echo ' ${BLUE}make train${RESET} Quick (10K traders, ~8 min)'
36+
@echo ' ${BLUE}make train-all${RESET} All eligible traders (currently ~4K)'
3437
@echo ''
35-
@echo '${YELLOW}Development:${RESET}'
36-
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | grep -E 'test|lint|java|python' | awk 'BEGIN {FS = ":.*?## "}; {printf " ${BLUE}%-15s${RESET} %s\n", $$1, $$2}'
38+
@echo '${YELLOW}Other:${RESET}'
39+
@echo ' ${BLUE}make analytics${RESET} Run analytics only'
40+
@echo ' ${BLUE}make logs${RESET} View logs'
41+
@echo ' ${BLUE}make status${RESET} Health check'
3742
@echo ''
3843

3944
# ═══════════════════════════════════════════════════════════════════════════════
4045
# LOCAL DEVELOPMENT
4146
# ═══════════════════════════════════════════════════════════════════════════════
4247

43-
local: docker-check ## Start full local stack (all services)
48+
local: docker-check ## Start everything + run analytics (one command!)
4449
@echo '${GREEN}Starting AWARE Fund local stack...${RESET}'
4550
docker compose -f docker-compose.local.yaml up -d
4651
@echo ''
47-
@echo '${GREEN}Services starting. Access:${RESET}'
52+
@echo '${YELLOW}Waiting for services to be ready...${RESET}'
53+
@sleep 10
54+
@echo '${GREEN}Running analytics pipeline...${RESET}'
55+
@docker exec aware-analytics python3 run_all.py 2>&1 | grep -E "(INFO|WARNING|Complete)" | tail -20 || true
56+
@echo ''
57+
@echo '${GREEN}✓ AWARE Fund ready!${RESET}'
58+
@echo ''
4859
@echo ' Web Dashboard: http://localhost:3000'
4960
@echo ' Python API: http://localhost:8000'
5061
@echo ' Strategy Service: http://localhost:8081'
51-
@echo ' Executor Service: http://localhost:8080'
5262
@echo ' ClickHouse: http://localhost:8123'
5363
@echo ''
54-
@echo 'Run ${BLUE}make logs${RESET} to view logs'
55-
@echo 'Run ${BLUE}make status${RESET} to check health'
64+
@echo 'Commands: ${BLUE}make train${RESET} (retrain ML) | ${BLUE}make logs${RESET} | ${BLUE}make down${RESET}'
5665

5766
up: local ## Alias for 'make local'
5867

@@ -141,12 +150,42 @@ python-setup: ## Setup Python virtual environments
141150
cd aware-fund/services/analytics && python -m venv .venv && .venv/bin/pip install -r requirements.txt
142151
cd aware-fund/services/api && python -m venv .venv && .venv/bin/pip install -r requirements.txt
143152

144-
python-analytics: ## Run analytics jobs (requires infra)
153+
python-analytics: ## Run analytics jobs ONCE (requires infra)
145154
cd aware-fund/services/analytics && source .venv/bin/activate && CLICKHOUSE_HOST=localhost python run_all.py
146155

156+
python-analytics-continuous: ## Run ML analytics continuously (hourly updates)
157+
@echo '${GREEN}Starting ML analytics pipeline (continuous mode)...${RESET}'
158+
cd aware-fund/services/analytics && source .venv/bin/activate && CLICKHOUSE_HOST=localhost python run_all.py --continuous --interval 3600
159+
147160
python-api: ## Start Python API (requires infra)
148161
cd aware-fund/services/api && source .venv/bin/activate && CLICKHOUSE_HOST=localhost uvicorn main:app --reload --host 0.0.0.0 --port 8000
149162

163+
# ═══════════════════════════════════════════════════════════════════════════════
164+
# ML PIPELINE
165+
# ═══════════════════════════════════════════════════════════════════════════════
166+
167+
train: ## Retrain ML models (quick: 10K traders, ~8 min)
168+
@echo '${GREEN}Retraining ML models (quick mode)...${RESET}'
169+
@echo 'Training on 10K traders, 50 epochs (~8 min)'
170+
@docker exec aware-analytics python3 -m ml.training.train --max-traders 10000 --epochs 50
171+
@echo ''
172+
@echo '${GREEN}Running analytics with new model...${RESET}'
173+
@docker exec aware-analytics python3 run_all.py 2>&1 | grep -E "(INFO|Complete)" | tail -10
174+
@echo '${GREEN}✓ Training complete! Refresh UI to see changes.${RESET}'
175+
176+
train-all: ## Retrain on ALL eligible traders (no limit)
177+
@echo '${GREEN}Retraining ML models on ALL data...${RESET}'
178+
@echo 'Training on all eligible traders, 100 epochs'
179+
@docker exec aware-analytics python3 -m ml.training.train --max-traders 999999 --epochs 100
180+
@echo ''
181+
@echo '${GREEN}Running analytics with new model...${RESET}'
182+
@docker exec aware-analytics python3 run_all.py 2>&1 | grep -E "(INFO|Complete)" | tail -10
183+
@echo '${GREEN}✓ Training complete! Refresh UI to see changes.${RESET}'
184+
185+
analytics: ## Run analytics pipeline only (no training)
186+
@echo '${GREEN}Running analytics...${RESET}'
187+
@docker exec aware-analytics python3 run_all.py 2>&1 | grep -E "(INFO|WARNING|Complete)" | tail -20
188+
150189
# ═══════════════════════════════════════════════════════════════════════════════
151190
# WEB DASHBOARD
152191
# ═══════════════════════════════════════════════════════════════════════════════
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
-- ============================================================================
2+
-- AWARE ML Explainability Schema
3+
-- ============================================================================
4+
-- Stores ML model metadata, feature importance, tier boundaries, and training history
5+
-- for transparency, monitoring, and debugging.
6+
-- ============================================================================
7+
8+
-- ----------------------------------------------------------------------------
9+
-- Feature Importance
10+
-- ----------------------------------------------------------------------------
11+
-- Stores feature importance scores from XGBoost and other models.
12+
-- Updated after each training run.
13+
14+
CREATE TABLE IF NOT EXISTS polybot.aware_ml_feature_importance (
15+
feature_name String,
16+
importance_score Float32,
17+
importance_rank UInt16,
18+
model_version LowCardinality(String),
19+
importance_type LowCardinality(String) DEFAULT 'weight', -- 'weight', 'gain', 'cover'
20+
calculated_at DateTime64(3) DEFAULT now64(3)
21+
)
22+
ENGINE = ReplacingMergeTree(calculated_at)
23+
ORDER BY (model_version, feature_name)
24+
SETTINGS index_granularity = 8192;
25+
26+
27+
-- ----------------------------------------------------------------------------
28+
-- Tier Boundaries
29+
-- ----------------------------------------------------------------------------
30+
-- Documents the score ranges for each tier (BRONZE, SILVER, GOLD, DIAMOND).
31+
-- Used for explainability and consistency checks.
32+
33+
CREATE TABLE IF NOT EXISTS polybot.aware_ml_tier_boundaries (
34+
tier_name LowCardinality(String), -- BRONZE, SILVER, GOLD, DIAMOND
35+
tier_order UInt8, -- 1, 2, 3, 4 for sorting
36+
score_min Float32,
37+
score_max Float32,
38+
confidence_threshold Float32 DEFAULT 0.5,
39+
description String,
40+
model_version LowCardinality(String),
41+
updated_at DateTime64(3) DEFAULT now64(3)
42+
)
43+
ENGINE = ReplacingMergeTree(updated_at)
44+
ORDER BY (model_version, tier_order)
45+
SETTINGS index_granularity = 8192;
46+
47+
48+
-- ----------------------------------------------------------------------------
49+
-- Training Runs
50+
-- ----------------------------------------------------------------------------
51+
-- Logs each model training run for tracking and rollback capability.
52+
53+
CREATE TABLE IF NOT EXISTS polybot.aware_ml_training_runs (
54+
run_id UUID DEFAULT generateUUIDv4(),
55+
model_version LowCardinality(String),
56+
started_at DateTime64(3),
57+
completed_at DateTime64(3),
58+
duration_seconds UInt32,
59+
status LowCardinality(String) DEFAULT 'running', -- 'running', 'success', 'failed', 'rolled_back'
60+
61+
-- Training data stats
62+
n_traders UInt32,
63+
n_trades UInt32,
64+
train_split_ratio Float32,
65+
66+
-- Final metrics
67+
tier_accuracy Float32,
68+
sharpe_mae Float32,
69+
val_loss Float32,
70+
71+
-- Trigger info
72+
trigger_reason LowCardinality(String), -- 'scheduled', 'drift', 'manual'
73+
triggered_by String DEFAULT 'system',
74+
75+
-- Hyperparameters (JSON blob)
76+
hyperparameters String,
77+
78+
-- Notes
79+
notes String DEFAULT ''
80+
)
81+
ENGINE = MergeTree()
82+
ORDER BY (started_at)
83+
PARTITION BY toYYYYMM(started_at)
84+
SETTINGS index_granularity = 8192;
85+
86+
87+
-- ----------------------------------------------------------------------------
88+
-- Drift Reports
89+
-- ----------------------------------------------------------------------------
90+
-- Stores drift detection results for monitoring.
91+
92+
CREATE TABLE IF NOT EXISTS polybot.aware_ml_drift_reports (
93+
report_id UUID DEFAULT generateUUIDv4(),
94+
checked_at DateTime64(3) DEFAULT now64(3),
95+
96+
-- Summary
97+
alert_level LowCardinality(String), -- 'normal', 'warning', 'critical'
98+
drift_ratio Float32, -- Fraction of features that drifted
99+
n_features UInt16,
100+
n_drifted UInt16,
101+
102+
-- Baseline info
103+
baseline_date DateTime64(3),
104+
n_samples_baseline UInt32,
105+
n_samples_current UInt32,
106+
107+
-- Action taken
108+
retrain_triggered UInt8 DEFAULT 0,
109+
retrain_reason String DEFAULT '',
110+
111+
-- Detailed results (JSON array)
112+
feature_results String
113+
)
114+
ENGINE = MergeTree()
115+
ORDER BY (checked_at)
116+
PARTITION BY toYYYYMM(checked_at)
117+
SETTINGS index_granularity = 8192;
118+
119+
120+
-- ============================================================================
121+
-- Seed Data: Default Tier Boundaries
122+
-- ============================================================================
123+
124+
INSERT INTO polybot.aware_ml_tier_boundaries
125+
(tier_name, tier_order, score_min, score_max, confidence_threshold, description, model_version)
126+
VALUES
127+
('BRONZE', 1, 0.0, 49.9, 0.3, 'Entry-level traders. May have limited history or inconsistent performance.', 'ensemble_v1'),
128+
('SILVER', 2, 50.0, 69.9, 0.4, 'Developing traders. Show some edge but not consistently profitable.', 'ensemble_v1'),
129+
('GOLD', 3, 70.0, 89.9, 0.5, 'Skilled traders. Consistent profitability with good risk management.', 'ensemble_v1'),
130+
('DIAMOND', 4, 90.0, 100.0, 0.7, 'Elite traders. Top performers with exceptional track records.', 'ensemble_v1');
131+
132+
133+
-- ============================================================================
134+
-- Views
135+
-- ============================================================================
136+
137+
-- Latest feature importance
138+
CREATE VIEW IF NOT EXISTS polybot.v_ml_feature_importance_latest AS
139+
SELECT
140+
feature_name,
141+
importance_score,
142+
importance_rank,
143+
model_version,
144+
importance_type
145+
FROM polybot.aware_ml_feature_importance FINAL
146+
WHERE model_version = (
147+
SELECT model_version
148+
FROM polybot.aware_ml_feature_importance
149+
ORDER BY calculated_at DESC
150+
LIMIT 1
151+
)
152+
ORDER BY importance_rank;
153+
154+
155+
-- Latest training run
156+
CREATE VIEW IF NOT EXISTS polybot.v_ml_training_latest AS
157+
SELECT *
158+
FROM polybot.aware_ml_training_runs
159+
WHERE status = 'success'
160+
ORDER BY completed_at DESC
161+
LIMIT 1;
162+
163+
164+
-- Drift trend (last 30 days)
165+
CREATE VIEW IF NOT EXISTS polybot.v_ml_drift_trend AS
166+
SELECT
167+
toDate(checked_at) AS date,
168+
avg(drift_ratio) AS avg_drift_ratio,
169+
max(drift_ratio) AS max_drift_ratio,
170+
countIf(alert_level = 'warning') AS warning_count,
171+
countIf(alert_level = 'critical') AS critical_count,
172+
countIf(retrain_triggered = 1) AS retrains_triggered
173+
FROM polybot.aware_ml_drift_reports
174+
WHERE checked_at >= now() - INTERVAL 30 DAY
175+
GROUP BY date
176+
ORDER BY date;

analytics-service/clickhouse/init/300_user_investments.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ INSERT INTO polybot.aware_fund_summary
227227
VALUES
228228
('PSI-10', 'active', 0, 0, 1.0, 0, 0, 0, 0, 0, 0.005, 0.10, 10,
229229
'Top 10 Smart Money traders by score. Mirrors their positions proportionally.', today()),
230+
('PSI-25', 'active', 0, 0, 1.0, 0, 0, 0, 0, 0, 0.005, 0.10, 10,
231+
'Top 25 Smart Money traders by score. Broader diversification than PSI-10.', today()),
230232
('PSI-SPORTS', 'active', 0, 0, 1.0, 0, 0, 0, 0, 0, 0.005, 0.10, 10,
231233
'Top sports betting specialists. Focus on NFL, NBA, Soccer markets.', today()),
232234
('PSI-CRYPTO', 'active', 0, 0, 1.0, 0, 0, 0, 0, 0, 0.005, 0.10, 10,

aware-fund/services/analytics/Dockerfile

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,19 @@ FROM python:3.11-slim
22

33
WORKDIR /app
44

5-
# Install dependencies
5+
# Install system dependencies for ML
6+
RUN apt-get update && apt-get install -y --no-install-recommends \
7+
gcc \
8+
&& rm -rf /var/lib/apt/lists/*
9+
10+
# Install Python dependencies
611
COPY requirements.txt .
712
RUN pip install --no-cache-dir -r requirements.txt
813

914
# Copy source code
1015
COPY *.py .
16+
COPY ml/ ./ml/
17+
COPY notifications/ ./notifications/
1118

1219
# Environment defaults
1320
ENV CLICKHOUSE_HOST=clickhouse
@@ -17,6 +24,7 @@ ENV SCORING_INTERVAL_SECONDS=3600
1724
ENV MIN_TRADES=10
1825
ENV MAX_TRADERS=10000
1926
ENV LOG_LEVEL=INFO
27+
ENV PYTHONUNBUFFERED=1
2028

21-
# Default: run orchestrator
22-
CMD ["python", "run_all.py"]
29+
# Default: run ML analytics continuously (hourly scoring + drift monitoring)
30+
CMD ["python", "run_all.py", "--continuous", "--interval", "3600"]

aware-fund/services/analytics/anomaly_detection.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,10 @@ def get_integrity_score(self, username: str) -> IntegrityScore:
216216
def _get_traders_to_scan(self) -> list[str]:
217217
"""Get traders with sufficient activity to analyze"""
218218
query = """
219-
SELECT DISTINCT username
219+
SELECT DISTINCT proxy_address
220220
FROM polybot.aware_global_trades
221221
WHERE ts >= now() - INTERVAL 30 DAY
222-
AND username != ''
223-
GROUP BY username
222+
GROUP BY proxy_address
224223
HAVING count() >= 20
225224
LIMIT 5000
226225
"""

aware-fund/services/analytics/edge_persistence.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,11 +229,10 @@ def predict(self, username: str) -> Optional[PersistencePrediction]:
229229
def _get_eligible_traders(self) -> list[str]:
230230
"""Get traders eligible for prediction"""
231231
query = f"""
232-
SELECT DISTINCT username
232+
SELECT DISTINCT proxy_address
233233
FROM polybot.aware_global_trades
234234
WHERE ts >= now() - INTERVAL 90 DAY
235-
AND username != ''
236-
GROUP BY username
235+
GROUP BY proxy_address
237236
HAVING count() >= {self.config.min_trades}
238237
LIMIT 2000
239238
"""

aware-fund/services/analytics/hidden_alpha.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ def find_hidden_gems(self) -> list[HiddenTrader]:
151151
sharpe_ratio >= {self.config.min_sharpe_for_gem}
152152
AND total_volume_usd <= {self.config.max_volume_for_hidden}
153153
AND total_trades >= {self.config.min_trades_for_gem}
154-
AND username != ''
155154
),
156155
-- Calculate percentile ranks for quality metrics
157156
ranked AS (
@@ -293,7 +292,6 @@ def find_rising_stars(self) -> list[HiddenTrader]:
293292
AND win_rate >= {self.config.min_win_rate_star}
294293
AND sharpe_ratio >= {self.config.min_sharpe_star}
295294
AND total_trades >= 10
296-
AND username != ''
297295
),
298296
-- Calculate performance relative to tenure
299297
performance_adjusted AS (
@@ -453,7 +451,6 @@ def find_niche_specialists(self) -> list[HiddenTrader]:
453451
unique_markets <= 5
454452
AND total_trades >= 20
455453
AND sharpe_ratio >= 1.0
456-
AND username != ''
457454
ORDER BY sharpe_ratio DESC
458455
LIMIT {self.config.max_discoveries_per_type}
459456
"""
@@ -523,7 +520,6 @@ def find_contrarians(self) -> list[HiddenTrader]:
523520
AND total_pnl > 0
524521
AND sharpe_ratio >= 0.5
525522
AND total_trades >= 20
526-
AND username != ''
527523
ORDER BY total_pnl DESC
528524
LIMIT {self.config.max_discoveries_per_type}
529525
"""

0 commit comments

Comments
 (0)