diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..14580ea --- /dev/null +++ b/.gitignore @@ -0,0 +1,92 @@ +# Environment variables and API keys +.env +.env.local +.env.*.local +*.key +*_keys.txt + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual environments +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Jupyter Notebook +.ipynb_checkpoints + +# Experiment results and logs +results/ +experiments/ +logs/ +*.log +problematic_requests/ + +# Model checkpoints and cache +*.pt +*.pth +*.ckpt +checkpoints/ +.cache/ +huggingface/ + +# Data files (uncomment if you want to exclude large datasets) +# data/ +# *.jsonl +# *.csv +# *.parquet + +# Temporary files +tmp/ +temp/ +*.tmp + +# MacOS +.DS_Store +.AppleDouble +.LSOverride + +# Coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ diff --git a/ace/temporal_bullet.py b/ace/temporal_bullet.py new file mode 100644 index 0000000..a612f8b --- /dev/null +++ b/ace/temporal_bullet.py @@ -0,0 +1,148 @@ +""" +============================================================================== +temporal_bullet.py +============================================================================== + +Enhanced ACE bullet with temporal tracking for relevance-based retrieval. 
from dataclasses import dataclass, field
from datetime import datetime
from math import exp
from typing import List, Optional


@dataclass
class TemporalBullet:
    """Playbook bullet with usage timestamps and relevance scoring.

    Extends the plain ACE bullet fields (id, content, type, helpful/harmful
    counters) with:
    - temporal metadata (creation time, last use, full usage timeline)
    - scoring helpers (recency, frequency, utility, combined relevance)
    - an archiving predicate based on staleness

    NOTE(review): RECENCY_DECAY_RATE and FREQUENCY_WINDOW_DAYS are ordinary
    dataclass fields, so they can be overridden per instance, but to_dict()
    does not write them out; a customised value is therefore lost on a
    to_dict()/from_dict() round trip. Confirm whether that is intended.
    """

    # Original ACE fields
    id: str
    content: str
    bullet_type: str  # "str", "cal", "mis", etc.
    helpful_count: int = 0
    harmful_count: int = 0

    # Temporal tracking
    created_at: datetime = field(default_factory=datetime.now)
    last_used_at: datetime = field(default_factory=datetime.now)
    usage_timeline: List[datetime] = field(default_factory=list)

    # Context tracking
    task_types_used: List[str] = field(default_factory=list)

    # Configuration
    RECENCY_DECAY_RATE: float = 0.1  # per day; half-life = ln(2) / 0.1, about 7 days
    FREQUENCY_WINDOW_DAYS: int = 30

    def recency_score(self, current_time: Optional[datetime] = None) -> float:
        """Return exp(-RECENCY_DECAY_RATE * whole days since last use).

        The result is 1.0 when the bullet was used less than a day ago and
        decays towards 0 as it ages.

        Args:
            current_time: Reference instant; defaults to datetime.now().
        """
        if current_time is None:
            current_time = datetime.now()

        # Clamp at zero: when last_used_at lies after current_time the day
        # count is negative and the score would otherwise exceed 1.0.
        days_since_use = max(0, (current_time - self.last_used_at).days)
        return exp(-self.RECENCY_DECAY_RATE * days_since_use)

    def frequency_score(self, current_time: Optional[datetime] = None) -> float:
        """Return uses per day over the trailing FREQUENCY_WINDOW_DAYS.

        0 means no use inside the window, 1.0 means one use per day on
        average; the value exceeds 1.0 when the bullet is used more often.
        Timeline entries later than current_time are not counted. A
        non-positive window yields 0.0 instead of dividing by zero.

        Args:
            current_time: Reference instant; defaults to datetime.now().
        """
        if current_time is None:
            current_time = datetime.now()

        window_days = self.FREQUENCY_WINDOW_DAYS
        if window_days <= 0:
            return 0.0

        # timedelta.days is floored, so any instant after current_time has
        # days <= -1 and is excluded by the lower bound.
        recent_uses = sum(
            1 for used_at in self.usage_timeline
            if 0 <= (current_time - used_at).days <= window_days
        )
        return recent_uses / window_days

    def utility_score(self) -> float:
        """Return helpful_count - harmful_count as a float.

        The value is not normalised and is negative when harmful > helpful.
        """
        return float(self.helpful_count - self.harmful_count)

    def relevance_score(self, current_time: Optional[datetime] = None) -> float:
        """Return the combined ranking score: utility * recency * (1 + frequency).

        Rationale:
        - Base score is utility (helpful - harmful)
        - Multiplied by recency (recently used knowledge weighs more)
        - Boosted by frequency (frequently used = more relevant)

        Args:
            current_time: Reference instant; defaults to datetime.now().
        """
        # Resolve "now" once so both time-based factors use the same instant.
        if current_time is None:
            current_time = datetime.now()

        utility = self.utility_score()
        recency = self.recency_score(current_time)
        frequency = self.frequency_score(current_time)
        return utility * recency * (1 + frequency)

    def mark_used(self,
                  task_type: Optional[str] = None,
                  current_time: Optional[datetime] = None) -> None:
        """Record one retrieval/use of this bullet.

        Sets last_used_at, appends to usage_timeline and, when a non-empty
        task_type is given, remembers it once in task_types_used.

        Args:
            task_type: Optional label of the task that used the bullet.
            current_time: Instant of use; defaults to datetime.now().
        """
        if current_time is None:
            current_time = datetime.now()

        self.last_used_at = current_time
        self.usage_timeline.append(current_time)

        if task_type and task_type not in self.task_types_used:
            self.task_types_used.append(task_type)

    def should_archive(self,
                       min_days_inactive: int = 30,
                       current_time: Optional[datetime] = None,
                       min_recency: float = 0.05) -> bool:
        """Return True when the bullet is stale enough to be archived.

        A bullet is archived when either
        1. it has not been used for at least min_days_inactive whole days, or
        2. its recency score is below min_recency (the default 0.05 is
           reached after about 30 days at the default decay rate).

        The helpful/harmful counters are not consulted, so a helpful but
        stale bullet is archived too.

        Args:
            min_days_inactive: Inactivity threshold in whole days.
            current_time: Reference instant; defaults to datetime.now().
            min_recency: Recency score below which the bullet is stale.
        """
        if current_time is None:
            current_time = datetime.now()

        days_inactive = (current_time - self.last_used_at).days
        recency = self.recency_score(current_time)
        return days_inactive >= min_days_inactive or recency < min_recency

    def to_dict(self) -> dict:
        """Serialize for storage; datetimes become ISO-8601 strings."""
        return {
            'id': self.id,
            'content': self.content,
            'bullet_type': self.bullet_type,
            'helpful_count': self.helpful_count,
            'harmful_count': self.harmful_count,
            'created_at': self.created_at.isoformat(),
            'last_used_at': self.last_used_at.isoformat(),
            'usage_timeline': [t.isoformat() for t in self.usage_timeline],
            # Copy so later edits to the returned dict cannot reach back
            # into this bullet's own list.
            'task_types_used': list(self.task_types_used),
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'TemporalBullet':
        """Deserialize from the mapping produced by to_dict().

        The input mapping is left untouched, so the same dict can be
        deserialized repeatedly. Timestamp keys that are absent fall back to
        the field defaults; values that already are datetime objects are
        accepted as-is.
        """
        def _as_datetime(value):
            return value if isinstance(value, datetime) else datetime.fromisoformat(value)

        kwargs = dict(data)  # shallow copy: never write into the caller's dict
        for key in ('created_at', 'last_used_at'):
            if key in kwargs:
                kwargs[key] = _as_datetime(kwargs[key])
        if 'usage_timeline' in kwargs:
            kwargs['usage_timeline'] = [_as_datetime(t) for t in kwargs['usage_timeline']]
        if 'task_types_used' in kwargs:
            kwargs['task_types_used'] = list(kwargs['task_types_used'])
        return cls(**kwargs)
from datetime import datetime
from typing import Dict, List, Optional


class TemporalCurator:
    """Playbook curator with temporal relevance tracking.

    Holds an active playbook and an archive of bullets. On top of merging
    delta bullets into the playbook it offers staleness-based archiving,
    relevance-ranked retrieval and aggregate temporal statistics.

    Bullets are only required to behave like TemporalBullet (content,
    counters, task_types_used, mark_used, should_archive, the *_score
    methods); the type is referenced in annotations only.
    """

    def __init__(self,
                 playbook: Optional[List["TemporalBullet"]] = None,
                 archive_inactive_days: int = 30):
        """Create a curator.

        Args:
            playbook: Initial active bullets; the list object is used as-is
                (not copied). Defaults to a new empty list.
            archive_inactive_days: Inactivity threshold handed to each
                bullet's should_archive().
        """
        self.playbook = playbook if playbook is not None else []
        self.archive: List["TemporalBullet"] = []
        self.archive_inactive_days = archive_inactive_days

    def merge_delta(self,
                    delta_bullets: List["TemporalBullet"],
                    current_time: Optional[datetime] = None) -> List["TemporalBullet"]:
        """Merge delta bullets into the playbook and return the new playbook.

        For each delta bullet:
        1. Look for a similar bullet (see _find_similar) in the current
           playbook, then among bullets already added by this same call.
        2. On a match, add the delta's helpful/harmful counters and task
           types to the matched bullet and mark it used at current_time.
        3. Otherwise stamp created_at/last_used_at with current_time and add
           the delta bullet itself as a new entry.

        The resulting playbook lists every touched or new bullet once, in
        first-touch order, followed by the untouched existing bullets in
        their previous order.

        Args:
            delta_bullets: Bullets to merge; unmatched ones are mutated and
                adopted into the playbook.
            current_time: Instant of the merge; defaults to datetime.now().
        """
        if current_time is None:
            current_time = datetime.now()

        merged: List["TemporalBullet"] = []
        placed = set()  # id() of every object already in `merged`
        added_now: List["TemporalBullet"] = []

        for delta_bullet in delta_bullets:
            match = self._find_similar(delta_bullet, self.playbook)
            if match is None:
                # Two deltas with the same content must collapse as well.
                match = self._find_similar(delta_bullet, added_now)

            if match is None:
                delta_bullet.created_at = current_time
                delta_bullet.last_used_at = current_time
                added_now.append(delta_bullet)
                target = delta_bullet
            else:
                match.helpful_count += delta_bullet.helpful_count
                match.harmful_count += delta_bullet.harmful_count
                for task_type in delta_bullet.task_types_used:
                    if task_type not in match.task_types_used:
                        match.task_types_used.append(task_type)
                match.mark_used(current_time=current_time)
                target = match

            # Track by identity: a bullet matched by several deltas must
            # appear only once, and value equality would be both slow and
            # wrong for distinct bullets that happen to compare equal.
            if id(target) not in placed:
                placed.add(id(target))
                merged.append(target)

        # Carry over existing bullets that no delta touched.
        for bullet in self.playbook:
            if id(bullet) not in placed:
                placed.add(id(bullet))
                merged.append(bullet)

        self.playbook = merged
        return self.playbook

    def archive_stale_bullets(self, current_time: Optional[datetime] = None) -> Dict:
        """Move stale bullets from the playbook to the archive.

        A bullet is stale when its should_archive(archive_inactive_days,
        current_time) returns True; helpful bullets are not exempt. Archived
        bullets are appended to self.archive rather than discarded.

        Args:
            current_time: Reference instant; defaults to datetime.now().

        Returns:
            Dict with 'archived_count', 'active_count' and
            'archived_bullets' (the bullets moved by this call).
        """
        if current_time is None:
            current_time = datetime.now()

        still_active = []
        newly_archived = []
        for bullet in self.playbook:
            if bullet.should_archive(self.archive_inactive_days, current_time):
                newly_archived.append(bullet)
            else:
                still_active.append(bullet)

        self.archive.extend(newly_archived)
        self.playbook = still_active

        return {
            'archived_count': len(newly_archived),
            'active_count': len(still_active),
            'archived_bullets': newly_archived
        }

    def retrieve_relevant(self,
                          task_type: Optional[str] = None,
                          top_k: int = 10,
                          current_time: Optional[datetime] = None) -> List["TemporalBullet"]:
        """Return up to top_k playbook bullets ranked by relevance_score().

        When task_type is given, only bullets that list it in
        task_types_used, or that have no recorded task types at all, are
        considered. Every returned bullet is marked as used (with task_type
        and current_time), so calling this method changes later scores.

        Args:
            task_type: Optional task label to filter on.
            top_k: Maximum number of bullets to return; values <= 0 return
                an empty list.
            current_time: Reference instant; defaults to datetime.now().
        """
        if current_time is None:
            current_time = datetime.now()

        # A negative top_k would otherwise slice from the end of the ranking.
        if top_k <= 0:
            return []

        candidates = self.playbook
        if task_type:
            candidates = [
                b for b in self.playbook
                if task_type in b.task_types_used or not b.task_types_used
            ]

        if not candidates:
            return []

        ranked = sorted(
            candidates,
            key=lambda b: b.relevance_score(current_time),
            reverse=True
        )

        top_bullets = ranked[:top_k]
        for bullet in top_bullets:
            bullet.mark_used(task_type, current_time)

        return top_bullets

    def get_temporal_stats(self, current_time: Optional[datetime] = None) -> Dict:
        """Return aggregate temporal statistics for the active playbook.

        Args:
            current_time: Reference instant; defaults to datetime.now().

        Returns:
            Dict with 'total_bullets', the means 'avg_recency',
            'avg_frequency', 'avg_relevance', 'avg_age_days',
            'avg_inactive_days', and 'stale_bullets' (how many bullets
            should_archive() currently flags). All values are 0 for an
            empty playbook.
        """
        if current_time is None:
            current_time = datetime.now()

        total = len(self.playbook)
        if not total:
            return {
                'total_bullets': 0,
                'avg_recency': 0,
                'avg_frequency': 0,
                'avg_relevance': 0,
                'avg_age_days': 0,
                'avg_inactive_days': 0,
                'stale_bullets': 0
            }

        recency_total = sum(b.recency_score(current_time) for b in self.playbook)
        frequency_total = sum(b.frequency_score(current_time) for b in self.playbook)
        relevance_total = sum(b.relevance_score(current_time) for b in self.playbook)
        age_total = sum((current_time - b.created_at).days for b in self.playbook)
        inactive_total = sum((current_time - b.last_used_at).days for b in self.playbook)
        stale = sum(
            1 for b in self.playbook
            if b.should_archive(self.archive_inactive_days, current_time)
        )

        return {
            'total_bullets': total,
            'avg_recency': recency_total / total,
            'avg_frequency': frequency_total / total,
            'avg_relevance': relevance_total / total,
            'avg_age_days': age_total / total,
            'avg_inactive_days': inactive_total / total,
            'stale_bullets': stale
        }

    def _find_similar(self,
                      bullet: "TemporalBullet",
                      candidates: List["TemporalBullet"],
                      threshold: float = 0.85) -> Optional["TemporalBullet"]:
        """Return the first candidate whose content equals bullet.content.

        Returns None when nothing matches. `threshold` is currently unused:
        matching is exact string equality.
        TODO: Replace with embedding-based similarity for production.
        """
        for candidate in candidates:
            if candidate.content == bullet.content:
                return candidate
        return None