34 changes: 34 additions & 0 deletions configs/facility-with-weave.yaml
@@ -0,0 +1,34 @@
# Facility dataset configuration with W&B Weave tracking enabled

system_prompt:
  file: "../use-cases/facility-support-analyzer/facility_prompt_sys.txt"
  inputs: ["question"]
  outputs: ["answer"]

# Dataset configuration
dataset:
  path: "../use-cases/facility-support-analyzer/dataset.json"
  input_field: ["fields", "input"]
  golden_output_field: "answer"

# Model configuration (minimal required settings)
model:
  name: "openrouter/meta-llama/llama-3.3-70b-instruct"
  task_model: "openrouter/meta-llama/llama-3.3-70b-instruct"
  proposer_model: "openrouter/meta-llama/llama-3.3-70b-instruct"

# Metric configuration (simplified but maintains compatibility)
metric:
  class: "llama_prompt_ops.core.metrics.FacilityMetric"
  strict_json: false
  output_field: "answer"

# Optimization settings
optimization:
  strategy: "llama"

# W&B Weave tracking configuration
weave:
  enabled: true
  project_name: "llama-prompt-optimization"
  entity: null  # Optional: your W&B entity name
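As a sketch, this config can be exercised with the CLI command documented in the README change below (assuming the relative paths above resolve from the config file's location):

```bash
llama-prompt-ops migrate --config configs/facility-with-weave.yaml --weave
```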
27 changes: 27 additions & 0 deletions docs/README.md
@@ -40,6 +40,33 @@ llama-prompt-ops supports various inference providers and endpoints to fit your
- vLLM (local deployment)
- NVIDIA NIMs (optimized containers)

## W&B Weave Integration

Track and visualize your prompt optimization experiments with W&B Weave. When enabled, Weave automatically tracks:

- **Prompt Evolution**: Original and optimized prompt versions
- **Dataset Versions**: Training, validation, and test datasets
- **LLM Call Traces**: All model calls with inputs, outputs, tokens, and costs

### Quick Start

1. Add Weave configuration to your YAML file:
```yaml
weave:
  enabled: true
  project_name: "my-optimization-project"
  entity: "my-team"  # Optional
```

2. Run optimization with tracking:
```bash
llama-prompt-ops migrate --config config.yaml --weave
```

3. View results at: `https://wandb.ai/[entity]/[project-name]`

See the [full Weave integration details](#) for advanced configuration options.
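For programmatic use, here is a minimal sketch built on the `WeaveTracker` class added in this PR (see `src/llama_prompt_ops/integrations/weave_tracker.py` below; the prompt strings are illustrative):

```python
from llama_prompt_ops.integrations import WeaveTracker

# Mirrors the YAML above; entity is optional and may be omitted.
tracker = WeaveTracker(project_name="my-optimization-project", entity="my-team")

if tracker.is_enabled():
    # Publishing both versions under the same name yields v1, v2, ... in Weave.
    ref = tracker.track_prompt_evolution(
        original_prompt="Answer the customer's question.",
        optimized_prompt="Answer the customer's question concisely and cite the manual.",
        prompt_name="system_prompt",
    )
    print(ref)  # ref string of the latest published version, or None on failure
```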

## Supported Formats at a Glance

### Prompt Formats
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -30,7 +30,8 @@ dependencies = [
"litellm>=1.63.0",
"huggingface-hub>=0.29.0",
"datasets>=2.21.0",
"propcache==0.3.1"
"propcache==0.3.1",
"weave>=0.51.0"
]

[project.optional-dependencies]
9 changes: 9 additions & 0 deletions src/llama_prompt_ops/integrations/__init__.py
@@ -0,0 +1,9 @@
"""
Integration modules for llama-prompt-ops.

This package contains integrations with external tracking and logging services.
"""

from .weave_tracker import WeaveTracker

__all__ = ["WeaveTracker"]
198 changes: 198 additions & 0 deletions src/llama_prompt_ops/integrations/weave_tracker.py
@@ -0,0 +1,198 @@
"""
W&B Weave integration for tracking prompts, datasets, and LLM calls.

Uses Weave's native classes:
- weave.StringPrompt for versioned prompts (with names)
- weave.Dataset for versioned datasets (with names)
- Automatic LLM tracing via weave.init()
"""
from typing import Dict, Any, Optional, List
import logging

try:
    import weave
    from weave import StringPrompt, Dataset
    WEAVE_AVAILABLE = True
except ImportError:
    WEAVE_AVAILABLE = False
    weave = None
    StringPrompt = None
    Dataset = None

from datasets import Dataset as HFDataset


logger = logging.getLogger(__name__)


class WeaveTracker:
    """
    Lightweight W&B Weave integration using native Weave classes.

    Provides:
    - Prompt versioning via weave.StringPrompt (named objects)
    - Dataset versioning via weave.Dataset (named objects)
    - Automatic LLM tracing via weave.init()
    """

    def __init__(
        self,
        project_name: str,
        entity: Optional[str] = None,
        enabled: bool = True
    ):
        """
        Initialize Weave tracking.

        Args:
            project_name: Weave project name
            entity: W&B entity (optional)
            enabled: Whether tracking is enabled
        """
        self.project_name = project_name
        self.entity = entity
        self.enabled = enabled

        if not WEAVE_AVAILABLE:
            logger.warning("Weave not available. Install with: pip install weave")
            self.enabled = False
            return

        if self.enabled:
            self._initialize_weave()

    def _initialize_weave(self) -> None:
        """Initialize Weave project - enables automatic LLM tracing."""
        try:
            if self.entity:
                project_path = f"{self.entity}/{self.project_name}"
            else:
                project_path = self.project_name

            weave.init(project_path)
            logger.info(f"Weave initialized: {project_path}")

        except Exception as e:
            logger.error(f"Failed to initialize Weave: {e}")
            self.enabled = False

    def is_enabled(self) -> bool:
        """Check if Weave tracking is enabled."""
        return self.enabled and WEAVE_AVAILABLE

    def track_prompt_evolution(
        self,
        original_prompt: str,
        optimized_prompt: str,
        prompt_name: str = "system_prompt",
        metadata: Optional[Dict[str, Any]] = None
    ) -> Optional[str]:
        """
        Track prompt evolution using the same named prompt for versioning.

        Args:
            original_prompt: Original prompt text
            optimized_prompt: Optimized prompt text
            prompt_name: Name for both versions (creates v1, v2, etc.)
            metadata: Optimization metadata (unused for now)

        Returns:
            Reference to published optimized prompt version
        """
        if not self.is_enabled():
            return None

        try:
            # Create StringPrompts (name goes with publish, not constructor)
            original = StringPrompt(original_prompt)
            optimized = StringPrompt(optimized_prompt)

            # Publish with same name to create versions
            weave.publish(original, name=prompt_name)
            optimized_ref = weave.publish(optimized, name=prompt_name)

            logger.info(f"Tracked prompt evolution: {optimized_ref}")
            return str(optimized_ref)

        except Exception as e:
            logger.error(f"Failed to track prompt evolution: {e}")
            return None

    def track_dataset(
        self,
        dataset: HFDataset,
        split: str = "train",
        metadata: Optional[Dict[str, Any]] = None
    ) -> Optional[str]:
        """
        Track dataset using named weave.Dataset.

        Args:
            dataset: HuggingFace dataset to track
            split: Dataset split name
            metadata: Additional metadata (unused for now)

        Returns:
            Reference to published dataset
        """
        if not self.is_enabled():
            return None

        try:
            # Convert HF dataset to format expected by weave.Dataset
            rows = [dict(row) for row in dataset]

            # Create named Weave Dataset for auto-versioning
            weave_dataset = Dataset(
                name=f"dataset_{split}",
                rows=rows
            )

            # Publish dataset (automatically versioned by name)
            ref = weave.publish(weave_dataset)
            logger.info(f"Tracked dataset ({split}): {ref}")
            return str(ref)

        except Exception as e:
            logger.error(f"Failed to track dataset: {e}")
            return None

    def get_prompt(self, name: str = "system_prompt") -> Optional[StringPrompt]:
        """
        Retrieve prompt using Weave refs.

        Args:
            name: Prompt name to retrieve

        Returns:
            StringPrompt object, None if not found
        """
        if not self.is_enabled():
            return None

        try:
            ref = weave.ref(name)
            return ref.get()
        except Exception as e:
            logger.error(f"Failed to get prompt: {e}")
            return None

    def get_dataset(self, split: str = "train") -> Optional[Dataset]:
        """
        Retrieve dataset using Weave refs.

        Args:
            split: Dataset split to retrieve

        Returns:
            Dataset object, None if not found
        """
        if not self.is_enabled():
            return None

        try:
            ref = weave.ref(f"dataset_{split}")
            return ref.get()
        except Exception as e:
            logger.error(f"Failed to get dataset: {e}")
            return None
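A hedged usage sketch for this class's dataset round-trip (the example rows are illustrative, not taken from the facility dataset):

```python
from datasets import Dataset as HFDataset

from llama_prompt_ops.integrations import WeaveTracker

tracker = WeaveTracker(project_name="llama-prompt-optimization")

# Publish an HF dataset; it is stored as the named Weave object "dataset_train".
train = HFDataset.from_list(
    [{"question": "Is the HVAC ticket resolved?", "answer": "yes"}]
)
tracker.track_dataset(train, split="train")

# Retrieval goes back through the same names used at publish time.
prompt = tracker.get_prompt("system_prompt")  # latest StringPrompt version
dataset = tracker.get_dataset("train")        # latest weave.Dataset version
```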