34 changes: 34 additions & 0 deletions configs/facility-with-weave.yaml
@@ -0,0 +1,34 @@
# Facility dataset configuration with W&B Weave tracking enabled

system_prompt:
  file: "../use-cases/facility-support-analyzer/facility_prompt_sys.txt"
  inputs: ["question"]
  outputs: ["answer"]

# Dataset configuration
dataset:
  path: "../use-cases/facility-support-analyzer/dataset.json"
  input_field: ["fields", "input"]
  golden_output_field: "answer"

# Model configuration (minimal required settings)
model:
  name: "openrouter/meta-llama/llama-3.3-70b-instruct"
  task_model: "openrouter/meta-llama/llama-3.3-70b-instruct"
  proposer_model: "openrouter/meta-llama/llama-3.3-70b-instruct"

# Metric configuration (simplified but maintains compatibility)
metric:
  class: "llama_prompt_ops.core.metrics.FacilityMetric"
  strict_json: false
  output_field: "answer"

# Optimization settings
optimization:
  strategy: "llama"

# W&B Weave tracking configuration
weave:
  enabled: true
  project_name: "llama-prompt-optimization"
  entity: null  # Optional: your W&B entity name
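As a sketch, this config can be exercised with the CLI command documented in the README change below (assuming the relative paths above resolve from the config file's location):

```bash
llama-prompt-ops migrate --config configs/facility-with-weave.yaml --weave
```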
27 changes: 27 additions & 0 deletions docs/README.md
@@ -40,6 +40,33 @@ llama-prompt-ops supports various inference providers and endpoints to fit your
- vLLM (local deployment)
- NVIDIA NIMs (optimized containers)

## W&B Weave Integration

Track and visualize your prompt optimization experiments with W&B Weave. When enabled, Weave automatically tracks:

- **Prompt Evolution**: Original and optimized prompt versions
- **Dataset Versions**: Training, validation, and test datasets
- **LLM Call Traces**: All model calls with inputs, outputs, tokens, and costs

### Quick Start

1. Add Weave configuration to your YAML file:
```yaml
weave:
  enabled: true
  project_name: "my-optimization-project"
  entity: "my-team"  # Optional
```

2. Run optimization with tracking:
```bash
llama-prompt-ops migrate --config config.yaml --weave
```

3. View results at: `https://wandb.ai/[entity]/[project-name]`

See the [full Weave integration details](#) for advanced configuration options.
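For programmatic use, here is a minimal sketch built on the `WeaveTracker` class added in this PR (see `src/llama_prompt_ops/integrations/weave_tracker.py` below; the prompt strings are illustrative):

```python
from llama_prompt_ops.integrations import WeaveTracker

# Mirrors the YAML above; entity is optional and may be omitted.
tracker = WeaveTracker(project_name="my-optimization-project", entity="my-team")

if tracker.is_enabled():
    # Publishing both versions under the same name yields v1, v2, ... in Weave.
    ref = tracker.track_prompt_evolution(
        original_prompt="Answer the customer's question.",
        optimized_prompt="Answer the customer's question concisely and cite the manual.",
        prompt_name="system_prompt",
    )
    print(ref)  # ref string of the latest published version, or None on failure
```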

## Supported Formats at a Glance

### Prompt Formats
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -30,7 +30,8 @@ dependencies = [
"litellm>=1.63.0",
"huggingface-hub>=0.29.0",
"datasets>=2.21.0",
"propcache==0.3.1"
"propcache==0.3.1",
"weave>=0.51.0"
]

[project.optional-dependencies]
9 changes: 9 additions & 0 deletions src/llama_prompt_ops/integrations/__init__.py
@@ -0,0 +1,9 @@
"""
Integration modules for llama-prompt-ops.

This package contains integrations with external tracking and logging services.
"""

from .weave_tracker import WeaveTracker

__all__ = ["WeaveTracker"]
198 changes: 198 additions & 0 deletions src/llama_prompt_ops/integrations/weave_tracker.py
@@ -0,0 +1,198 @@
"""
W&B Weave integration for tracking prompts, datasets, and LLM calls.

Uses Weave's native classes:
- weave.StringPrompt for versioned prompts (with names)
- weave.Dataset for versioned datasets (with names)
- Automatic LLM tracing via weave.init()
"""
from typing import Dict, Any, Optional, List
import logging

try:
    import weave
    from weave import StringPrompt, Dataset
    WEAVE_AVAILABLE = True
except ImportError:
    WEAVE_AVAILABLE = False
    weave = None
    StringPrompt = None
    Dataset = None

from datasets import Dataset as HFDataset


logger = logging.getLogger(__name__)


class WeaveTracker:
    """
    Lightweight W&B Weave integration using native Weave classes.

    Provides:
    - Prompt versioning via weave.StringPrompt (named objects)
    - Dataset versioning via weave.Dataset (named objects)
    - Automatic LLM tracing via weave.init()
    """

    def __init__(
        self,
        project_name: str,
        entity: Optional[str] = None,
        enabled: bool = True
    ):
        """
        Initialize Weave tracking.

        Args:
            project_name: Weave project name
            entity: W&B entity (optional)
            enabled: Whether tracking is enabled
        """
        self.project_name = project_name
        self.entity = entity
        self.enabled = enabled

        if not WEAVE_AVAILABLE:
            logger.warning("Weave not available. Install with: pip install weave")
            self.enabled = False
            return

        if self.enabled:
            self._initialize_weave()

    def _initialize_weave(self) -> None:
        """Initialize Weave project - enables automatic LLM tracing."""
        try:
            if self.entity:
                project_path = f"{self.entity}/{self.project_name}"
            else:
                project_path = self.project_name

            weave.init(project_path)
            logger.info(f"Weave initialized: {project_path}")

        except Exception as e:
            logger.error(f"Failed to initialize Weave: {e}")
            self.enabled = False

    def is_enabled(self) -> bool:
        """Check if Weave tracking is enabled."""
        return self.enabled and WEAVE_AVAILABLE

    def track_prompt_evolution(
        self,
        original_prompt: str,
        optimized_prompt: str,
        prompt_name: str = "system_prompt",
        metadata: Optional[Dict[str, Any]] = None
    ) -> Optional[str]:
        """
        Track prompt evolution using the same named prompt for versioning.

        Args:
            original_prompt: Original prompt text
            optimized_prompt: Optimized prompt text
            prompt_name: Name for both versions (creates v1, v2, etc.)
            metadata: Optimization metadata (unused for now)

        Returns:
            Reference to published optimized prompt version
        """
        if not self.is_enabled():
            return None

        try:
            # Create StringPrompts (name goes with publish, not constructor)
            original = StringPrompt(original_prompt)
            optimized = StringPrompt(optimized_prompt)

            # Publish with same name to create versions
            weave.publish(original, name=prompt_name)
            optimized_ref = weave.publish(optimized, name=prompt_name)

            logger.info(f"Tracked prompt evolution: {optimized_ref}")
            return str(optimized_ref)

        except Exception as e:
            logger.error(f"Failed to track prompt evolution: {e}")
            return None

    def track_dataset(
        self,
        dataset: HFDataset,
        split: str = "train",
        metadata: Optional[Dict[str, Any]] = None
    ) -> Optional[str]:
        """
        Track dataset using named weave.Dataset.

        Args:
            dataset: HuggingFace dataset to track
            split: Dataset split name
            metadata: Additional metadata (unused for now)

        Returns:
            Reference to published dataset
        """
        if not self.is_enabled():
            return None

        try:
            # Convert HF dataset to format expected by weave.Dataset
            rows = [dict(row) for row in dataset]

            # Create named Weave Dataset for auto-versioning
            weave_dataset = Dataset(
                name=f"dataset_{split}",
                rows=rows
            )

            # Publish dataset (automatically versioned by name)
            ref = weave.publish(weave_dataset)
            logger.info(f"Tracked dataset ({split}): {ref}")
            return str(ref)

        except Exception as e:
            logger.error(f"Failed to track dataset: {e}")
            return None

    def get_prompt(self, name: str = "system_prompt") -> Optional[StringPrompt]:
        """
        Retrieve prompt using Weave refs.

        Args:
            name: Prompt name to retrieve

        Returns:
            StringPrompt object, None if not found
        """
        if not self.is_enabled():
            return None

        try:
            ref = weave.ref(name)
            return ref.get()
        except Exception as e:
            logger.error(f"Failed to get prompt: {e}")
            return None

    def get_dataset(self, split: str = "train") -> Optional[Dataset]:
        """
        Retrieve dataset using Weave refs.

        Args:
            split: Dataset split to retrieve

        Returns:
            Dataset object, None if not found
        """
        if not self.is_enabled():
            return None

        try:
            ref = weave.ref(f"dataset_{split}")
            return ref.get()
        except Exception as e:
            logger.error(f"Failed to get dataset: {e}")
            return None
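A hedged usage sketch for this class's dataset round-trip (the example rows are illustrative, not taken from the facility dataset):

```python
from datasets import Dataset as HFDataset

from llama_prompt_ops.integrations import WeaveTracker

tracker = WeaveTracker(project_name="llama-prompt-optimization")

# Publish an HF dataset; it is stored as the named Weave object "dataset_train".
train = HFDataset.from_list(
    [{"question": "Is the HVAC ticket resolved?", "answer": "yes"}]
)
tracker.track_dataset(train, split="train")

# Retrieval goes back through the same names used at publish time.
prompt = tracker.get_prompt("system_prompt")  # latest StringPrompt version
dataset = tracker.get_dataset("train")        # latest weave.Dataset version
```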