Commit 5161d9a

Merge remote-tracking branch 'upstream/main'

2 parents: ba6533b + 49f7b17

33 files changed: +1405 -1818 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -183,3 +183,4 @@ results/
 statistics/
 .embedding_cache/
 wandb/
+uv.lock

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
@@ -7,12 +7,12 @@ repos:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-added-large-files
-  - repo: https://github.com/psf/black
-    rev: 25.1.0
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 25.11.0
     hooks:
       - id: black
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.12.8'
+    rev: 'v0.14.4'
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]

CHANGELOG.md

Lines changed: 30 additions & 0 deletions
@@ -1,6 +1,36 @@
 # CHANGELOG
 
 
+## v0.1.1 (2025-08-27)
+
+### Bug Fixes
+
+- Make simulator workable
+  ([`0ceaa10`](https://github.com/EleutherAI/delphi/commit/0ceaa10bd7b1b7beb411c488ff348c06fb868a67))
+
+* bump vllm dependency to latest
+
+* change simulation scoring default to all at once for local models
+
+* use Role class instead of hardcoding
+
+* delete old oai simulator scorer
+
+* refactor
+
+* [pre-commit.ci] auto fixes from pre-commit.com hooks
+
+for more information, see https://pre-commit.ci
+
+* linter fix
+
+* replace lambda with function
+
+---------
+
+Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
+
 ## v0.1.0 (2025-06-17)
 
 ### Features

README.md

Lines changed: 2 additions & 2 deletions
@@ -16,15 +16,15 @@ Install this library as a local editable installation. Run the following command
 
 To run the default pipeline from the command line, use the following command:
 
-`python -m delphi meta-llama/Meta-Llama-3-8B EleutherAI/sae-llama-3-8b-32x --n_tokens 10_000_000 --max_latents 100 --hookpoints layers.5 --scorers detection --filter_bos --name llama-3-8B`
+`python -m delphi EleutherAI/pythia-160m EleutherAI/Pythia-160m-SST-k32-32k --n_tokens 10_000_000 --max_latents 100 --hookpoints layers.5.mlp --scorers detection --filter_bos --name llama-3-8B`
 
 This command will:
 1. Cache activations for the first 10 million tokens of the default dataset, `EleutherAI/SmolLM2-135M-10B`.
 2. Generate explanations for the first 100 features of layer 5 using the default explainer model, `hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4`.
 3. Score the explanations using the detection scorer.
 4. Log summary metrics including per-scorer F1 scores and confusion matrices, and produce histograms of the scorer classification accuracies.
 
-The pipeline is highly configurable and can also be called programmatically (see the [end-to-end test](https://github.com/EleutherAI/delphi/blob/main/delphi/tests/e2e.py) for an example).
+The pipeline is highly configurable and can also be called programmatically (see the [end-to-end test](https://github.com/EleutherAI/delphi/blob/main/tests/e2e.py) for an example).
 
 To use experimental features, create a custom pipeline. You can take inspiration from the main pipeline in [delphi.\_\_main\_\_](https://github.com/EleutherAI/delphi/blob/main/delphi/__main__.py).

delphi/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-__version__ = "0.1.0"
+__version__ = "0.1.1"
 
 import logging
 
delphi/__main__.py

Lines changed: 10 additions & 2 deletions
@@ -152,7 +152,7 @@ async def process_cache(
     if run_cfg.explainer_provider == "offline":
         llm_client = Offline(
             run_cfg.explainer_model,
-            max_memory=0.9,
+            max_memory=run_cfg.max_memory,
             # Explainer models context length - must be able to accommodate the longest
             # set of examples
             max_model_len=run_cfg.explainer_model_max_len,
@@ -262,13 +262,21 @@ def scorer_postprocess(result, score_dir, scorer_name=None):
         scorer_path.mkdir(parents=True, exist_ok=True)
 
         if scorer_name == "simulation":
-            scorer = OpenAISimulator(llm_client, tokenizer=tokenizer, all_at_once=False)
+            if isinstance(llm_client, Offline):
+                scorer = OpenAISimulator(
+                    llm_client, tokenizer=tokenizer, all_at_once=True
+                )
+            else:
+                scorer = OpenAISimulator(
+                    llm_client, tokenizer=tokenizer, all_at_once=False
+                )
         elif scorer_name == "fuzz":
             scorer = FuzzingScorer(
                 llm_client,
                 n_examples_shown=run_cfg.num_examples_per_scorer_prompt,
                 verbose=run_cfg.verbose,
                 log_prob=run_cfg.log_probs,
+                fuzz_type=run_cfg.fuzz_type,
             )
         elif scorer_name == "detection":
             scorer = DetectionScorer(
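This branching matches the changelog entry "change simulation scoring default to all at once for local models": when the explainer client is the local vLLM-backed `Offline` client, the simulator now scores everything in a single batched pass (`all_at_once=True`), while API-backed clients keep the previous incremental behavior.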

delphi/clients/offline.py

Lines changed: 3 additions & 1 deletion
@@ -11,6 +11,7 @@
     destroy_distributed_environment,
     destroy_model_parallel,
 )
+from vllm.inputs import TokensPrompt
 
 from delphi import logger
 
@@ -103,6 +104,7 @@ async def process_func(
             prompt = self.tokenizer.apply_chat_template(
                 batch, add_generation_prompt=True, tokenize=True
             )
+            prompt = TokensPrompt(prompt_token_ids=prompt)
             prompts.append(prompt)
             if self.statistics:
                 non_cached_tokens = len(
@@ -121,7 +123,7 @@
                 None,
                 partial(
                     self.client.generate,  # type: ignore
-                    prompt_token_ids=prompts,
+                    prompts,
                     sampling_params=self.sampling_params,
                     use_tqdm=False,
                 ),
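Together these hunks adapt the client to newer vLLM releases, where `LLM.generate` no longer accepts a `prompt_token_ids=` keyword: pre-tokenized prompts are wrapped in `TokensPrompt` objects and passed positionally. A minimal sketch of that call pattern, with an illustrative model and placeholder token ids:

from vllm import LLM, SamplingParams
from vllm.inputs import TokensPrompt

llm = LLM(model="EleutherAI/pythia-160m")  # illustrative local model
params = SamplingParams(max_tokens=16)

# Wrap the pre-tokenized input in TokensPrompt and pass it positionally,
# replacing the removed prompt_token_ids= keyword argument.
prompts = [TokensPrompt(prompt_token_ids=[101, 2023, 2003, 1037])]
outputs = llm.generate(prompts, sampling_params=params, use_tqdm=False)
print(outputs[0].outputs[0].text)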

delphi/config.py

Lines changed: 10 additions & 1 deletion
@@ -156,14 +156,18 @@ class RunConfig(Serializable):
     )
     """Scorer methods to score latent explanations. Options are 'fuzz', 'detection',
     'simulation' and 'surprisal_intervention'."""
+    fuzz_type: Literal["default", "active"] = "default"
+    """Type of fuzzing to use for the fuzz scorer. Default uses non-activating
+    examples and highlights n_incorrect tokens. Active uses activating examples
+    and highlights non-activating tokens."""
 
     name: str = ""
     """The name of the run. Results are saved in a directory with this name."""
 
     max_latents: int | None = None
     """Maximum number of features to explain for each sparse model."""
 
-    filter_bos: bool = False
+    filter_bos: bool = True
     """Whether to filter out BOS tokens from the cache."""
 
     log_probs: bool = False
@@ -187,6 +191,11 @@ class RunConfig(Serializable):
     )
     """Number of GPUs to use for explanation and scoring."""
 
+    max_memory: float = field(
+        default=0.7,
+    )
+    """Fraction of GPU memory to allocate to running explainer model."""
+
     seed: int = field(
         default=22,
     )
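Assuming `RunConfig` fields surface as command-line flags the same way existing options like `--filter_bos` and `--scorers` do in the README command above, the new settings could be exercised with a hypothetical invocation such as:

`python -m delphi EleutherAI/pythia-160m EleutherAI/Pythia-160m-SST-k32-32k --n_tokens 10_000_000 --hookpoints layers.5.mlp --scorers fuzz --fuzz_type active --max_memory 0.8 --name pythia-fuzz-active`

Note also that `filter_bos` now defaults to `True`, so BOS tokens are filtered from the cache unless the flag is overridden.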

delphi/pipeline.py

Lines changed: 5 additions & 4 deletions
@@ -161,8 +161,9 @@ async def process_item(self, item: Any, semaphore: asyncio.Semaphore) -> Any:
         async with semaphore:
             result = item
             for pipe in self.pipes:
-                if result is not None:
-                    result = await pipe(result)
-                else:
-                    pass
+                if result is None:
+                    return None
+
+                result = await pipe(result)
+
             return result
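The rewrite replaces the no-op `else: pass` with an early return, so once any pipe filters an item out by yielding `None`, the remaining pipes are skipped instead of being iterated over with a dead result. A self-contained toy sketch of the same semantics, using hypothetical pipes:

import asyncio

async def drop_odd(x):
    return x if x % 2 == 0 else None  # a pipe may filter items out

async def double(x):
    return x * 2

async def process_item(item, pipes):
    result = item
    for pipe in pipes:
        if result is None:
            return None  # short-circuit: later pipes never run
        result = await pipe(result)
    return result

print(asyncio.run(process_item(3, [drop_odd, double])))  # None
print(asyncio.run(process_item(4, [drop_odd, double])))  # 8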

delphi/scorers/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -5,7 +5,9 @@
 from .embedding.example_embedding import ExampleEmbeddingScorer
 from .intervention.surprisal_intervention_scorer import SurprisalInterventionScorer
 from .scorer import Scorer
-from .simulator.oai_simulator import OpenAISimulator
+from .simulator.simulation.oai_simulator import (
+    RefactoredOpenAISimulator as OpenAISimulator,
+)
 from .surprisal.surprisal import SurprisalScorer
 
 __all__ = [
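Re-exporting the refactored class under its old public name keeps the package surface stable, so downstream imports are unchanged:

# Downstream usage is unaffected; the name now resolves to RefactoredOpenAISimulator.
from delphi.scorers import OpenAISimulator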
