20 changes: 11 additions & 9 deletions pyrit/datasets/__init__.py
@@ -4,13 +4,21 @@
from pyrit.datasets.adv_bench_dataset import fetch_adv_bench_dataset
from pyrit.datasets.aya_redteaming_dataset import fetch_aya_redteaming_dataset
from pyrit.datasets.babelscape_alert_dataset import fetch_babelscape_alert_dataset
from pyrit.datasets.ccp_sensitive_prompts_dataset import fetch_ccp_sensitive_prompts_dataset
from pyrit.datasets.darkbench_dataset import fetch_darkbench_dataset
from pyrit.datasets.multilingual_vulnerability_dataset import fetch_multilingual_vulnerability_dataset
from pyrit.datasets.decoding_trust_stereotypes_dataset import fetch_decoding_trust_stereotypes_dataset
from pyrit.datasets.dataset_helper import fetch_examples
from pyrit.datasets.equitymedqa_dataset import fetch_equitymedqa_dataset_unique_values
from pyrit.datasets.forbidden_questions_dataset import fetch_forbidden_questions_dataset
from pyrit.datasets.harmbench_dataset import fetch_harmbench_dataset
from pyrit.datasets.harmbench_multimodal_dataset import fetch_harmbench_multimodal_dataset_async
from pyrit.datasets.fetch_jailbreakv_28k_dataset import fetch_jailbreakv_28k_dataset
from pyrit.datasets.fetch_jbb_behaviors import (
fetch_jbb_behaviors_dataset,
fetch_jbb_behaviors_by_harm_category,
fetch_jbb_behaviors_by_jbb_category,
)
from pyrit.datasets.librAI_do_not_answer_dataset import fetch_librAI_do_not_answer_dataset
from pyrit.datasets.llm_latent_adversarial_training_harmful_dataset import (
fetch_llm_latent_adversarial_training_harmful_dataset,
@@ -23,17 +31,10 @@
from pyrit.datasets.seclists_bias_testing_dataset import fetch_seclists_bias_testing_dataset
from pyrit.datasets.sosbench_dataset import fetch_sosbench_dataset
from pyrit.datasets.tdc23_redteaming_dataset import fetch_tdc23_redteaming_dataset
from pyrit.datasets.wmdp_dataset import fetch_wmdp_dataset
from pyrit.datasets.xstest_dataset import fetch_xstest_dataset
from pyrit.datasets.equitymedqa_dataset import fetch_equitymedqa_dataset_unique_values
from pyrit.datasets.text_jailbreak import TextJailBreak
from pyrit.datasets.transphobia_awareness_dataset import fetch_transphobia_awareness_dataset
from pyrit.datasets.ccp_sensitive_prompts_dataset import fetch_ccp_sensitive_prompts_dataset
from pyrit.datasets.fetch_jbb_behaviors import (
fetch_jbb_behaviors_dataset,
fetch_jbb_behaviors_by_harm_category,
fetch_jbb_behaviors_by_jbb_category,
)
from pyrit.datasets.wmdp_dataset import fetch_wmdp_dataset
from pyrit.datasets.xstest_dataset import fetch_xstest_dataset
from pyrit.datasets.sorry_bench_dataset import fetch_sorry_bench_dataset


@@ -68,4 +69,5 @@
"fetch_jbb_behaviors_dataset",
"fetch_jbb_behaviors_by_harm_category",
"fetch_jbb_behaviors_by_jbb_category",
"fetch_jailbreakv_28k_dataset",
]
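A quick sanity-check sketch for reviewers (hypothetical, assuming this branch is installed): the new fetcher should now be re-exported at the package root.

# Sketch, not part of this PR: confirms the re-export added above
import pyrit.datasets as pyrit_datasets

assert "fetch_jailbreakv_28k_dataset" in pyrit_datasets.__all__
assert callable(pyrit_datasets.fetch_jailbreakv_28k_dataset)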
226 changes: 226 additions & 0 deletions pyrit/datasets/fetch_jailbreakv_28k_dataset.py
@@ -0,0 +1,226 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import pathlib
import uuid
import zipfile
from typing import Dict, List, Literal, Optional

from datasets import load_dataset

from pyrit.models import SeedDataset, SeedPrompt

logger = logging.getLogger(__name__)

HarmLiteral = Literal[
"Unethical Behavior",
"Economic Harm",
"Hate Speech",
"Government Decision",
"Physical Harm",
"Fraud",
"Political Sensitivity",
"Malware",
"Illegal Activity",
"Bias",
"Violence",
"Animal Abuse",
"Tailored Unlicensed Advice",
"Privacy Violation",
"Health Consultation",
"Child Abuse Content",
]


def fetch_jailbreakv_28k_dataset(
    *,
    data_home: Optional[str] = None,
    zip_dir: str = str(pathlib.Path.home()),
    split: Literal["JailBreakV_28K", "mini_JailBreakV_28K"] = "mini_JailBreakV_28K",
    text_field: Literal["jailbreak_query", "redteam_query"] = "redteam_query",
    harm_categories: Optional[List[HarmLiteral]] = None,
    min_prompts: int = 50,
) -> SeedDataset:
"""
Fetch examples from the JailBreakV 28k Dataset with optional filtering and create a SeedPromptDataset.
Many images are missing from the dataset in HuggingFace and the team hosts the full image files in Google Drive.
As of 10/2025 the HF dataset is missing most images, so it is ignored.
To use this dataset, please fill out this form and download images from Google Drive:
https://docs.google.com/forms/d/e/1FAIpQLSc_p1kCs3p9z-3FbtSeF7uLYsiQk0tvsGi6F0e_z5xCEmN1gQ/viewform
And provide the path to the zip file in the zip_dir parameter.

Args:
data_home: Directory used as cache_dir in call to HF to store cached data. Defaults to None.
If None, the default cache directory will be used.
zip_dir (str): The directory containing the zip file. Defaults to the home directory.
If the zip is not present there, an error is raised.
split (str): The split of the dataset to fetch. Defaults to "mini_JailBreakV_28K".
Options are "JailBreakV_28K" and "mini_JailBreakV_28K".
text_field (str): The field to use as the prompt text. Defaults to "redteam_query".
Options are "jailbreak_query" and "redteam_query".
harm_categories: List of harm categories to filter the examples.
Defaults to None, which means all categories are included.
Otherwise, only prompts with at least one matching category are included.
min_prompts (int): The minimum number of prompts to return. Defaults to 50.
If the number of prompts after filtering is less than this value, an error is raised.

Returns:
SeedPromptDataset: A SeedPromptDataset containing the filtered examples.

Note:
For more information and access to the original dataset and related materials, visit:
https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k/blob/main/README.md \n
Related paper: https://arxiv.org/abs/2404.03027 \n
The dataset license: MIT
Authors: Weidi Luo, Siyuan Ma, Xiaogeng Liu, Chaowei Xiao, Xiaoyu Guo

Warning:
Due to the nature of these prompts, it may be advisable to consult your relevant legal
department before testing them with LLMs to ensure compliance and reduce potential risks.
"""

source = "JailbreakV-28K/JailBreakV-28k"

# Unzip the file if it is not already extracted
zip_file_path = pathlib.Path(zip_dir) / "JailBreakV_28K.zip"
zip_extracted_path = pathlib.Path(zip_dir) / "JailBreakV_28K"
if not zip_file_path.exists():
raise FileNotFoundError("No zip file provided. Images not present for multimodal prompts.")
else:
# Only unzip if the target directory does not already exist
if not zip_extracted_path.exists():
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
zip_ref.extractall(pathlib.Path(zip_dir))
try:
logger.info(f"Loading JailBreakV-28k dataset from {source}")

# Normalize the harm categories to match pyrit harm category conventions
harm_categories_normalized = (
None if not harm_categories else [_normalize_policy(policy) for policy in harm_categories]
)

# Load the dataset from HuggingFace
data = load_dataset(source, "JailBreakV_28K", cache_dir=data_home)

dataset_split = data[split]

per_call_cache: Dict[str, str] = {}

seed_prompts = []

# Define common metadata that will be used across all seed prompts
common_metadata = {
"dataset_name": "JailbreakV-28K",
"authors": ["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
"description": (
"Benchmark for Assessing the Robustness of "
"Multimodal Large Language Models against Jailbreak Attacks. "
),
"groups": ["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
"source": "https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
"name": "JailBreakV-28K",
}

# tracker for items in the dataset where image_path does not match an image in the repo
missing_images = 0

for item in dataset_split:
policy = _normalize_policy(item.get("policy", ""))
# Skip if user requested policy filter and items policy does not match
if not (harm_categories_normalized) or policy in harm_categories_normalized:
image_rel_path = item.get("image_path", "")
image_abs_path = ""
if image_rel_path:
image_abs_path = _resolve_image_path(
rel_path=image_rel_path,
local_directory=zip_extracted_path,
call_cache=per_call_cache,
)
if not image_abs_path:
missing_images += 1
continue

group_id = uuid.uuid4()
text_seed_prompt = SeedPrompt(
value=item.get(text_field, ""),
harm_categories=[policy],
prompt_group_id=group_id,
data_type="text",
**common_metadata, # type: ignore[arg-type]
)
image_seed_prompt = SeedPrompt(
value=image_abs_path,
harm_categories=[policy],
prompt_group_id=group_id,
data_type="image_path",
**common_metadata, # type: ignore[arg-type]
)
seed_prompts.append(text_seed_prompt)
seed_prompts.append(image_seed_prompt)

except Exception as e:
logger.error(f"Failed to load JailBreakV-28K dataset: {str(e)}")
raise Exception(f"Error loading JailBreakV-28K dataset: {str(e)}")
if len(seed_prompts) < min_prompts:
raise ValueError(
f"JailBreakV-28K fetch produced {missing_images} missing images. "
f"Only {len(seed_prompts)} multimodal prompts were produced. "
f"This is below the minimum required prompts of {min_prompts}. "
f"Please ensure the zip_dir parameter is provided with the full image set or "
f"check your backup image source."
)
elif missing_images > 0:
logger.warning(f"Failed to resolve {missing_images} image paths in JailBreakV-28K dataset")
if not seed_prompts:
raise ValueError(
"JailBreakV-28K fetch produced 0 prompts. "
"Likely caused by all items returned after filtering having invalid image paths."
)
seed_prompt_dataset = SeedDataset(prompts=seed_prompts)
return seed_prompt_dataset


def _normalize_policy(policy: str) -> str:
    """Normalize a human-readable policy name into a machine-friendly harm category
    (e.g., "Government Decision" -> "government_decision")."""
    return policy.strip().lower().replace(" ", "_").replace("-", "_")


def _resolve_image_path(
    *,
    rel_path: str,
    local_directory: pathlib.Path = pathlib.Path.home(),
    call_cache: Optional[Dict[str, str]] = None,
) -> str:
    """
    Resolve a repo-relative image path to a local absolute path in the extracted image directory.
    Uses a per-call cache to avoid re-resolving the same file.

    Args:
        rel_path: Path relative to the dataset repository root (e.g., "images/0001.png").
        local_directory: Directory to search for the image. Defaults to the home directory.
        call_cache: Optional dict used to cache resolved paths across calls.

    Returns:
        Absolute local path if resolved, else an empty string (and the miss is cached).
    """
    if not rel_path:
        return ""

    # Avoid a mutable default argument; create a fresh cache when none is supplied
    if call_cache is None:
        call_cache = {}

    # Check whether the image path has already been cached
    if rel_path in call_cache:
        return call_cache[rel_path]
    image_path = local_directory / rel_path
    try:
        if image_path.exists():
            abs_path = str(image_path)
        else:
            logger.debug(f"File {image_path} not found in {local_directory}")
            abs_path = ""

        call_cache[rel_path] = abs_path
        return abs_path
    except Exception as e:
        logger.error(f"Failed to resolve image {rel_path}: {str(e)}")
        call_cache[rel_path] = ""
        return ""
2 changes: 1 addition & 1 deletion tests/unit/datasets/test_exists_integration_test.py
@@ -35,7 +35,7 @@ def test_all_fetch_functions_are_tested():

tested_fetch_functions = get_tested_fetch_functions(test_file_path)

missing = fetch_functions - tested_fetch_functions - set(["fetch_examples"])
missing = fetch_functions - tested_fetch_functions - set(["fetch_examples", "fetch_jailbreakv_28k_dataset"])
assert not missing, (
f"The following fetch_* functions from pyrit.datasets are not tested in "
f"test_fetch_datasets.py: {sorted(missing)}"
110 changes: 110 additions & 0 deletions tests/unit/datasets/test_fetch_jailbreakv_28k.py
@@ -0,0 +1,110 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pathlib
from contextlib import nullcontext
from unittest.mock import MagicMock, patch

import pytest

from pyrit.datasets.fetch_jailbreakv_28k_dataset import fetch_jailbreakv_28k_dataset
from pyrit.models import SeedDataset, SeedPrompt


class TestFetchJailbreakv28kDataset:
    """Test suite for the fetch_jailbreakv_28k_dataset function."""

    @pytest.mark.parametrize("text_field", [None, "jailbreak_query"])
    @pytest.mark.parametrize(
        "harm_categories",
        [None, ["Economic Harm"], ["Government Decision"]],
    )
    @pytest.mark.parametrize("min_prompts", [0, 2, 5])
    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset.pathlib.Path")
    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset._resolve_image_path")
    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset.load_dataset")
    def test_fetch_jailbreakv_28k_dataset_success(
        self, mock_load_dataset, mock_resolve_image_path, mock_pathlib, text_field, harm_categories, min_prompts
    ):
        # Mock Path to simulate that the zip file exists and is already extracted
        mock_zip_path = MagicMock()
        mock_zip_path.exists.return_value = True
        mock_extracted_path = MagicMock()
        mock_extracted_path.exists.return_value = True

        # pathlib.Path is patched, so Path(zip_dir) is mock_pathlib.return_value and the
        # "/" operator dispatches to its __truediv__
        mock_pathlib.return_value.__truediv__.side_effect = [
            mock_zip_path,  # First call: zip_file_path
            mock_extracted_path,  # Second call: zip_extracted_path
            mock_extracted_path,  # Spare entry for any additional path joins
        ]
        # Mock dataset response
        mock_dataset = {
            "mini_JailBreakV_28K": [
                {
                    "redteam_query": "test query 1",
                    "jailbreak_query": "jailbreak: test query 1",
                    "policy": "Economic Harm",
                    "image_path": "mock_folder/valid",
                },
                {
                    "redteam_query": "test query 2",
                    "jailbreak_query": "jailbreak: test query 2",
                    "policy": "Government Decision",
                    "image_path": "invalid",
                },
                {
                    "redteam_query": "test query 3",
                    "jailbreak_query": "jailbreak: test query 3",
                    "policy": "Fraud",
                    "image_path": "mock_folder/valid",
                },
            ]
        }
        mock_load_dataset.return_value = mock_dataset

        def fake_resolve_image_path(
            *, rel_path: str = "", local_directory: pathlib.Path = pathlib.Path(), **kwargs
        ) -> str:
            return "" if rel_path == "invalid" else f"mock_path/{rel_path}"

        mock_resolve_image_path.side_effect = fake_resolve_image_path

        # Expect an error when the "Government Decision" filter leaves zero prompts
        # (its only item has an invalid image path) or when fewer than min_prompts remain
        expect_error = harm_categories == ["Government Decision"] or min_prompts == 5
        ctx = pytest.raises(ValueError) if expect_error else nullcontext()

        # Call the fetch function once; omit text_field when None so the default is exercised
        kwargs = {} if text_field is None else {"text_field": text_field}
        with ctx:
            result = fetch_jailbreakv_28k_dataset(
                harm_categories=harm_categories, min_prompts=min_prompts, **kwargs
            )
        if expect_error:
            return

        # Assertions

        assert isinstance(result, SeedDataset)
        if harm_categories is None:
            assert len(result.prompts) == 4
            assert sum(p.data_type == "text" for p in result.prompts) == 2
            assert sum(p.data_type == "image_path" for p in result.prompts) == 2
        elif harm_categories == ["Economic Harm"]:
            assert len(result.prompts) == 2
            assert sum(p.data_type == "text" for p in result.prompts) == 1
            assert sum(p.data_type == "image_path" for p in result.prompts) == 1
            assert result.prompts[0].harm_categories == ["economic_harm"]
        assert all(isinstance(prompt, SeedPrompt) for prompt in result.prompts)
        if text_field == "jailbreak_query":
            for prompt in result.prompts:
                if prompt.data_type == "text":
                    assert "jailbreak" in prompt.value
        else:
            for prompt in result.prompts:
                if prompt.data_type == "text":
                    assert "jailbreak" not in prompt.value
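To exercise just the new unit tests locally, something like the following should work (assuming a development install with pytest available):

python -m pytest tests/unit/datasets/test_fetch_jailbreakv_28k.py -v

The parametrize stack above expands to 18 cases (2 text fields x 3 category filters x 3 min_prompts values), so the -v output lists each combination individually.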