diff --git a/pyrit/datasets/__init__.py b/pyrit/datasets/__init__.py
index db59cafe6..bd5048dab 100644
--- a/pyrit/datasets/__init__.py
+++ b/pyrit/datasets/__init__.py
@@ -4,13 +4,21 @@
 from pyrit.datasets.adv_bench_dataset import fetch_adv_bench_dataset
 from pyrit.datasets.aya_redteaming_dataset import fetch_aya_redteaming_dataset
 from pyrit.datasets.babelscape_alert_dataset import fetch_babelscape_alert_dataset
+from pyrit.datasets.ccp_sensitive_prompts_dataset import fetch_ccp_sensitive_prompts_dataset
 from pyrit.datasets.darkbench_dataset import fetch_darkbench_dataset
 from pyrit.datasets.multilingual_vulnerability_dataset import fetch_multilingual_vulnerability_dataset
 from pyrit.datasets.decoding_trust_stereotypes_dataset import fetch_decoding_trust_stereotypes_dataset
 from pyrit.datasets.dataset_helper import fetch_examples
+from pyrit.datasets.equitymedqa_dataset import fetch_equitymedqa_dataset_unique_values
 from pyrit.datasets.forbidden_questions_dataset import fetch_forbidden_questions_dataset
 from pyrit.datasets.harmbench_dataset import fetch_harmbench_dataset
 from pyrit.datasets.harmbench_multimodal_dataset import fetch_harmbench_multimodal_dataset_async
+from pyrit.datasets.fetch_jailbreakv_28k_dataset import fetch_jailbreakv_28k_dataset
+from pyrit.datasets.fetch_jbb_behaviors import (
+    fetch_jbb_behaviors_dataset,
+    fetch_jbb_behaviors_by_harm_category,
+    fetch_jbb_behaviors_by_jbb_category,
+)
 from pyrit.datasets.librAI_do_not_answer_dataset import fetch_librAI_do_not_answer_dataset
 from pyrit.datasets.llm_latent_adversarial_training_harmful_dataset import (
     fetch_llm_latent_adversarial_training_harmful_dataset,
@@ -23,17 +31,10 @@
 from pyrit.datasets.seclists_bias_testing_dataset import fetch_seclists_bias_testing_dataset
 from pyrit.datasets.sosbench_dataset import fetch_sosbench_dataset
 from pyrit.datasets.tdc23_redteaming_dataset import fetch_tdc23_redteaming_dataset
-from pyrit.datasets.wmdp_dataset import fetch_wmdp_dataset
-from pyrit.datasets.xstest_dataset import fetch_xstest_dataset
-from pyrit.datasets.equitymedqa_dataset import fetch_equitymedqa_dataset_unique_values
 from pyrit.datasets.text_jailbreak import TextJailBreak
 from pyrit.datasets.transphobia_awareness_dataset import fetch_transphobia_awareness_dataset
-from pyrit.datasets.ccp_sensitive_prompts_dataset import fetch_ccp_sensitive_prompts_dataset
-from pyrit.datasets.fetch_jbb_behaviors import (
-    fetch_jbb_behaviors_dataset,
-    fetch_jbb_behaviors_by_harm_category,
-    fetch_jbb_behaviors_by_jbb_category,
-)
+from pyrit.datasets.wmdp_dataset import fetch_wmdp_dataset
+from pyrit.datasets.xstest_dataset import fetch_xstest_dataset
 from pyrit.datasets.sorry_bench_dataset import fetch_sorry_bench_dataset
 
 
@@ -68,4 +69,5 @@
     "fetch_jbb_behaviors_dataset",
     "fetch_jbb_behaviors_by_harm_category",
     "fetch_jbb_behaviors_by_jbb_category",
+    "fetch_jailbreakv_28k_dataset",
 ]
diff --git a/pyrit/datasets/fetch_jailbreakv_28k_dataset.py b/pyrit/datasets/fetch_jailbreakv_28k_dataset.py
new file mode 100644
index 000000000..5a32ab5f6
--- /dev/null
+++ b/pyrit/datasets/fetch_jailbreakv_28k_dataset.py
@@ -0,0 +1,226 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
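+"""
+Fetcher for the JailbreakV-28K multimodal jailbreak dataset.
+
+Usage sketch (assumes JailBreakV_28K.zip, obtained via the form linked in the
+fetch function's docstring, has been placed in the home directory):
+
+    from pyrit.datasets import fetch_jailbreakv_28k_dataset
+
+    dataset = fetch_jailbreakv_28k_dataset(
+        split="mini_JailBreakV_28K",
+        text_field="redteam_query",
+        harm_categories=["Fraud"],
+    )
+    print(len(dataset.prompts))
+"""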
+
+import logging
+import pathlib
+import uuid
+import zipfile
+from typing import Dict, List, Literal, Optional
+
+from datasets import load_dataset
+
+from pyrit.models import SeedDataset, SeedPrompt
+
+logger = logging.getLogger(__name__)
+
+HarmLiteral = Literal[
+    "Unethical Behavior",
+    "Economic Harm",
+    "Hate Speech",
+    "Government Decision",
+    "Physical Harm",
+    "Fraud",
+    "Political Sensitivity",
+    "Malware",
+    "Illegal Activity",
+    "Bias",
+    "Violence",
+    "Animal Abuse",
+    "Tailored Unlicensed Advice",
+    "Privacy Violation",
+    "Health Consultation",
+    "Child Abuse Content",
+]
+
+
+def fetch_jailbreakv_28k_dataset(
+    *,
+    data_home: Optional[str] = None,
+    zip_dir: str = str(pathlib.Path.home()),
+    split: Literal["JailBreakV_28K", "mini_JailBreakV_28K"] = "mini_JailBreakV_28K",
+    text_field: Literal["jailbreak_query", "redteam_query"] = "redteam_query",
+    harm_categories: Optional[List[HarmLiteral]] = None,
+    min_prompts: int = 50,
+) -> SeedDataset:
+    """
+    Fetch examples from the JailBreakV-28K dataset, with optional filtering, and create a SeedDataset.
+
+    As of 10/2025 the dataset on HuggingFace is missing most images, so its image files are
+    ignored; the dataset's team hosts the full image set on Google Drive. To use this dataset,
+    fill out the following form to download the images, and provide the path to the resulting
+    zip file via the zip_dir parameter:
+    https://docs.google.com/forms/d/e/1FAIpQLSc_p1kCs3p9z-3FbtSeF7uLYsiQk0tvsGi6F0e_z5xCEmN1gQ/viewform
+
+    Args:
+        data_home: Directory used as cache_dir in the call to HuggingFace to store cached data.
+            Defaults to None, in which case the default cache directory is used.
+        zip_dir (str): The directory containing JailBreakV_28K.zip (or an already extracted
+            JailBreakV_28K directory). Defaults to the home directory.
+            If neither is present there, an error is raised.
+        split (str): The split of the dataset to fetch. Defaults to "mini_JailBreakV_28K".
+            Options are "JailBreakV_28K" and "mini_JailBreakV_28K".
+        text_field (str): The field to use as the prompt text. Defaults to "redteam_query".
+            Options are "jailbreak_query" and "redteam_query".
+        harm_categories: List of harm categories to filter the examples.
+            Defaults to None, which means all categories are included.
+            Otherwise, only prompts with at least one matching category are included.
+        min_prompts (int): The minimum number of prompts to return. Defaults to 50.
+            If the number of prompts after filtering is less than this value, an error is raised.
+
+    Returns:
+        SeedDataset: A SeedDataset containing the filtered examples.
+
+    Note:
+        For more information and access to the original dataset and related materials, visit:
+        https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k/blob/main/README.md \n
+        Related paper: https://arxiv.org/abs/2404.03027 \n
+        The dataset license: MIT
+        Authors: Weidi Luo, Siyuan Ma, Xiaogeng Liu, Chaowei Xiao, Xiaoyu Guo
+
+    Warning:
+        Due to the nature of these prompts, it may be advisable to consult your relevant legal
+        department before testing them with LLMs to ensure compliance and reduce potential risks.
+    """
+
+    source = "JailbreakV-28K/JailBreakV-28k"
+
+    # Extract the image archive unless it has already been extracted
+    zip_file_path = pathlib.Path(zip_dir) / "JailBreakV_28K.zip"
+    zip_extracted_path = pathlib.Path(zip_dir) / "JailBreakV_28K"
+    if not zip_extracted_path.exists():
+        if not zip_file_path.exists():
+            raise FileNotFoundError("No zip file provided; images are not present for multimodal prompts.")
+        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+            zip_ref.extractall(pathlib.Path(zip_dir))
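+
+    # The HuggingFace dataset supplies the prompt text and per-item metadata;
+    # the images themselves are resolved against the locally extracted archive.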
+    try:
+        logger.info(f"Loading JailBreakV-28k dataset from {source}")
+
+        # Normalize the harm categories to match pyrit harm category conventions
+        harm_categories_normalized = (
+            None if not harm_categories else [_normalize_policy(policy) for policy in harm_categories]
+        )
+
+        # Load the dataset from HuggingFace
+        data = load_dataset(source, "JailBreakV_28K", cache_dir=data_home)
+
+        dataset_split = data[split]
+
+        per_call_cache: Dict[str, str] = {}
+
+        seed_prompts = []
+
+        # Define common metadata that will be used across all seed prompts
+        common_metadata = {
+            "dataset_name": "JailbreakV-28K",
+            "authors": ["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
+            "description": (
+                "Benchmark for Assessing the Robustness of "
+                "Multimodal Large Language Models against Jailbreak Attacks."
+            ),
+            "groups": ["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
+            "source": "https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
+            "name": "JailBreakV-28K",
+        }
+
+        # Tracks items whose image_path does not resolve to a local image file
+        missing_images = 0
+
+        for item in dataset_split:
+            policy = _normalize_policy(item.get("policy", ""))
+            # Skip the item if a policy filter was requested and this item's policy does not match
+            if not harm_categories_normalized or policy in harm_categories_normalized:
+                image_rel_path = item.get("image_path", "")
+                image_abs_path = ""
+                if image_rel_path:
+                    image_abs_path = _resolve_image_path(
+                        rel_path=image_rel_path,
+                        local_directory=zip_extracted_path,
+                        call_cache=per_call_cache,
+                    )
+                if not image_abs_path:
+                    missing_images += 1
+                    continue
+
+                # Pair the text prompt with its image under a shared group id
+                group_id = uuid.uuid4()
+                text_seed_prompt = SeedPrompt(
+                    value=item.get(text_field, ""),
+                    harm_categories=[policy],
+                    prompt_group_id=group_id,
+                    data_type="text",
+                    **common_metadata,  # type: ignore[arg-type]
+                )
+                image_seed_prompt = SeedPrompt(
+                    value=image_abs_path,
+                    harm_categories=[policy],
+                    prompt_group_id=group_id,
+                    data_type="image_path",
+                    **common_metadata,  # type: ignore[arg-type]
+                )
+                seed_prompts.append(text_seed_prompt)
+                seed_prompts.append(image_seed_prompt)
+
+    except Exception as e:
+        logger.error(f"Failed to load JailBreakV-28K dataset: {str(e)}")
+        raise Exception(f"Error loading JailBreakV-28K dataset: {str(e)}") from e
+
+    if len(seed_prompts) < min_prompts:
+        raise ValueError(
+            f"JailBreakV-28K fetch skipped {missing_images} items with missing images and "
+            f"produced only {len(seed_prompts)} seed prompts, below the required minimum of "
+            f"{min_prompts}. Please ensure the zip_dir parameter points to the full image set "
+            f"or check your backup image source."
+        )
+    elif missing_images > 0:
+        logger.warning(f"Failed to resolve {missing_images} image paths in JailBreakV-28K dataset")
+    if not seed_prompts:
+        raise ValueError(
+            "JailBreakV-28K fetch produced 0 prompts, likely because every item remaining "
+            "after filtering had an invalid image path."
+        )
+    return SeedDataset(prompts=seed_prompts)
+
+
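+# Note (added for clarity): this maps the dataset's human-readable policy labels
+# to snake_case harm categories, e.g. "Economic Harm" -> "economic_harm".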
+def _normalize_policy(policy: str) -> str:
+    """Create a machine-friendly variant of the human-readable policy label."""
+    return policy.strip().lower().replace(" ", "_").replace("-", "_")
+
+
+def _resolve_image_path(
+    *,
+    rel_path: str,
+    local_directory: pathlib.Path = pathlib.Path.home(),
+    call_cache: Dict[str, str] = {},
+) -> str:
+    """
+    Resolve a repo-relative image path to a local absolute path under local_directory.
+    Uses a cache (module-level by default, via the mutable default argument) to avoid
+    re-checking the same file.
+
+    Args:
+        rel_path: Path relative to the dataset repository root (e.g., "images/0001.png").
+        local_directory: Directory to search for the image. Defaults to the home directory.
+        call_cache: Optional dict to use instead of the module-level cache.
+
+    Returns:
+        The absolute local path if resolved, else an empty string (the miss is cached).
+    """
+    if not rel_path:
+        return ""
+
+    # Return the cached result if this path has already been resolved
+    if rel_path in call_cache:
+        return call_cache[rel_path]
+    image_path = local_directory / rel_path
+    try:
+        if image_path.exists():
+            abs_path = str(image_path)
+        else:
+            logger.debug(f"File {image_path} not found in {local_directory}")
+            abs_path = ""
+
+        call_cache[rel_path] = abs_path
+        return abs_path
+    except Exception as e:
+        logger.error(f"Failed to resolve image {rel_path}: {str(e)}")
+        call_cache[rel_path] = ""
+        return ""
diff --git a/tests/unit/datasets/test_exists_integration_test.py b/tests/unit/datasets/test_exists_integration_test.py
index bcc7ec44a..706541b41 100644
--- a/tests/unit/datasets/test_exists_integration_test.py
+++ b/tests/unit/datasets/test_exists_integration_test.py
@@ -35,7 +35,7 @@ def test_all_fetch_functions_are_tested():
 
     tested_fetch_functions = get_tested_fetch_functions(test_file_path)
 
-    missing = fetch_functions - tested_fetch_functions - set(["fetch_examples"])
+    missing = fetch_functions - tested_fetch_functions - set(["fetch_examples", "fetch_jailbreakv_28k_dataset"])
     assert not missing, (
         f"The following fetch_* functions from pyrit.datasets are not tested in "
         f"test_fetch_datasets.py: {sorted(missing)}"
diff --git a/tests/unit/datasets/test_fetch_jailbreakv_28k.py b/tests/unit/datasets/test_fetch_jailbreakv_28k.py
new file mode 100644
index 000000000..09b88294b
--- /dev/null
+++ b/tests/unit/datasets/test_fetch_jailbreakv_28k.py
@@ -0,0 +1,110 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
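+"""
+Unit tests for fetch_jailbreakv_28k_dataset. The HuggingFace loader, the image-path
+resolution helper, and the filesystem checks are all mocked, so these tests exercise
+only the filtering and prompt-construction logic, without network access or a local
+image archive.
+"""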
+
+import pathlib
+from contextlib import nullcontext
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from pyrit.datasets.fetch_jailbreakv_28k_dataset import fetch_jailbreakv_28k_dataset
+from pyrit.models import SeedDataset, SeedPrompt
+
+
+class TestFetchJailbreakv28kDataset:
+    """Test suite for the fetch_jailbreakv_28k_dataset function."""
+
+    @pytest.mark.parametrize("text_field", [None, "jailbreak_query"])
+    @pytest.mark.parametrize(
+        "harm_categories",
+        [None, ["Economic Harm"], ["Government Decision"]],
+    )
+    @pytest.mark.parametrize("min_prompts", [0, 2, 5])
+    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset.pathlib.Path")
+    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset._resolve_image_path")
+    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset.load_dataset")
+    def test_fetch_jailbreakv_28k_dataset_success(
+        self, mock_load_dataset, mock_resolve_image_path, mock_pathlib, text_field, harm_categories, min_prompts
+    ):
+        # Mock Path so the zip file appears to exist and to be already extracted
+        mock_zip_path = MagicMock()
+        mock_zip_path.exists.return_value = True
+        mock_extracted_path = MagicMock()
+        mock_extracted_path.exists.return_value = True
+
+        # mock_pathlib is the patched Path class itself, so successive
+        # `Path(...) / ...` joins yield the mocked paths in order
+        mock_pathlib.return_value.__truediv__.side_effect = [
+            mock_zip_path,  # First call: zip_file_path
+            mock_extracted_path,  # Second call: zip_extracted_path
+            mock_extracted_path,  # Additional calls for image resolution
+        ]
+        # Mock dataset response
+        mock_dataset = {
+            "mini_JailBreakV_28K": [
+                {
+                    "redteam_query": "test query 1",
+                    "jailbreak_query": "jailbreak: test query 1",
+                    "policy": "Economic Harm",
+                    "image_path": "mock_folder/valid",
+                },
+                {
+                    "redteam_query": "test query 2",
+                    "jailbreak_query": "jailbreak: test query 2",
+                    "policy": "Government Decision",
+                    "image_path": "invalid",
+                },
+                {
+                    "redteam_query": "test query 3",
+                    "jailbreak_query": "jailbreak: test query 3",
+                    "policy": "Fraud",
+                    "image_path": "mock_folder/valid",
+                },
+            ]
+        }
+        mock_load_dataset.return_value = mock_dataset
+
+        def fake_resolve_image_path(
+            *, rel_path: str = "", local_directory: pathlib.Path = pathlib.Path(), **kwargs
+        ) -> str:
+            return "" if rel_path == "invalid" else f"mock_path/{rel_path}"
+
+        mock_resolve_image_path.side_effect = fake_resolve_image_path
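+
+        # Of the three mocked items, "test query 2" ("Government Decision") has an
+        # unresolvable image path, so at most two text/image prompt pairs survive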
+        # A ValueError is expected when the "Government Decision" filter leaves no
+        # prompts (its only item has an invalid image) or when min_prompts exceeds
+        # the number of prompts that can be produced
+        expect_error = harm_categories == ["Government Decision"] or min_prompts == 5
+        ctx = pytest.raises(ValueError) if expect_error else nullcontext()
+
+        # Call the function, omitting text_field when None so the default is exercised
+        kwargs = {"harm_categories": harm_categories, "min_prompts": min_prompts}
+        if text_field is not None:
+            kwargs["text_field"] = text_field
+        with ctx:
+            result = fetch_jailbreakv_28k_dataset(**kwargs)
+        if expect_error:
+            return
+
+        # Assertions
+        assert isinstance(result, SeedDataset)
+        if harm_categories is None:
+            assert len(result.prompts) == 4
+            assert sum(p.data_type == "text" for p in result.prompts) == 2
+            assert sum(p.data_type == "image_path" for p in result.prompts) == 2
+        elif harm_categories == ["Economic Harm"]:
+            assert len(result.prompts) == 2
+            assert sum(p.data_type == "text" for p in result.prompts) == 1
+            assert sum(p.data_type == "image_path" for p in result.prompts) == 1
+            assert result.prompts[0].harm_categories == ["economic_harm"]
+        assert all(isinstance(prompt, SeedPrompt) for prompt in result.prompts)
+        if text_field == "jailbreak_query":
+            for prompt in result.prompts:
+                if prompt.data_type == "text":
+                    assert "jailbreak" in prompt.value
+        else:
+            for prompt in result.prompts:
+                if prompt.data_type == "text":
+                    assert "jailbreak" not in prompt.value
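+
+
+# Illustrative sketch (not part of the original change): a direct check of the
+# policy-normalization helper that the harm-category filtering relies on.
+def test_normalize_policy_formats_category():
+    from pyrit.datasets.fetch_jailbreakv_28k_dataset import _normalize_policy
+
+    assert _normalize_policy("Economic Harm") == "economic_harm"
+    assert _normalize_policy(" Tailored Unlicensed Advice ") == "tailored_unlicensed_advice"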