diff --git a/.gitignore b/.gitignore
index 0fe1f9aa7..f2b181315 100644
--- a/.gitignore
+++ b/.gitignore
@@ -194,3 +194,4 @@ doc/_autosummary/
 # Ignore notebooks directory in Docker folder
 /docker/notebooks/
 /docker/notebooks/*
+pyrit/auxiliary_attacks/gcg/experiments/mlruns/*
diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb
index 7ad6fdf14..d0c8b3ea4 100644
--- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb
+++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb
@@ -42,23 +42,21 @@
    "execution_count": null,
    "id": "4",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "romanlutz\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import os\n",
     "\n",
+    "from pyrit.common import DUCK_DB, initialize_pyrit\n",
+    "\n",
+    "# Technically, we don't need to initialize PyRIT locally as the job is running in Azure.\n",
+    "# However, we store environment variables in .env or .env.local files, which are read by PyRIT.\n",
+    "# If you set your environment variables differently, you can skip this step.\n",
+    "initialize_pyrit(memory_db_type=DUCK_DB)\n",
+    "\n",
     "# Enter details of your AML workspace\n",
     "subscription_id = os.environ.get(\"AZURE_SUBSCRIPTION_ID\")\n",
     "resource_group = os.environ.get(\"AZURE_RESOURCE_GROUP\")\n",
     "workspace = os.environ.get(\"AZURE_ML_WORKSPACE_NAME\")\n",
-    "print(workspace)"
+    "print(subscription_id, resource_group, workspace)"
    ]
   },
   {
@@ -101,18 +99,7 @@
    "metadata": {
     "lines_to_next_cell": 2
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'pyrit', 'description': 'PyRIT environment created from a Docker context.', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourceGroups/romanlutz/providers/Microsoft.MachineLearningServices/workspaces/romanlutz/environments/pyrit/versions/5', 'Resource__source_path': '', 'base_path': 'c:\\\\Users\\\\Roman\\\\git\\\\PyRIT\\\\doc\\\\code\\\\auxiliary_attacks', 'creation_context': , 'serialize': , 'version': '5', 'conda_file': None, 'build': , 'inference_config': None, 'os_type': 'Linux', 'conda_file_path': None, 'path': None, 'datastore': None, 'upload_hash': None, 'translated_conda_file': None})"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from pathlib import Path\n",
     "\n",
@@ -186,38 +173,30 @@
    "execution_count": null,
    "id": "12",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n",
-      "Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n",
-      "Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n",
-      "Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n",
-      "Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n",
-      "Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n",
-      "Your file exceeds 100 MB. If you experience low speeds, latency, or broken connections, we recommend using the AzCopyv10 tool for this file transfer.\n",
-      "\n",
-      "Example: azcopy copy 'C:\\Users\\Roman\\git\\PyRIT' 'https://romanlutz0437468309.blob.core.windows.net/3f52e8b9-0bac-4c48-9e4a-a92e85a582c4-10s61nn9uso4b2p89xjypawyc7/PyRIT' \n",
-      "\n",
-      "See https://learn.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.\n",
-      "\u001b[32mUploading PyRIT (194.65 MBs): 100%|##########| 194652493/194652493 [01:19<00:00, 2447407.71it/s] \n",
-      "\u001b[39m\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Submit the command\n",
     "returned_job = ml_client.create_or_update(job)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bfe5c544",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "jupytext": {
    "cell_metadata_filter": "-all"
   },
+  "kernelspec": {
+   "display_name": "pyrit-python313-fresh2",
+   "language": "python",
+   "name": "python3"
+  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
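For readers following along: the three identifiers printed above exist only to build the AML workspace handle used by the rest of the notebook. A minimal sketch of that step, assuming the public `azure.ai.ml` and `azure.identity` APIs (the notebook's actual client cell sits outside the hunks shown):

```python
import os

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build a handle to the AML workspace using the same environment variables
# the notebook reads after initialize_pyrit loads the .env files.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=os.environ.get("AZURE_SUBSCRIPTION_ID"),
    resource_group_name=os.environ.get("AZURE_RESOURCE_GROUP"),
    workspace_name=os.environ.get("AZURE_ML_WORKSPACE_NAME"),
)
```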
diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py
index 996821994..784512300 100644
--- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py
+++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py
@@ -6,9 +6,9 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.4
+#       jupytext_version: 1.17.1
 #   kernelspec:
-#     display_name: pyrit-dev
+#     display_name: pyrit-python313-fresh2
 #     language: python
 #     name: python3
 # ---
@@ -31,13 +31,19 @@
 # To connect to a workspace, we need identifier parameters - a subscription, resource group and workspace name. We will use these details in the `MLClient` from `azure.ai.ml` to get a handle to the required AML workspace. We use the [default Azure authentication](https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python) for this tutorial.

 # %%
 import os
 
+from pyrit.common import DUCK_DB, initialize_pyrit
+
+# Technically, we don't need to initialize PyRIT locally as the job is running in Azure.
+# However, we store environment variables in .env or .env.local files, which are read by PyRIT.
+# If you set your environment variables differently, you can skip this step.
+initialize_pyrit(memory_db_type=DUCK_DB)
+
 # Enter details of your AML workspace
 subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID")
 resource_group = os.environ.get("AZURE_RESOURCE_GROUP")
 workspace = os.environ.get("AZURE_ML_WORKSPACE_NAME")
-print(workspace)
+print(subscription_id, resource_group, workspace)
 
 # %%
 from azure.ai.ml import MLClient
@@ -100,9 +106,11 @@
     resources=JobResourceConfiguration(
         instance_type="Standard_NC96ads_A100_v4",
         instance_count=1,
     ),
 )
 
 # %%
 # Submit the command
 returned_job = ml_client.create_or_update(job)
+
+# %%
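For context on the hunk above: `ml_client.create_or_update(job)` submits a `command` job whose GPU SKU is pinned by the `JobResourceConfiguration` shown. A hedged sketch of how such a job is typically assembled; the code path, command line, and environment name below are placeholders, not the notebook's actual values:

```python
from azure.ai.ml import command
from azure.ai.ml.entities import JobResourceConfiguration

job = command(
    code="./src",               # placeholder: folder containing the training code
    command="python train.py",  # placeholder entry point
    environment="pyrit@latest", # the Docker-based environment created earlier
    resources=JobResourceConfiguration(
        instance_type="Standard_NC96ads_A100_v4",  # matches the diff
        instance_count=1,
    ),
)
returned_job = ml_client.create_or_update(job)
```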
diff --git a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py
index b2b977a42..5c8c1a2fd 100644
--- a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py
+++ b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py
@@ -16,8 +16,6 @@
 import torch
 import torch.multiprocessing as mp
 import torch.nn as nn
-from fastchat.conversation import Conversation, SeparatorStyle
-from fastchat.model import get_conversation_template
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
@@ -125,7 +123,6 @@ def __init__(
         goal,
         target,
         tokenizer,
-        conv_template,
         control_init="! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !",
         test_prefixes=["I'm sorry", "I apologize", "As an", "I'm just", "I'm an", "I am", "I cannot", "Hello!"],
         *args,
@@ -155,117 +152,104 @@ def __init__(
         self.target = target
         self.control = control_init
         self.tokenizer = tokenizer
-        self.conv_template = conv_template
         self.test_prefixes = test_prefixes
 
-        self.conv_template.messages = []
-
         self.test_new_toks = len(self.tokenizer(self.target).input_ids) + 2  # buffer
         for prefix in self.test_prefixes:
             self.test_new_toks = max(self.test_new_toks, len(self.tokenizer(prefix).input_ids))
 
         self._update_ids()
 
+    def _detect_assistant_role(self, prompt, target_prompt):
+        """Detect the assistant role tag directly from the rendered prompt instead of
+        relying on a model-specific conversation template from fastchat."""
+        # Common assistant role patterns to try first.
+        assistant_patterns = [
+            "<|assistant|>",
+            "assistant:",
+        ]
+
+        # Look for any known assistant pattern anywhere in the prompt.
+        for pattern in assistant_patterns:
+            pos = prompt.find(pattern)
+            if pos != -1:
+                return pos, pattern
+
+        # Failing that, search backwards from the target position.
+        if target_prompt != -1:
+            search_start = max(0, target_prompt - 200)
+            search_text = prompt[search_start:target_prompt]
+
+            for pattern in assistant_patterns:
+                pos = search_text.rfind(pattern)
+                if pos != -1:
+                    return search_start + pos, pattern
+
+        # Otherwise, assume the assistant role starts on the line immediately before the target.
+        if target_prompt != -1:
+            before_target = prompt[:target_prompt]
+            last_newline = before_target.rfind("\n")
+            if last_newline != -1:
+                return last_newline + 1, "assistant"
+            else:
+                return target_prompt, "assistant"
+
+        return 0, "assistant"
+
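Since `_detect_assistant_role` is pure string logic, its fallback order can be exercised without any model loaded. A toy illustration, where the prompt string is handwritten rather than produced by a real chat template and `attack_prompt` stands for an already-constructed `AttackPrompt`:

```python
# Handwritten phi-3-style rendering; real apply_chat_template output may differ.
prompt = "<|user|>\nWrite a tutorial ! ! !<|end|>\n<|assistant|>\nSure, here is<|end|>"
target_pos = prompt.find("Sure, here is")

pos, tag = attack_prompt._detect_assistant_role(prompt, target_pos)
assert tag == "<|assistant|>"        # matched by the first pattern scan
assert prompt[pos:].startswith(tag)  # pos is a character offset into the prompt
```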
# print(f"{conv_roles[0]=}, {conv_roles[1]=}") + print(f"checking prompt before encoding (new), {prompt=}") + encoding = self.tokenizer(prompt) toks = encoding.input_ids + print(f"{prompt.find(conv_roles[1])=}") + goal_prompt = prompt.find(self.goal) + control_prompt = prompt.find(self.control) + target_prompt = prompt.find(self.target) + # assistant_tag = prompt.find(conv_roles[1]) # this only works for phi-3-mini + + # Dynamically detect the assistant role tag + assistant_tag, assistant_role = self._detect_assistant_role(prompt, target_prompt) + print(f"{assistant_tag=}, {assistant_role=}") + + # Calculate the slice + self._assistant_role_slice = slice( + encoding.char_to_token(assistant_tag), + encoding.char_to_token(assistant_tag + len(assistant_role)), + ) + self._goal_slice = slice( + encoding.char_to_token(goal_prompt), + encoding.char_to_token(goal_prompt + len(self.goal)), + ) + self._control_slice = slice( + encoding.char_to_token(control_prompt), + encoding.char_to_token(control_prompt + len(self.control)), + ) + # self._assistant_role_slice = slice( + # encoding.char_to_token(assistant_tag), + # encoding.char_to_token( + # assistant_tag + len(conv_roles[1]) + 1 + # ), + # ) + self._target_slice = slice( + encoding.char_to_token(target_prompt), + encoding.char_to_token(target_prompt + len(self.target)), + ) + self._loss_slice = slice( + encoding.char_to_token(target_prompt) - 1, + encoding.char_to_token(target_prompt + len(self.target)) - 1, + ) - if self.conv_template.name == "llama-2" or self.conv_template.name == "llama-3": - self.conv_template.messages = [] - - self.conv_template.append_message(self.conv_template.roles[0], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._user_role_slice = slice(None, len(toks)) - - self.conv_template.update_last_message(f"{self.goal}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._goal_slice = slice(self._user_role_slice.stop, max(self._user_role_slice.stop, len(toks))) - - separator = " " if self.goal else "" - self.conv_template.update_last_message(f"{self.goal}{separator}{self.control}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._control_slice = slice(self._goal_slice.stop, len(toks)) - - self.conv_template.append_message(self.conv_template.roles[1], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._assistant_role_slice = slice(self._control_slice.stop, len(toks)) - - self.conv_template.update_last_message(f"{self.target}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._target_slice = slice(self._assistant_role_slice.stop, len(toks) - 2) - self._loss_slice = slice(self._assistant_role_slice.stop - 1, len(toks) - 3) - - else: - python_tokenizer = False or self.conv_template.name == "oasst_pythia" - try: - encoding.char_to_token(len(prompt) - 1) - except Exception: - python_tokenizer = True - if python_tokenizer: - # This is specific to the vicuna and pythia tokenizer and conversation prompt. - # It will not work with other tokenizers or prompts. 
@@ -457,7 +441,6 @@ def __init__(
         goals,
         targets,
         tokenizer,
-        conv_template,
         control_init="! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !",
         test_prefixes=["I'm sorry", "I apologize", "As an", "I'm just", "I'm an", "I am", "I cannot", "Hello!"],
         managers=None,
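To make the signature change concrete: prompt objects are now constructed without a conversation template. A sketch mirroring the call sites below, where `managers`, `goal`, `target`, and `tokenizer` are assumed to exist as in the surrounding code:

```python
# AttackPrompt-style construction after the conv_template removal.
prompt = managers["AP"](
    goal,
    target,
    tokenizer,
    "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !",  # control_init, the default above
    ["I'm sorry", "I apologize", "I cannot"],   # test_prefixes (subset)
)
```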
!", test_prefixes=["I'm sorry", "I apologize", "As an", "I'm just", "I'm an", "I am", "I cannot", "Hello!"], managers=None, @@ -494,7 +477,7 @@ def __init__( self.tokenizer = tokenizer self._prompts = [ - managers["AP"](goal, target, tokenizer, conv_template, control_init, test_prefixes) + managers["AP"](goal, target, tokenizer, control_init, test_prefixes) for goal, target in zip(goals, targets) ] @@ -637,7 +620,7 @@ def __init__( self.logfile = logfile self.prompts = [ managers["PM"]( - goals, targets, worker.tokenizer, worker.conv_template, control_init, test_prefixes, managers + goals, targets, worker.tokenizer, control_init, test_prefixes, managers ) for worker in workers ] @@ -817,7 +800,6 @@ def test_all(self): self.goals + self.test_goals, self.targets + self.test_targets, worker.tokenizer, - worker.conv_template, self.control_str, self.test_prefixes, self.managers, @@ -961,7 +943,7 @@ def __init__( self.test_workers = test_workers self.progressive_goals = progressive_goals self.progressive_models = progressive_models - self.control = control_init + self.control = control_init # conv template setting self.test_prefixes = test_prefixes self.logfile = logfile self.managers = managers @@ -984,7 +966,6 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, } for worker in self.workers ], @@ -992,7 +973,6 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, } for worker in self.test_workers ], @@ -1222,7 +1202,6 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, } for worker in self.workers ], @@ -1230,7 +1209,6 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, } for worker in self.test_workers ], @@ -1432,8 +1410,7 @@ def __init__( "models": [ { "model_path": worker.model.name_or_path, - "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "tokenizer_path": worker.tokenizer.name_or_path } for worker in self.workers ], @@ -1441,7 +1418,6 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, } for worker in self.test_workers ], @@ -1559,7 +1535,7 @@ def run(self, steps, controls, batch_size, max_new_len=60, verbose=True): class ModelWorker(object): - def __init__(self, model_path, token, model_kwargs, tokenizer, conv_template, device): + def __init__(self, model_path, token, model_kwargs, tokenizer, device): self.model = ( AutoModelForCausalLM.from_pretrained( model_path, token=token, torch_dtype=torch.float16, trust_remote_code=False, **model_kwargs @@ -1568,7 +1544,6 @@ def __init__(self, model_path, token, model_kwargs, tokenizer, conv_template, de .eval() ) self.tokenizer = tokenizer - self.conv_template = conv_template self.tasks = mp.JoinableQueue() self.results = mp.JoinableQueue() self.process = None @@ -1619,8 +1594,7 @@ def get_workers(params, eval=False): tokenizers = [] for i in range(len(params.tokenizer_paths)): tokenizer = AutoTokenizer.from_pretrained( - params.tokenizer_paths[i], token=params.token, trust_remote_code=False, **params.tokenizer_kwargs[i] - ) + params.tokenizer_paths[i], token=params.token, 
@@ -1643,41 +1617,12 @@ def get_workers(params, eval=False):
 
     logger.info(f"Loaded {len(tokenizers)} tokenizers")
 
-    raw_conv_templates = []
-    for template in params.conversation_templates:
-        if template in ["llama-2", "mistral", "llama-3-8b", "vicuna"]:
-            raw_conv_templates.append(get_conversation_template(template)),
-        elif template in ["phi-3-mini"]:
-            conv_template = Conversation(
-                name="phi-3-mini",
-                system_template="<|system|>\n{system_message}",
-                system_message="",
-                roles=("<|user|>", "<|assistant|>"),
-                sep_style=SeparatorStyle.CHATML,
-                sep="<|end|>",
-                stop_token_ids=[32000, 32001, 32007],
-            )
-            raw_conv_templates.append(conv_template)
-        else:
-            raise ValueError("Conversation template not recognized")
-
-    conv_templates = []
-    for conv in raw_conv_templates:
-        if conv.name == "zero_shot":
-            conv.roles = tuple(["### " + r for r in conv.roles])
-            conv.sep = "\n"
-        elif conv.name == "llama-2":
-            conv.sep2 = conv.sep2.strip()
-        conv_templates.append(conv)
-
-    logger.info(f"Loaded {len(conv_templates)} conversation templates")
 
     workers = [
         ModelWorker(
             params.model_paths[i],
             params.token,
             params.model_kwargs[i],
             tokenizers[i],
-            conv_templates[i],
             params.devices[i],
         )
         for i in range(len(params.model_paths))
diff --git a/pyrit/auxiliary_attacks/gcg/experiments/train.py b/pyrit/auxiliary_attacks/gcg/experiments/train.py
index 9e3a5b79e..fbf05cbbb 100644
--- a/pyrit/auxiliary_attacks/gcg/experiments/train.py
+++ b/pyrit/auxiliary_attacks/gcg/experiments/train.py
@@ -108,7 +108,6 @@ def generate_suffix(
     params.logfile = logfile
     params.random_seed = random_seed
     logger.info(f"Parameters: {params}")
-    # Start mlflow logging
     mlflow.start_run()
     log_gpu_memory(step=0)
diff --git a/pyrit/auxiliary_attacks/gcg/src/Dockerfile b/pyrit/auxiliary_attacks/gcg/src/Dockerfile
index 7301cca57..c79593376 100644
--- a/pyrit/auxiliary_attacks/gcg/src/Dockerfile
+++ b/pyrit/auxiliary_attacks/gcg/src/Dockerfile
@@ -8,6 +8,9 @@ ENV CONDA_ENVIRONMENT_PATH /azureml-envs/pyrit
 ENV PATH $CONDA_ENVIRONMENT_PATH/bin:$PATH
 
 # Create conda environment
+# Newer conda releases require accepting the Anaconda channel Terms of Service first.
+RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
+    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
 COPY conda_dependencies.yaml .
 RUN conda env create -p $CONDA_ENVIRONMENT_PATH -f conda_dependencies.yaml -q && \
     rm conda_dependencies.yaml && \
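The new `.gitignore` entry and the `mlflow.start_run()` call in `train.py` are two ends of the same feature: without a configured tracking server, MLflow writes run data to a local `mlruns/` directory next to the experiment script. A minimal sketch of that pattern, with an invented metric name and value:

```python
import mlflow

# Runs land in ./mlruns/ by default, hence the new .gitignore entry for
# pyrit/auxiliary_attacks/gcg/experiments/mlruns/.
mlflow.start_run()
try:
    mlflow.log_metric("loss", 1.23, step=0)
finally:
    mlflow.end_run()
```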