torchtune usecase
pbelevich committed Apr 12, 2024
1 parent b6461fb commit db51efe
Showing 5 changed files with 182 additions and 0 deletions.
6 changes: 6 additions & 0 deletions 3.test_cases/torchtune/.gitignore
@@ -0,0 +1,6 @@
checkpoints
models
miniconda3
pt_torchtune
torchtune
Miniconda3-latest-Linux-x86_64.sh
25 changes: 25 additions & 0 deletions 3.test_cases/torchtune/0.create_conda_env.sh
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
set -ex

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
chmod +x Miniconda3-latest-Linux-x86_64.sh
./Miniconda3-latest-Linux-x86_64.sh -b -f -p ./miniconda3

source ./miniconda3/bin/activate

conda create -y -p ./pt_torchtune python=3.10

source activate ./pt_torchtune/

# Install AWS PyTorch, see https://aws-pytorch-doc.com/
# conda install -y pytorch=2.2.0 torchvision torchaudio torchtriton=2.2.0 pytorch-cuda=12.1 transformers datasets --strict-channel-priority --override-channels -c https://aws-ml-conda.s3.us-west-2.amazonaws.com -c nvidia -c conda-forge
conda install -y pytorch torchvision torchaudio pytorch-cuda=12.1 transformers datasets -c pytorch -c nvidia

git clone https://github.com/pytorch/torchtune.git
pip install -e ./torchtune

# Create checkpoint dir
mkdir -p checkpoints
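
A minimal sketch of running this setup script and sanity-checking the result (assumes the ./miniconda3 and ./pt_torchtune paths created above; the check commands are illustrative):

bash 0.create_conda_env.sh
source ./miniconda3/bin/activate
source activate ./pt_torchtune/
# Confirm PyTorch sees the GPUs and the torchtune CLI is installed
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
tune --help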
22 changes: 22 additions & 0 deletions 3.test_cases/torchtune/1.download_hf_model.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

# set -ex;

# Default value for HF_MODEL
DEFAULT_HF_MODEL="meta-llama/Llama-2-7b"
read -p "Please enter Hugging Face model ($DEFAULT_HF_MODEL): " HF_MODEL
if [ -z "$HF_MODEL" ]; then
HF_MODEL="$DEFAULT_HF_MODEL"
fi

read -p "Please enter Hugging Face Access Tokens: " HF_TOKEN

mkdir -p models/${HF_MODEL}

tune download \
${HF_MODEL} \
--output-dir models/${HF_MODEL} \
--hf-token ${HF_TOKEN}
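
An illustrative run of the download script (the prompt answers are placeholders; the expected files follow from the Meta-format Llama-2-7b checkpoint referenced by the config below):

bash 1.download_hf_model.sh
# Please enter Hugging Face model (meta-llama/Llama-2-7b):   <press Enter to accept the default>
# Please enter Hugging Face Access Token: hf_xxxxxxxxxxxx
ls models/meta-llama/Llama-2-7b
# Expect tokenizer.model and consolidated.00.pth among the downloaded files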
52 changes: 52 additions & 0 deletions 3.test_cases/torchtune/2.full_finetune_distributed.sbatch
@@ -0,0 +1,52 @@
#!/bin/bash

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

#SBATCH --nodes=1 # number of nodes to use
#SBATCH --job-name=full_ft # name of your job
#SBATCH --exclusive # job has exclusive use of the resource, no sharing

set -ex;

###########################
###### User Variables #####
###########################

GPUS_PER_NODE=4 # 4 for G5.12x, 8 for P4/P5

###########################
## Environment Variables ##
###########################

## EFA-level variables
## Comment these out on non-EFA instances (G4dn, P3)
## For G5.12x, comment out the RDMA and fork-safe variables
## For G4dn and other G5 sizes, comment out all of them
# export FI_EFA_USE_DEVICE_RDMA=1 # use for p4d
# export FI_EFA_FORK_SAFE=1
export FI_LOG_LEVEL=1
export FI_PROVIDER=efa
export NCCL_DEBUG=INFO
## Switching SYNC_MEMOPS to zero can boost throughput with FSDP
## Disables CU_POINTER_ATTRIBUTE_SYNC_MEMOPS
## Reduces memory synchronizations
## https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__UNIFIED.html
export FI_EFA_SET_CUDA_SYNC_MEMOPS=0

###########################
####### Torch Dist #######
###########################

declare -a TORCHRUN_ARGS=(
--nproc_per_node=$GPUS_PER_NODE \
--nnodes=$SLURM_JOB_NUM_NODES \
--rdzv_id=$SLURM_JOB_ID \
--rdzv_backend=c10d \
--rdzv_endpoint=$(hostname) \
)

export TORCHTUNE=./pt_torchtune/bin/tune
export TRAIN_CONFIG=./llama2_7B_full.yaml

srun -l ${TORCHTUNE} run "${TORCHRUN_ARGS[@]}" full_finetune_distributed --config ${TRAIN_CONFIG}
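
A sketch of submitting and monitoring the job (assumes it is launched from the directory containing pt_torchtune/, models/ and llama2_7B_full.yaml; the log file name is Slurm's default):

sbatch 2.full_finetune_distributed.sbatch
squeue -u $USER              # check that the full_ft job is running
tail -f slurm-<JOB_ID>.out   # follow torchrun / training output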
77 changes: 77 additions & 0 deletions 3.test_cases/torchtune/llama2_7B_full.yaml
@@ -0,0 +1,77 @@
# Config for multi-device full finetuning in full_finetune_distributed.py
# using a Llama2 7B model
#
# This config assumes that you've run the following command before launching
# this run:
# tune download meta-llama/Llama-2-7b \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2
#
# To launch on 4 devices, run the following command from root:
# tune run --nproc_per_node 4 full_finetune_distributed \
# --config llama2/7B_full \
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed \
# --config llama2/7B_full \
# checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works best when the model is being fine-tuned on 2+ GPUs.
# Single device full finetuning requires more memory optimizations. It's
# best to use 7B_full_single_device.yaml for those cases


# Tokenizer
tokenizer:
  _component_: torchtune.models.llama2.llama2_tokenizer
  path: models/meta-llama/Llama-2-7b/tokenizer.model

# Dataset
dataset:
  _component_: torchtune.datasets.alpaca_dataset
  train_on_input: True
seed: null
shuffle: True

# Model Arguments
model:
  _component_: torchtune.models.llama2.llama2_7b

checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  checkpoint_dir: models/meta-llama/Llama-2-7b
  checkpoint_files: [consolidated.00.pth]
  recipe_checkpoint: null
  output_dir: models/meta-llama/Llama-2-7b
  model_type: LLAMA2
resume_from_checkpoint: False

# Fine-tuning arguments
batch_size: 2
epochs: 3
optimizer:
  _component_: torch.optim.AdamW
  lr: 2e-5
loss:
  _component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1


# Training env
device: cuda

# Memory management
enable_activation_checkpointing: True

# Reduced precision
dtype: bf16

# Logging
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  log_dir: ${output_dir}
output_dir: /tmp/alpaca-llama2-finetune
log_every_n_steps: null
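
As the header comments note, individual config values can be overridden at launch time. A hedged example reusing the srun invocation from the sbatch script above (the batch_size and checkpointer.output_dir values are illustrative):

srun -l ${TORCHTUNE} run "${TORCHRUN_ARGS[@]}" full_finetune_distributed \
    --config ./llama2_7B_full.yaml \
    batch_size=4 \
    checkpointer.output_dir=checkpoints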
