Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion scripts/vllm/parse_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,31 @@ def parse_throughput_csv(file_path):

return df_new

def parse_accuracy_csv(file_path):
    """Parse an accuracy (P3L perplexity) report CSV into the combined-report schema.

    Args:
        file_path: Path (or file-like object) of the accuracy report CSV.
            Expected columns: 'model', 'tp', 'dtype', 'ppl'.

    Returns:
        DataFrame with columns 'model' ("<model>_<tp>_<dtype>"),
        'performance' (the perplexity value) and 'metric'
        (constant string 'perplexity').
    """
    # Read the CSV file
    df = pd.read_csv(file_path)

    # Create a new DataFrame
    df_new = pd.DataFrame()

    # Combine the columns of model, tp and dtype to a new column named
    # 'model', e.g. 'llama_8_float16'.
    df_new['model'] = df['model'] + '_' + df['tp'].astype(str) + '_' + df['dtype']

    # Put the column of 'ppl' to a new column named 'performance'
    df_new['performance'] = df['ppl']

    # Add a new column named 'metric' and set the value to 'perplexity' so
    # rows stay distinguishable after concatenation with latency/throughput.
    df_new['metric'] = 'perplexity'

    return df_new


def parse_args():
    """Build the CLI for the report parser and return the parsed namespace."""
    cli = argparse.ArgumentParser(
        description='Parse the CSV file about latency and throughput.')
    cli.add_argument('--file_latency', type=str,
                     help='The file name of the latency report')
    cli.add_argument('--file_throughput', type=str,
                     help='The file name of the throughput report')
    cli.add_argument('--file_accuracy', type=str,
                     help='The file name of the accuracy report')
    return cli.parse_args()

Expand All @@ -56,6 +76,7 @@ def parse_args():
# Read the three (optional) per-mode summary report paths from the CLI.
args = parse_args()
file_latency = args.file_latency
file_throughput = args.file_throughput
# Accuracy (perplexity) report added alongside latency/throughput.
file_accuracy = args.file_accuracy

# Extract the model name from the file name
# NOTE(review): assumes the report is named '<model>_<...>.csv' under some
# directory — confirm against the generator script's naming convention.
model_name = file_latency.split('/')[-1].split('_')[0]
Expand Down Expand Up @@ -83,8 +104,18 @@ def parse_args():
else:
print('The file of throughput summary is not found.')

# Check if the accuracy report exists; fall back to an empty DataFrame so
# the concat below still works when the accuracy run was skipped.
if file_accuracy and os.path.exists(file_accuracy):
    # Parse the CSV file
    df_accuracy = parse_accuracy_csv(file_accuracy)

    # Print the first 5 rows of the DataFrame
    print(df_accuracy.head())
else:
    # Fixed copy-paste: this branch reports the *accuracy* file, not throughput.
    print('The file of accuracy summary is not found.')
    df_accuracy = pd.DataFrame()

# Combine the DataFrames of latency, throughput and accuracy and write to a new CSV file
df_combined = pd.concat([df_latency, df_throughput, df_accuracy], ignore_index=True)

# Get the parent directory of the __file__
parent_dir = os.path.dirname(os.path.abspath(__file__))
Expand Down
21 changes: 21 additions & 0 deletions scripts/vllm/vllm_benchmark_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,24 @@ def extract_val(dirty_list, key):
writer.writerow(model_details)
except csv.Error as e:
sys.exit('file {}: {}'.format(args.input_json, e))

elif args.mode == "accuracy":
with open(args.input_json, newline='') as inpf:
header_write = 0 if os.path.exists(args.output_csv) else 1
with open(args.output_csv,'a+',newline='') as outf:
writer = csv.writer(outf, delimiter=',')
if header_write:
writer.writerow(['model', 'ppl', 'tp', 'dtype']) if header_write else None

# workaround to vllm's dirty json output from multi-gpu cases
dirty_json = inpf.read()
dirty_list = dirty_json.replace(",","").replace(":","").replace("\"","").split()
ppl = float(extract_val(dirty_list, "ppl"))
try:
model_details = args.model ,\
str(ppl) ,\
args.tp ,\
args.dtype
writer.writerow(model_details)
except csv.Error as e:
sys.exit('file {}: {}'.format(args.input_json, e))
16 changes: 15 additions & 1 deletion scripts/vllm/vllm_benchmark_report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ report_dir="reports_${datatype}_${tag}"
report_summary_dir="${report_dir}/summary"
# Benchmark entry points shipped inside the vLLM container image.
tool_latency="/app/vllm/benchmarks/benchmark_latency.py"
tool_throughput="/app/vllm/benchmarks/benchmark_throughput.py"
# P3L: perplexity-based accuracy benchmark.
tool_accuracy="/app/vllm/benchmarks/P3L.py"
tool_report="vllm_benchmark_report.py"
# Warm-up and measured iteration counts for the benchmark runs.
n_warm=3
n_itr=5
Expand Down Expand Up @@ -156,11 +157,24 @@ if [ "$scenario" == "throughput" ] || [ "$scenario" == "all" ]; then
done < <(tail -n +2 config.csv)
done
fi
# Accuracy (P3L perplexity) scenario: run the benchmark once for the current
# model/tp/dtype, then append the resulting row to the accuracy summary CSV.
# (Removed the stray "# model_name" / "# tp" placeholder comments.)
if [ "$scenario" == "accuracy" ] || [ "$scenario" == "all" ]; then
    echo "[INFO] ACCURACY(P3L)"
    mode="accuracy"
    outcsv=${report_summary_dir}/${model_name}_${mode}_report.csv
    outjson=${report_dir}/${model_name}_${mode}_${datatype}.json

    python3 $tool_accuracy --model $model --tensor-parallel-size $tp --output-json $outjson
    python3 $tool_report --mode $mode --model $model_name --tp $tp --input-json $outjson --output-csv $outcsv --dtype $datatype

fi

# Merge the per-mode summary CSVs (latency, throughput, accuracy) into one
# combined report. A single parser invocation handles all three files —
# the old two-file invocation left over from the diff is dropped so the
# parser does not run twice.
echo "Generate report of multiple results"
tool_parser="parse_csv.py"
latency_summary_csv=${report_summary_dir}/${model_name}_latency_report.csv
throughput_summary_csv=${report_summary_dir}/${model_name}_throughput_report.csv
accuracy_summary_csv=${report_summary_dir}/${model_name}_accuracy_report.csv
python3 $tool_parser --file_latency $latency_summary_csv --file_throughput $throughput_summary_csv --file_accuracy $accuracy_summary_csv

mv perf_${model_name}.csv ../