From 750559842243965acc735fdd83a0e814e237530e Mon Sep 17 00:00:00 2001 From: Douglas Lehr Date: Tue, 4 Mar 2025 00:21:09 -0600 Subject: [PATCH] Add ppl accuracy benchmark to vllm tests --- scripts/vllm/parse_csv.py | 33 ++++++++++++++++++++++++++- scripts/vllm/vllm_benchmark_report.py | 21 +++++++++++++++++ scripts/vllm/vllm_benchmark_report.sh | 16 ++++++++++++- 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/scripts/vllm/parse_csv.py b/scripts/vllm/parse_csv.py index 2f25649..9113bd4 100644 --- a/scripts/vllm/parse_csv.py +++ b/scripts/vllm/parse_csv.py @@ -42,11 +42,31 @@ def parse_throughput_csv(file_path): return df_new +def parse_accuracy_csv(file_path): + # Read the CSV file + df = pd.read_csv(file_path) + + # Create a new DataFrame + df_new = pd.DataFrame() + + # Combine the columns of model, tp, and dtype to a new column named 'model' + df_new['model'] = df['model'] + '_' + df['tp'].astype(str) + '_' + df['dtype'] + + # Put the column of 'ppl' to a new column named 'performance' + df_new['performance'] = df['ppl'] + + # Add a new column named 'metric' and set the value to 'perplexity' + df_new['metric'] = 'perplexity' + + return df_new + def parse_args(): parser = argparse.ArgumentParser(description='Parse the CSV file about latency and throughput.') parser.add_argument('--file_latency', type=str, help='The file name of the latency report') parser.add_argument('--file_throughput', type=str, help='The file name of the throughput report') + parser.add_argument('--file_accuracy', type=str, help='The file name of the accuracy report') + args = parser.parse_args() return args @@ -56,6 +76,7 @@ def parse_args(): args = parse_args() file_latency = args.file_latency file_throughput = args.file_throughput + file_accuracy = args.file_accuracy # Extract the model name from the file name model_name = file_latency.split('/')[-1].split('_')[0] @@ -83,8 +104,18 @@ def parse_args(): else: print('The 
file of throughput summary is not found.') + # Check if the file exists + if file_accuracy and os.path.exists(file_accuracy): + # Parse the CSV file + df_accuracy = parse_accuracy_csv(file_accuracy) + + # Print the first 5 rows of the DataFrame + print(df_accuracy.head()) + else: + df_accuracy = pd.DataFrame(); print('The file of accuracy summary is not found.') + # Combine the DataFrames of latency and throughput and write to a new CSV file - df_combined = pd.concat([df_latency, df_throughput], ignore_index=True) + df_combined = pd.concat([df_latency, df_throughput, df_accuracy], ignore_index=True) # Get the parent directory of the __file__ parent_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/scripts/vllm/vllm_benchmark_report.py b/scripts/vllm/vllm_benchmark_report.py index 79f4649..1bdc72a 100644 --- a/scripts/vllm/vllm_benchmark_report.py +++ b/scripts/vllm/vllm_benchmark_report.py @@ -119,3 +119,24 @@ def extract_val(dirty_list, key): writer.writerow(model_details) except csv.Error as e: sys.exit('file {}: {}'.format(args.input_json, e)) + +elif args.mode == "accuracy": + with open(args.input_json, newline='') as inpf: + header_write = 0 if os.path.exists(args.output_csv) else 1 + with open(args.output_csv,'a+',newline='') as outf: + writer = csv.writer(outf, delimiter=',') + if header_write: + writer.writerow(['model', 'ppl', 'tp', 'dtype']) + + # workaround to vllm's dirty json output from multi-gpu cases + dirty_json = inpf.read() + dirty_list = dirty_json.replace(",","").replace(":","").replace("\"","").split() + ppl = float(extract_val(dirty_list, "ppl")) + try: + model_details = args.model ,\ + str(ppl) ,\ + args.tp ,\ + args.dtype + writer.writerow(model_details) + except csv.Error as e: + sys.exit('file {}: {}'.format(args.input_json, e)) diff --git a/scripts/vllm/vllm_benchmark_report.sh b/scripts/vllm/vllm_benchmark_report.sh index c3b8b21..56e865f 100755 --- a/scripts/vllm/vllm_benchmark_report.sh +++ 
b/scripts/vllm/vllm_benchmark_report.sh @@ -83,6 +83,7 @@ report_dir="reports_${datatype}_${tag}" report_summary_dir="${report_dir}/summary" tool_latency="/app/vllm/benchmarks/benchmark_latency.py" tool_throughput="/app/vllm/benchmarks/benchmark_throughput.py" +tool_accuracy="/app/vllm/benchmarks/P3L.py" tool_report="vllm_benchmark_report.py" n_warm=3 n_itr=5 @@ -156,11 +157,24 @@ if [ "$scenario" == "throughput" ] || [ "$scenario" == "all" ]; then done < <(tail -n +2 config.csv) done fi +# model_name +# tp +if [ "$scenario" == "accuracy" ] || [ "$scenario" == "all" ]; then + echo "[INFO] ACCURACY(P3L)" + mode="accuracy" + outcsv=${report_summary_dir}/${model_name}_${mode}_report.csv + outjson=${report_dir}/${model_name}_${mode}_${datatype}.json + + python3 $tool_accuracy --model $model --tensor-parallel-size $tp --output-json $outjson + python3 $tool_report --mode $mode --model $model_name --tp $tp --input-json $outjson --output-csv $outcsv --dtype $datatype + +fi echo "Generate report of multiple results" tool_parser="parse_csv.py" latency_summary_csv=${report_summary_dir}/${model_name}_latency_report.csv throughput_summary_csv=${report_summary_dir}/${model_name}_throughput_report.csv -python3 $tool_parser --file_latency $latency_summary_csv --file_throughput $throughput_summary_csv +accuracy_summary_csv=${report_summary_dir}/${model_name}_accuracy_report.csv +python3 $tool_parser --file_latency $latency_summary_csv --file_throughput $throughput_summary_csv --file_accuracy $accuracy_summary_csv mv perf_${model_name}.csv ../