From 750559842243965acc735fdd83a0e814e237530e Mon Sep 17 00:00:00 2001 From: Douglas Lehr Date: Tue, 4 Mar 2025 00:21:09 -0600 Subject: [PATCH] Add ppl accuracy benchmark to vllm tests --- scripts/vllm/parse_csv.py | 33 ++++++++++++++++++++++++++- scripts/vllm/vllm_benchmark_report.py | 21 +++++++++++++++++ scripts/vllm/vllm_benchmark_report.sh | 16 ++++++++++++- 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/scripts/vllm/parse_csv.py b/scripts/vllm/parse_csv.py index 2f25649..9113bd4 100644 --- a/scripts/vllm/parse_csv.py +++ b/scripts/vllm/parse_csv.py @@ -42,11 +42,31 @@ def parse_throughput_csv(file_path): return df_new +def parse_accuracy_csv(file_path): + # Read the CSV file + df = pd.read_csv(file_path) + + # Create a new DataFrame + df_new = pd.DataFrame() + + # Combine the columns of model, tp, and dtype to a new column named 'model' + df_new['model'] = df['model'] + '_' + df['tp'].astype(str) + '_' + df['dtype'] + + # Put the column of 'ppl' to a new column named 'performance' + df_new['performance'] = df['ppl'] + + # Add a new column named 'metric' and set the value to 'perplexity' + df_new['metric'] = 'perplexity' + + return df_new + def parse_args(): parser = argparse.ArgumentParser(description='Parse the CSV file about latency and throughput.') parser.add_argument('--file_latency', type=str, help='The file name of the latency report') parser.add_argument('--file_throughput', type=str, help='The file name of the throughput report') + parser.add_argument('--file_accuracy', type=str, help='The file name of the accuracy report') + args = parser.parse_args() return args @@ -56,6 +76,7 @@ def parse_args(): args = parse_args() file_latency = args.file_latency file_throughput = args.file_throughput + file_accuracy = args.file_accuracy # Extract the model name from the file name model_name = file_latency.split('/')[-1].split('_')[0] @@ -83,8 +104,18 @@ def parse_args(): else: print('The 
file of throughput summary is not found.') + # Check if the file exists + if file_accuracy and os.path.exists(file_accuracy): + # Parse the CSV file + df_accuracy = parse_accuracy_csv(file_accuracy) + + # Print the first 5 rows of the DataFrame + print(df_accuracy.head()) + else: + df_accuracy = pd.DataFrame(); print('The file of accuracy summary is not found.') + # Combine the DataFrames of latency and throughput and write to a new CSV file - df_combined = pd.concat([df_latency, df_throughput], ignore_index=True) + df_combined = pd.concat([df_latency, df_throughput, df_accuracy], ignore_index=True) # Get the parent directory of the __file__ parent_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/scripts/vllm/vllm_benchmark_report.py b/scripts/vllm/vllm_benchmark_report.py index 79f4649..1bdc72a 100644 --- a/scripts/vllm/vllm_benchmark_report.py +++ b/scripts/vllm/vllm_benchmark_report.py @@ -119,3 +119,24 @@ def extract_val(dirty_list, key): writer.writerow(model_details) except csv.Error as e: sys.exit('file {}: {}'.format(args.input_json, e)) + +elif args.mode == "accuracy": + with open(args.input_json, newline='') as inpf: + header_write = 0 if os.path.exists(args.output_csv) else 1 + with open(args.output_csv,'a+',newline='') as outf: + writer = csv.writer(outf, delimiter=',') + if header_write: + writer.writerow(['model', 'ppl', 'tp', 'dtype']) + + # workaround to vllm's dirty json output from multi-gpu cases + dirty_json = inpf.read() + dirty_list = dirty_json.replace(",","").replace(":","").replace("\"","").split() + ppl = float(extract_val(dirty_list, "ppl")) + try: + model_details = args.model ,\ + str(ppl) ,\ + args.tp ,\ + args.dtype + writer.writerow(model_details) + except csv.Error as e: + sys.exit('file {}: {}'.format(args.input_json, e)) diff --git a/scripts/vllm/vllm_benchmark_report.sh b/scripts/vllm/vllm_benchmark_report.sh index c3b8b21..56e865f 100755 --- a/scripts/vllm/vllm_benchmark_report.sh +++ 
b/scripts/vllm/vllm_benchmark_report.sh @@ -83,6 +83,7 @@ report_dir="reports_${datatype}_${tag}" report_summary_dir="${report_dir}/summary" tool_latency="/app/vllm/benchmarks/benchmark_latency.py" tool_throughput="/app/vllm/benchmarks/benchmark_throughput.py" +tool_accuracy="/app/vllm/benchmarks/P3L.py" tool_report="vllm_benchmark_report.py" n_warm=3 n_itr=5 @@ -156,11 +157,24 @@ if [ "$scenario" == "throughput" ] || [ "$scenario" == "all" ]; then done < <(tail -n +2 config.csv) done fi +# model_name +# tp +if [ "$scenario" == "accuracy" ] || [ "$scenario" == "all" ]; then + echo "[INFO] ACCURACY(P3L)" + mode="accuracy" + outcsv=${report_summary_dir}/${model_name}_${mode}_report.csv + outjson=${report_dir}/${model_name}_${mode}_${datatype}.json + + python3 $tool_accuracy --model $model --tensor-parallel-size $tp --output-json $outjson + python3 $tool_report --mode $mode --model $model_name --tp $tp --input-json $outjson --output-csv $outcsv --dtype $datatype + +fi echo "Generate report of multiple results" tool_parser="parse_csv.py" latency_summary_csv=${report_summary_dir}/${model_name}_latency_report.csv throughput_summary_csv=${report_summary_dir}/${model_name}_throughput_report.csv -python3 $tool_parser --file_latency $latency_summary_csv --file_throughput $throughput_summary_csv +accuracy_summary_csv=${report_summary_dir}/${model_name}_accuracy_report.csv +python3 $tool_parser --file_latency $latency_summary_csv --file_throughput $throughput_summary_csv --file_accuracy $accuracy_summary_csv mv perf_${model_name}.csv ../