-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbench_cuda.py
105 lines (100 loc) · 5.67 KB
/
bench_cuda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import subprocess
import time
from src.codegen import vbr_spmm_cuda_codegen, vbr_spmv_cuda_codegen
# Number of measured repetitions per matrix: both the inspector loops and the
# executor loops below iterate range(BENCHMARK_FREQ).
BENCHMARK_FREQ = 5
def bench_spmv():
    """Benchmark the generated CUDA SpMV kernels for each matrix in ``Generated_VBR``.

    Two phases run over every ``*.vbr`` file found in that directory:

    * Inspector: after one unmeasured warm-up, code generation followed by
      ``nvcc`` compilation is measured ``BENCHMARK_FREQ`` times. One row per
      matrix (``name,t1,t2,...``) is written to
      ``results/benchmarks_inspector_spmv_cuda.csv``.
    * Executor: the kernel is regenerated, rebuilt and run once unmeasured,
      then run ``BENCHMARK_FREQ`` times. The text following ``" = "`` on the
      first line of the program's stdout is recorded for each run in
      ``results/benchmarks_spmv_cuda.csv``.

    Raises:
        subprocess.CalledProcessError: if ``nvcc`` fails to build a kernel.
        RuntimeError: if a kernel's first stdout line has no ``" = "`` field.
    """
    vbr_dir = "Generated_VBR"
    out_dir = "Generated_SpMV_cuda"
    suffix = ".vbr"

    # Only real VBR files are benchmarked; blindly chopping four characters
    # off every directory entry would mangle the names of unrelated files.
    # Sorting keeps the CSV row order reproducible between runs.
    matrices = sorted(
        entry[: -len(suffix)]
        for entry in os.listdir(vbr_dir)
        if entry.endswith(suffix)
    )
    os.makedirs("results", exist_ok=True)

    def generate(name):
        # The generator's return value is used as its own elapsed time.
        return vbr_spmv_cuda_codegen(
            name, density=0, dir_name=out_dir, vbr_dir=vbr_dir
        )

    def build(name):
        # check=True: a failed build must not be timed as if it succeeded, nor
        # be followed by running a stale binary from an earlier run.
        subprocess.run(
            ["nvcc", "-O3", "-o", name, name + ".cu"], cwd=out_dir, check=True
        )

    def execute(name):
        return subprocess.run(["./" + name], capture_output=True, cwd=out_dir)

    print("Benchmarking inspector")
    inspector_path = os.path.join("results", "benchmarks_inspector_spmv_cuda.csv")
    with open(inspector_path, "w") as inspector_csv:
        for name in matrices:
            print(name + suffix, flush=True)
            # Unmeasured warm-up of code generation and compilation.
            generate(name)
            build(name)
            inspector_times = []
            for i in range(BENCHMARK_FREQ):
                print("Benchmarking inspector iteration", i, flush=True)
                # SpMV code generation by inspecting the VBR matrix.
                codegen_time = generate(name)
                # Monotonic clock, so system clock adjustments cannot skew
                # (or make negative) the measured compile time.
                started = time.perf_counter_ns()
                build(name)
                # Nanoseconds -> microseconds.
                # NOTE(review): assumes the code generator also reports
                # microseconds -- confirm against src.codegen.
                compile_time = (time.perf_counter_ns() - started) // 1_000
                inspector_times.append(codegen_time + compile_time)
            # Inspector time = code generation + compilation.
            row = f"{name},{','.join(str(t) for t in inspector_times)}\n"
            print(row, flush=True)
            inspector_csv.write(row)

    print("Benchmarking executor")
    with open(os.path.join("results", "benchmarks_spmv_cuda.csv"), "w") as executor_csv:
        for name in matrices:
            print(name + suffix, flush=True)
            # Regenerate and rebuild, then do one unmeasured warm-up run.
            generate(name)
            build(name)
            execute(name)
            execution_times = []
            for i in range(BENCHMARK_FREQ):
                print("Benchmarking executor iteration", i, flush=True)
                result = execute(name)
                first_line = result.stdout.decode("utf-8").split("\n")[0]
                fields = first_line.split(" = ")
                if len(fields) < 2:
                    # Report the failure with context instead of a bare
                    # IndexError from the parsing expression.
                    raise RuntimeError(
                        f"{name}: cannot read execution time from {first_line!r} "
                        f"(exit code {result.returncode}, stderr: "
                        f"{result.stderr.decode('utf-8', errors='replace')!r})"
                    )
                execution_times.append(fields[1])
            row = f"{name},{','.join(execution_times)}\n"
            print(row, flush=True)
            executor_csv.write(row)
def bench_spmm():
    """Benchmark the generated CUDA SpMM kernels for each matrix in ``Generated_VBR``.

    Two phases run over every ``*.vbr`` file found in that directory:

    * Inspector: after one unmeasured warm-up, code generation followed by
      ``nvcc`` compilation is measured ``BENCHMARK_FREQ`` times. One row per
      matrix (``name,t1,t2,...``) is written to
      ``results/benchmarks_inspector_spmm_cuda.csv``.
    * Executor: the kernel is regenerated, rebuilt and run once unmeasured,
      then run ``BENCHMARK_FREQ`` times. The text following ``" = "`` on the
      first line of the program's stdout is recorded for each run in
      ``results/benchmarks_spmm_cuda.csv``.

    Raises:
        subprocess.CalledProcessError: if ``nvcc`` fails to build a kernel.
        RuntimeError: if a kernel's first stdout line has no ``" = "`` field.
    """
    vbr_dir = "Generated_VBR"
    out_dir = "Generated_SpMM_cuda"
    suffix = ".vbr"

    # Only real VBR files are benchmarked; blindly chopping four characters
    # off every directory entry would mangle the names of unrelated files.
    # Sorting keeps the CSV row order reproducible between runs.
    matrices = sorted(
        entry[: -len(suffix)]
        for entry in os.listdir(vbr_dir)
        if entry.endswith(suffix)
    )
    os.makedirs("results", exist_ok=True)

    def generate(name):
        # The generator's return value is used as its own elapsed time.
        return vbr_spmm_cuda_codegen(
            name, density=0, dir_name=out_dir, vbr_dir=vbr_dir
        )

    def build(name):
        # -mprefer-vector-width / -mavx are host-compiler flags; nvcc only
        # hands them to the host compiler when they are given via -Xcompiler.
        # check=True: a failed build must not be timed as if it succeeded, nor
        # be followed by running a stale binary from an earlier run.
        subprocess.run(
            [
                "nvcc",
                "-O3",
                "-Xcompiler",
                "-mprefer-vector-width=512",
                "-Xcompiler",
                "-mavx",
                "-o",
                name,
                name + ".cu",
            ],
            cwd=out_dir,
            check=True,
        )

    def execute(name):
        return subprocess.run(["./" + name], capture_output=True, cwd=out_dir)

    print("Benchmarking inspector")
    inspector_path = os.path.join("results", "benchmarks_inspector_spmm_cuda.csv")
    with open(inspector_path, "w") as inspector_csv:
        for name in matrices:
            print(name + suffix, flush=True)
            # Unmeasured warm-up of code generation and compilation.
            generate(name)
            build(name)
            inspector_times = []
            for i in range(BENCHMARK_FREQ):
                print("Benchmarking inspector iteration", i, flush=True)
                # SpMM code generation by inspecting the VBR matrix.
                codegen_time = generate(name)
                # Monotonic clock, so system clock adjustments cannot skew
                # (or make negative) the measured compile time.
                started = time.perf_counter_ns()
                build(name)
                # Nanoseconds -> microseconds.
                # NOTE(review): assumes the code generator also reports
                # microseconds -- confirm against src.codegen.
                compile_time = (time.perf_counter_ns() - started) // 1_000
                inspector_times.append(codegen_time + compile_time)
            # Inspector time = code generation + compilation.
            row = f"{name},{','.join(str(t) for t in inspector_times)}\n"
            print(row, flush=True)
            inspector_csv.write(row)

    print("Benchmarking executor")
    with open(os.path.join("results", "benchmarks_spmm_cuda.csv"), "w") as executor_csv:
        for name in matrices:
            print(name + suffix, flush=True)
            # Regenerate and rebuild, then do one unmeasured warm-up run.
            generate(name)
            build(name)
            execute(name)
            execution_times = []
            for i in range(BENCHMARK_FREQ):
                print("Benchmarking executor iteration", i, flush=True)
                result = execute(name)
                first_line = result.stdout.decode("utf-8").split("\n")[0]
                fields = first_line.split(" = ")
                if len(fields) < 2:
                    # Report the failure with context instead of a bare
                    # IndexError from the parsing expression.
                    raise RuntimeError(
                        f"{name}: cannot read execution time from {first_line!r} "
                        f"(exit code {result.returncode}, stderr: "
                        f"{result.stderr.decode('utf-8', errors='replace')!r})"
                    )
                execution_times.append(fields[1])
            row = f"{name},{','.join(execution_times)}\n"
            print(row, flush=True)
            executor_csv.write(row)
if __name__ == "__main__":
    # Script entry point: run the SpMV benchmark first, then the SpMM one.
    for run_benchmark in (bench_spmv, bench_spmm):
        run_benchmark()