# Makefile.old -- from a fork of meifeng/GridMini
# ---------------------------------------------------------------------------
# Build configuration.
# The NVC++ section is the active toolchain; every other compiler section is
# kept commented out as an alternative that can be enabled by hand.
# ---------------------------------------------------------------------------

# GPU architecture flags; only the commented-out NVCC section refers to them.
GPUARCH := -m64 -gencode arch=compute_70,code=sm_70

# Benchmark selection: the build rules compile benchmarks/<MAIN>.cc.
#MAIN=Benchmark_REAL
#MAIN=Benchmark_su3_mapper
MAIN := Benchmark_su3
#MAIN=Benchmark_su3_debug

## xlC
#CXX=xlC_r
#CXXFLAGS=-qsmp=omp -qoffload -Ofast -std=c++11 -lcudart
#CXXFLAGS+=-DOMPTARGET -DOMPTARGET_MANAGED

## icpx
#CXX=icpx
#CXXFLAGS=-std=c++14 -O2 -fiopenmp -fopenmp-targets=spir64
#CXXFLAGS+=-DOMPTARGET -DOMPTARGET_UVM #-DDEBUG

## PGI
#CXX=pgc++
#CXXFLAGS=-fast --c++14 -acc -Mnollvm -Minfo=accel -ta=tesla:cc70,managed -Mlarge_arrays --no_exceptions

## Main LLVM/Clang
#CXX=clang++
#CXXFLAGS=-std=c++14 -g -fopenmp -fopenmp-cuda-mode -O3 -fopenmp-targets=nvptx64-nvidia-cuda -lcudart
#CXXFLAGS += -DOMPTARGET -Wno-unknown-cuda-version
##CXXFLAGS += -Xclang -fdump-record-layouts
#CXXFLAGS += -DOMPTARGET_UVM
#CXXFLAGS +=-DOMPTARGET_MANAGED
##CXXFLAGS += -DVECTOR_LOOPS
##LLVMFLAGS = -S -emit-llvm
##CXXFLAGS += -DVECTOR_LOOPS -Xclang -fdump-record-layouts-simple
##CXXFLAGS += -DDEBUG

## AOMP Clang - NVIDIA V100 GPU
#CXX=clang++
#CXXFLAGS = -std=c++14 -O3 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-version=50
#CXXFLAGS += -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_70
#CXXFLAGS += -DOMPTARGET

## AOMP Clang - AMD GPU
#CXX=clang++ -fopenmp-version=50 -fopenmp-cuda-mode
#CXXFLAGS=-std=c++14 -O3 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908
#CXXFLAGS += -DOMPTARGET
##CXXFLAGS += -DDEBUG

## NVCC
#CXX=nvcc
#CXXFLAGS=--x cu ${GPUARCH} -I. -ccbin g++ -rdc=true --expt-extended-lambda --expt-relaxed-constexpr -std=c++14

## NVC++ for OpenMP offloading (active toolchain)
CXX := nvc++
CXXFLAGS := -std=c++14 -mp=gpu -gpu=cc70 -cuda
CXXFLAGS += -DOMPTARGET_MANAGED
CXXFLAGS += -DOMPTARGET -Minfo

## GCC
#CXX=g++
#CXXFLAGS=-std=c++14 -O3 -fopenmp -foffload=nvptx-none -DOMPTARGET #-DOMPTARGET_MANAGED -DDEBUG -lcudart

## CRAY CCE
#CXX=CC
#CXXFLAGS=-std=c++14 -fopenmp -fopenmp-targets=nvptx64 -Xopenmp-target -march=sm_70
#CXXFLAGS += -DOMPTARGET
#CXXFLAGS += -DOMPTARGET_MANAGED
#CXXFLAGS += -DDEBUG
# Header and library search paths handed to every compiler invocation.
# The project root is always on the include path. The CUDA toolkit
# directories are added only when CUDA_ROOT is non-empty: with an empty
# CUDA_ROOT they would silently expand to "-I/include" and "-L/lib64".
ifneq ($(strip $(CUDA_ROOT)),)
INCLUDES=-I./ -I${CUDA_ROOT}/include
LDFLAGS=-L${CUDA_ROOT}/lib64
else
INCLUDES=-I./
LDFLAGS=
endif
# Default target: build the benchmark selected by MAIN.
# benchmarks/$(MAIN).cc and the listed Grid sources are passed to $(CXX) in a
# single invocation whose output is written to $(MAIN).x in the current
# directory. The -D options after the output name are preprocessor
# definitions applied to that same invocation (SIMD width 16 here).
# "all" names a command, not a file, so it is declared phony.
# Every recipe line must begin with a TAB; continuation lines keep it too.
.PHONY: all
all:
	$(CXX) $(CXXFLAGS) $(INCLUDES) $(LDFLAGS) \
	benchmarks/${MAIN}.cc \
	Grid/util/Init.cc \
	Grid/communicator/SharedMemory.cc \
	Grid/communicator/SharedMemoryNone.cc \
	Grid/allocator/AlignedAllocator.cc \
	Grid/communicator/Communicator_base.cc \
	Grid/communicator/Communicator_none.cc \
	Grid/log/Log.cc \
	-o ${MAIN}.x \
	-DGEN \
	-DGEN_SIMD_WIDTH=16 \
	-DHAVE_MALLOC_H \
	-DGRID_COMMS_NONE \
	-DGRID_DEFAULT_PRECISION_DOUBLE \
	-DRNG_RANLUX
# Variant build that feeds the same source list to $(CXX) with $(LLVMFLAGS)
# appended and no -o option, so output naming is left to the compiler.
# LLVMFLAGS is assigned only in a commented-out line of this file
# ("-S -emit-llvm" in the LLVM/Clang section); unless it is enabled there or
# supplied on the command line it expands to nothing.
# NOTE(review): GEN_SIMD_WIDTH is 32 here but 16 in "all" -- confirm this
# difference is intentional.
# "llvm" names a command, not a file, so it is declared phony.
# Every recipe line must begin with a TAB; continuation lines keep it too.
.PHONY: llvm
llvm:
	$(CXX) $(CXXFLAGS) $(INCLUDES) $(LDFLAGS) \
	benchmarks/${MAIN}.cc \
	Grid/util/Init.cc \
	Grid/communicator/SharedMemory.cc \
	Grid/communicator/SharedMemoryNone.cc \
	Grid/allocator/AlignedAllocator.cc \
	Grid/communicator/Communicator_base.cc \
	Grid/communicator/Communicator_none.cc \
	Grid/log/Log.cc \
	$(LLVMFLAGS) \
	-DGEN \
	-DGEN_SIMD_WIDTH=32 \
	-DHAVE_MALLOC_H \
	-DGRID_COMMS_NONE \
	-DGRID_DEFAULT_PRECISION_DOUBLE \
	-DRNG_RANLUX
# Delete *.x and *.o files from the current directory, listing each removal.
# -f keeps the target from failing when nothing matches the globs (for
# example, running "make clean" twice in a row).
# Declared phony so a stray file named "clean" cannot disable the rule.
.PHONY: clean
clean:
	rm -fv *.x *.o