-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLinux-x86-64-cudadp.sopt
54 lines (49 loc) · 1.96 KB
/
Linux-x86-64-cudadp.sopt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# By default some Intel compilers put temporaries on the stack.
# This might lead to segmentation faults if the stack limit is set too low.
# Stack limits can be increased by sysadmins or, e.g., with ulimit -s 256000.
# Furthermore, newer ifort (10.0?) compilers support the option
# -heap-arrays 64
# Add this to the compilation flags if the other options do not work.
# The following settings worked for:
# - AMD64 Opteron
# - SUSE Linux Enterprise Server 10.0 (x86_64)
# - Intel(R) Fortran Compiler for Intel(R) EM64T-based applications, Version 10.0
# - AMD acml library version 3.6.0
# - MPICH2-1.0.5p4
# - FFTW 3.1.2
#
# ---------------------------------------------------------------------------
# Toolchain: external programs invoked by the build.
# ---------------------------------------------------------------------------
PERL     = perl
CC       = gcc
# C++ driver matching the gcc toolchain above.  The bare command name `CC`
# is a C++ compiler only on Cray/Solaris-style installations and is normally
# not available on a plain Linux box, which would make every C++ compile fail.
CXX      = g++
CPP      = cpp
NVCC     = /usr/local/cuda/bin/nvcc
# ifort is used both as Fortran compiler (free-form source via -FR) and as
# the final link driver (static Intel runtime, OpenMP enabled).
FC       = /opt/intel/fce/10.0.025/bin/ifort -FR
LD       = /opt/intel/fce/10.0.025/bin/ifort -i-static -openmp
AR       = ar -r
# Target GPU model.  Not referenced elsewhere in this file; presumably read
# by the makefile that includes it -- TODO confirm.
GPUVER   = K20X
# ---------------------------------------------------------------------------
# Preprocessor defines and compiler flags.
# ---------------------------------------------------------------------------
# Defines common to the regular sources and the CUDA/BLAS wrapper source.
DFLAGSBASE = -D__CUDAPW -D__FFTSG -D__CUBLASDP
# Regular sources: every spelling of dgemm/dsymm is renamed to the GPU
# wrapper entry points (cuda_d_gemm / gpu_d_symm).
DFLAGS     = $(DFLAGSBASE) -DDGEMM=cuda_d_gemm -Ddgemm=cuda_d_gemm -DDSYMM=gpu_d_symm -Ddsymm=gpu_d_symm
# Wrapper source (cuda_cublas.F): cpu_d_gemm / cpu_d_symm are mapped to the
# plain BLAS names instead, so the dgemm/dsymm renaming above must not be
# active when that file is preprocessed.
DFLAGSCU   = $(DFLAGSBASE) -Dcpu_d_gemm=dgemm -Dcpu_d_symm=dsymm
CFLAGS     = -O2
CPPFLAGS   = -traditional -C $(DFLAGS) -P -I/opt/intel/mkl/10.0.1.014/include/fftw
# Root of the CUDA toolkit used for the C++ include path.  `?=` keeps any
# value supplied through the environment or the make command line; the
# default prevents the include path from degenerating to `-I/include` when
# CUDA_PATH is not set.
CUDA_PATH ?= /usr/local/cuda
CXXFLAGS   = -O3 -I$(CUDA_PATH)/include -std=c++11
CPPFLAGSCU = -traditional -C $(DFLAGSCU) -P -I/opt/intel/mkl/10.0.1.014/include/fftw
# sm_35 is the compute capability of the K20X named in GPUVER.  The former
# `-arch sm_13 -deviceemu` pair cannot be combined with --std=c++11: nvcc
# releases that understand --std=c++11 no longer accept sm_13 or -deviceemu.
NVFLAGS    = $(DFLAGS) -arch sm_35 -Xcompiler='-fopenmp' --std=c++11
FCFLAGS    = $(DFLAGS) -O2 -xW
FCFLAGSCU  = $(DFLAGSCU) -O2 -xW
# Library directories; both values intentionally end in a slash.
MKLPATH    = /opt/intel/mkl/10.0.1.014/lib/em64t/
CUDAPATH   = /usr/local/cuda/lib/
# The link step reuses the Fortran compile flags.
LDFLAGS    = $(FCFLAGS)
# ---------------------------------------------------------------------------
# Link line.  MKL, libguide and single-precision FFTW are linked as static
# archives by full path; the CUDA runtime, cuFFT and cuBLAS are linked as
# shared objects by full path.
# ---------------------------------------------------------------------------
LIBS = -L$(MKLPATH) \
       $(MKLPATH)/libmkl_em64t.a \
       $(MKLPATH)/libmkl_lapack.a \
       $(MKLPATH)/libguide.a \
       $(CUDAPATH)/libcudart.so \
       $(CUDAPATH)/libcufft.so \
       $(CUDAPATH)/libcublas.so \
       /usr/local/lib/libfftw3f.a \
       -lpthread
# C++ runtime plus CUDA runtime / NVRTC / driver libraries requested by
# name.  -L$(CUDAPATH) is required so that the linker can resolve the
# -lcudart and -lnvrtc requests from the CUDA library directory; without it
# only the MKL directory and the system defaults are searched.
LIBS += -L$(CUDAPATH) -lstdc++ -lcudart -lnvrtc -lcuda
# Build the cuBLAS wrapper object with its own flag set.
# Step 1 preprocesses the fixed-suffix source with CPPFLAGSCU (which carries
# DFLAGSCU rather than DFLAGS) into cuda_cublas.f90; step 2 compiles that
# intermediate file with FCFLAGSCU.
# NOTE: recipe lines must begin with a hard TAB, otherwise make aborts with
# "missing separator".
cuda_cublas.o: cuda_cublas.F
	$(CPP) $(CPPFLAGSCU) $< > cuda_cublas.f90
	$(FC) -c $(FCFLAGSCU) cuda_cublas.f90 -o $@