Skip to content
Snippets Groups Projects
Commit 2a2041d4, authored and committed by Henning Fehrmann
Browse files

Some compilation instructions for the tensor core code. The code itself is not in git yet.

parent 246b9d52
No related branches found
No related tags found
No related merge requests found
......@@ -3,12 +3,15 @@ GPU=NVIDIA
GPU=AMD
# Object files that make up each executable; the link rules further down take
# these lists as their prerequisites.
OBJ_blas = blas.o
OBJ_tensor_core = tensor_core.o
OBJ_fftw = fftw.o
# NOTE(review): repeated assignment with the same value as two lines above.
# Presumably the old and new position of one line in the diff view — confirm
# and keep only one.
OBJ_tensor_core = tensor_core.o
OBJ_tensor2 = tensor2.o
ifeq ($(GPU), AMD)
LDFLAGS = -L/opt/rocm/lib -fopenmp
LDFLAGS_blas = -lhipblas -lrocblas
LDFLAGS_blas = -lrocblas
LDFLAGS_fftw = -lrocfft
CFLAGS = -g -Wall -O3 -fopenmp -I/opt/rocm/include -I/opt/rocm/hip/include -DROC
CC = hipcc
......@@ -18,8 +21,8 @@ else ifeq ($(GPU), NVIDIA)
LDFLAGS_blas = -lcublas
LDFLAGS_fftw = -lcufft
INCLUDE= -I/usr/lib/x86_64-linux-gnu/openmpi/include/
CFLAGS = ${INCLUDE} --compile -O3 -pg -Xcompiler -fopenmp -DCUDA
CUDAFLAGS = --Werror cross-execution-space-call --Wno-deprecated-gpu-targets
CFLAGS = ${INCLUDE} -arch sm_75 --compile -O3 -pg -Xcompiler -fopenmp -DCUDA
CUDAFLAGS = -arch sm_75 --Werror cross-execution-space-call --Wno-deprecated-gpu-targets
else
unknown_HW:
endif
......@@ -29,9 +32,18 @@ all: blas fftw
# Link the `blas` executable from the objects in OBJ_blas, adding the common
# linker flags and the BLAS library flags chosen for the selected GPU.
# $@ is the target name and $^ the prerequisite list, so the names are not
# repeated in the recipe. The recipe line must start with a TAB.
blas: ${OBJ_blas}
	${CC} -o $@ $^ ${LDFLAGS} ${LDFLAGS_blas} ${CUDAFLAGS}
# Link the `tensor_core` executable from the objects in OBJ_tensor_core.
# Both the FFT and the BLAS library flags are passed to the linker.
# $@ is the target name and $^ the prerequisite list. The recipe line must
# start with a TAB.
tensor_core: ${OBJ_tensor_core}
	${CC} -o $@ $^ ${LDFLAGS} ${LDFLAGS_fftw} ${LDFLAGS_blas} ${CUDAFLAGS}
# Link the `tensor2` executable from the objects in OBJ_tensor2.
# Both the FFT and the BLAS library flags are passed to the linker.
# $@ is the target name and $^ the prerequisite list. The recipe line must
# start with a TAB.
tensor2: ${OBJ_tensor2}
	${CC} -o $@ $^ ${LDFLAGS} ${LDFLAGS_fftw} ${LDFLAGS_blas} ${CUDAFLAGS}
# Link the `fftw` executable from the objects in OBJ_fftw, adding the common
# linker flags and the FFT library flags chosen for the selected GPU.
# $@ is the target name and $^ the prerequisite list. The recipe line must
# start with a TAB.
fftw: ${OBJ_fftw}
	${CC} -o $@ $^ ${LDFLAGS} ${LDFLAGS_fftw} ${CUDAFLAGS}
# Compile a CUDA source file (%.cu) into the matching object file with nvcc.
# -dc asks nvcc for an object containing relocatable device code.
# Every file listed in HEADER is a prerequisite, so a change there rebuilds
# the object.
# -o $@ writes the object to the exact target path instead of relying on the
# compiler's default output name in the working directory.
%.o: %.cu ${HEADER}
	nvcc ${INCLUDE} -dc $< -o $@ ${CUDAFLAGS}
# Compile a C source file (%.c) into the matching object file using the
# compiler and flags selected for the configured GPU.
# Every file listed in HEADER is a prerequisite, so a change there rebuilds
# the object.
# -o $@ writes the object to the exact target path instead of relying on the
# compiler's default output name in the working directory.
%.o: %.c ${HEADER}
	${CC} ${CFLAGS} -c $< -o $@
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment