Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Henning Fehrmann
NVidia_AMD_Bench
Commits
72043435
Commit
72043435
authored
Feb 05, 2021
by
Henning Fehrmann
Committed by
Henning Fehrmann
Feb 05, 2021
Browse files
tensor core code
parent
c3a18e8d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
72043435
GPU
=
NVIDIA
GPU
=
AMD
GPU
=
NVIDIA
OBJ_blas
=
blas.o
OBJ_fftw
=
fftw.o
OBJ_tensor_core
=
tensor_core.o
ifeq
($(GPU), AMD)
...
...
@@ -29,6 +30,9 @@ all: blas fftw
blas
:
${OBJ_blas}
${CC}
-o
blas
${OBJ_blas}
${LDFLAGS}
${LDFLAGS_blas}
${CUDAFLAGS}
tensor_core
:
${OBJ_tensor_core}
${CC}
-o
tensor_core
${OBJ_tensor_core}
${LDFLAGS}
${LDFLAGS_blas}
${CUDAFLAGS}
fftw
:
${OBJ_fftw}
${CC}
-o
fftw
${OBJ_fftw}
${LDFLAGS}
${LDFLAGS_fftw}
${CUDAFLAGS}
...
...
tensor_core.c
View file @
72043435
...
...
@@ -77,9 +77,9 @@ void
multiplication
(
__HANDLE__
handle
,
const
__COMPLEX8__
*
A
,
const
__COMPLEX8__
*
B
,
__COMPLEX8__
*
C
,
const
float
*
A
,
const
float
*
B
,
float
*
C
,
size_t
m
,
size_t
n
,
size_t
k
...
...
@@ -87,8 +87,26 @@ multiplication
{
__BLAS_OPERATION__
transA
=
__NO_TRANSFORM__
;
__BLAS_OPERATION__
transB
=
__CT_TRANSFORM__
;
const
__COMPLEX8__
alpha
=
{.
x
=
1
.
f
,
.
y
=
0
.
f
};
const
__COMPLEX8__
beta
=
{.
x
=
0
.
f
,
.
y
=
0
.
f
};
const
float
alpha
=
1
.
f
;
const
float
beta
=
0
.
f
;
check_status
(
cublasSgemm
(
handle
,
transA
,
transB
,
m
,
n
,
k
,
&
alpha
,
A
,
m
,
B
,
n
,
&
beta
,
C
,
m
));
cublasGemmAlgo_t
algo
=
CUBLAS_GEMM_DEFAULT_TENSOR_OP
;
check_status
(
cublasGemmEx
(
...
...
@@ -112,6 +130,7 @@ multiplication
CUDA_C_32F
,
algo
));
exit
(
0
);
// cublasIcamax(handle,m * n, C, 1, &result);
cudaDeviceSynchronize
();
...
...
@@ -120,8 +139,8 @@ multiplication
void
prepare_matrices
(
__COMPLEX8__
*
hA
,
__COMPLEX8__
*
hB
,
float
*
hA
,
float
*
hB
,
size_t
m
,
size_t
n
,
size_t
k
...
...
@@ -134,9 +153,10 @@ prepare_matrices
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
size_t
ind
=
j
+
k
*
i
;
hA
[
ind
]
.
x
=
(
float
)
xorshf96
()
*
fact
;
hA
[
ind
]
.
y
=
(
float
)
xorshf96
()
*
fact
;
//
hA[ind] = (float)xorshf96()*fact;
hA
[
ind
]
=
0
.
f
;
}
hA
[
k
*
(
i
+
1
)]
=
1
.
f
;
}
#pragma omp parallel for
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
...
...
@@ -144,9 +164,10 @@ prepare_matrices
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
size_t
ind
=
j
+
k
*
i
;
hB
[
ind
]
.
x
=
(
float
)
xorshf96
()
*
fact
;
hB
[
ind
]
.
y
=
(
float
)
xorshf96
()
*
fact
;
//
hB[ind] = (float)xorshf96()*fact;
hB
[
ind
]
=
0
.
f
;
}
hB
[
k
*
(
i
+
1
)]
=
1
.
f
;
}
}
...
...
@@ -154,7 +175,7 @@ prepare_matrices
void
print_result
(
__COMPLEX8__
*
hC
,
float
*
hC
,
size_t
m
,
size_t
n
,
size_t
k
...
...
@@ -166,7 +187,7 @@ print_result
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
size_t
ind
=
j
+
k
*
i
;
printf
(
"%1.2f
%1.2f
\t
"
,
hC
[
ind
]
.
x
,
hC
[
ind
].
y
);
printf
(
"%1.2f
\t
"
,
hC
[
ind
]);
}
printf
(
"
\n
"
);
}
...
...
@@ -187,9 +208,9 @@ run_test
{
struct
runtime
*
timer
;
__MALLOC
(
timer
,
sizeof
(
*
timer
));
__COMPLEX8__
*
A
;
__COMPLEX8__
*
B
;
__COMPLEX8__
*
C
;
float
*
A
;
float
*
B
;
float
*
C
;
__ASSERT
(
__PREFIX
(
Malloc
)((
void
**
)
&
A
,
sizeof
(
*
A
)
*
(
size_t
)(
m
*
k
)));
__ASSERT
(
__PREFIX
(
Malloc
)((
void
**
)
&
B
,
sizeof
(
*
B
)
*
(
size_t
)(
n
*
k
)));
__ASSERT
(
__PREFIX
(
Malloc
)((
void
**
)
&
C
,
sizeof
(
*
C
)
*
(
size_t
)(
m
*
n
)));
...
...
@@ -199,11 +220,11 @@ run_test
exit
(
1
);
}
__COMPLEX8__
*
hA
;
float
*
hA
;
__MALLOC
(
hA
,
sizeof
(
*
hA
)
*
(
size_t
)(
m
*
k
));
__COMPLEX8__
*
hB
;
float
*
hB
;
__MALLOC
(
hB
,
sizeof
(
*
hB
)
*
(
size_t
)(
k
*
n
));
__COMPLEX8__
*
hC
;
float
*
hC
;
__MALLOC
(
hC
,
sizeof
(
*
hC
)
*
(
size_t
)(
n
*
m
));
// timer_start(timer, "Prepare matrices");
...
...
@@ -287,6 +308,7 @@ main
__CREATE_HANDLE
(
&
handle
);
cublasSetMathMode
(
handle
,
CUBLAS_TENSOR_OP_MATH
);
__MALLOC
(
res
,
sizeof
(
*
res
)
*
(
size_t
)(
(
m_max
-
m_min
+
1
)
*
(
n_max
-
n_min
+
1
)
*
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment