Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
NVidia_AMD_Bench
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Henning Fehrmann
NVidia_AMD_Bench
Commits
0626ce84
Commit
0626ce84
authored
4 years ago
by
Henning Fehrmann
Committed by
Henning Fehrmann
4 years ago
Browse files
Options
Downloads
Patches
Plain Diff
platform agnostic code
parent
21c8a6d9
No related branches found
No related tags found
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
Makefile
+33
-0
33 additions, 0 deletions
Makefile
blas.c
+52
-67
52 additions, 67 deletions
blas.c
hardware_settings.h
+52
-0
52 additions, 0 deletions
hardware_settings.h
rocmblas_AMD.c
+0
-361
0 additions, 361 deletions
rocmblas_AMD.c
with
137 additions
and
428 deletions
Makefile
0 → 100644
+
33
−
0
View file @
0626ce84
# Build configuration for the BLAS benchmark.
#
# GPU selects the backend: AMD (hipcc + rocBLAS) or NVIDIA (nvcc + cuBLAS).
# Override it on the command line, e.g. `make GPU=NVIDIA`.
# GPU = NVIDIA
GPU = AMD

OBJ = blas.o
# Headers every object depends on, so editing them triggers a rebuild.
HEADER = hardware_settings.h

ifeq ($(GPU),AMD)
CC = hipcc
CFLAGS = -g -Wall -O3 -fopenmp -I /opt/rocm/include -I /opt/rocm/hip/include -DROC
LDFLAGS = -L /opt/rocm/lib -lhipblas -lrocblas -fopenmp
else ifeq ($(GPU),NVIDIA)
CC = nvcc
INCLUDE = -I /usr/lib/x86_64-linux-gnu/openmpi/include/
CFLAGS = ${INCLUDE} --compile -O3 -pg -Xcompiler -fopenmp -DCUDA
CUDAFLAGS = --Werror cross-execution-space-call --Wno-deprecated-gpu-targets
LDFLAGS = -lcublas -lm -lgomp
else
# Unknown backend: make this the default goal so the user gets a message.
unknown_HW:
endif

.PHONY: all clean unknown_HW

all: ${OBJ}
	${CC} -o blas ${OBJ} ${LDFLAGS} ${CUDAFLAGS}

%.o: %.c ${HEADER}
	${CC} ${CFLAGS} -c $<

# -f keeps `make clean` from failing when there is nothing to remove.
clean:
	rm -f *.o

unknown_HW:
	@echo "hardware not detected"
This diff is collapsed.
Click to expand it.
cuda_NVidia
.c
→
blas
.c
+
52
−
67
View file @
0626ce84
...
...
@@ -6,7 +6,7 @@
* Version: 1.0
* Created: 27.01.2021 12:45:18
* Revision: none
* Compiler:
nvc
c
* Compiler:
hip
c
*
* Author: Henning Fehrmann (), henning.fehrmann@aei.mpg.de
* Organization: AEI Hannover
...
...
@@ -15,18 +15,14 @@
* =====================================================================================
*/
#include
"hardware_settings.h"
#include
<stdio.h>
#include
<stdlib.h>
#include
<math.h>
#include
<assert.h>
#include
<cuda_runtime.h>
#include
<cuda.h>
#include
<cublas_v2.h>
#include
<time.h>
#include
<omp.h>
#define __ASSERT(x) (assert((x)==cudaSuccess))
size_t
m
=
10000
;
...
...
@@ -35,12 +31,20 @@ size_t k = 10000;
static
unsigned
long
x
=
123456789
,
y
=
362436069
,
z
=
521288629
;
/*
 * __MALLOC(P, size): allocate `size` bytes on the host and store the pointer
 * in P. On failure, report the variable name and source location on stderr
 * and terminate with EXIT_FAILURE.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe after an unbraced if/else); arguments are parenthesised and the
 * definition no longer ends in a line continuation.
 */
#define __MALLOC(P, size) \
	do { \
		(P) = malloc((size)); \
		if ((P) == NULL) \
		{ \
			fprintf(stderr, "Allocation of %s failed at line %d in %s\n", #P, __LINE__, __FILE__); \
			exit(EXIT_FAILURE); \
		} \
	} while (0)
unsigned
long
xorshf96
(
void
)
{
// NOT thread save
unsigned
long
t
;
x
^=
x
<<
16
;
x
^=
x
>>
5
;
...
...
@@ -102,22 +106,22 @@ timer_stop
void
multiplication
(
cublasHandle_t
handle
,
const
cuComplex
*
A
,
const
cuComplex
*
B
,
cuComplex
*
C
__HANDLE__
handle
,
const
__COMPLEX8__
*
A
,
const
__COMPLEX8__
*
B
,
__COMPLEX8__
*
C
)
{
cublasOperation_t
transA
=
CUBLAS_OP_N
;
cublasOperation_t
transB
=
CUBLAS_OP_C
;
const
cuComplex
alpha
=
{.
x
=
1
.
f
,
.
y
=
0
.
f
};
const
cuComplex
beta
=
{.
x
=
0
.
f
,
.
y
=
0
.
f
};
__BLAS_OPERATION__
transA
=
__NO_TRANSFORM__
;
__BLAS_OPERATION__
transB
=
__CT_TRANSFORM__
;
const
__COMPLEX8__
alpha
=
{.
x
=
1
.
f
,
.
y
=
0
.
f
};
const
__COMPLEX8__
beta
=
{.
x
=
0
.
f
,
.
y
=
0
.
f
};
int
lda
=
n
;
int
ldb
=
n
;
int
ldc
=
k
;
cublasCgemm
__CGMEM__
(
handle
,
transA
,
...
...
@@ -139,8 +143,8 @@ multiplication
void
prepare_matrices
(
cuComplex
*
hA
,
cuComplex
*
hB
__COMPLEX8__
*
hA
,
__COMPLEX8__
*
hB
)
{
float
fact
=
1
.
f
/
(
float
)
n
/
(
float
)
x
/
(
float
)
y
/
20
.
f
;
...
...
@@ -170,7 +174,7 @@ prepare_matrices
void
print_result
(
cuComplex
*
hC
__COMPLEX8__
*
hC
)
{
printf
(
"-------- %zu %zu
\n
"
,
m
,
k
);
...
...
@@ -198,45 +202,27 @@ run_test
m
=
dim
;
n
=
dim
;
k
=
dim
;
struct
runtime
*
timer
=
malloc
(
sizeof
(
*
timer
));
cuComplex
*
A
;
cuComplex
*
B
;
cuComplex
*
C
;
__ASSERT
(
cudaMalloc
((
void
**
)
&
A
,
sizeof
(
*
A
)
*
(
size_t
)(
m
*
n
)));
if
(
A
==
NULL
)
{
fprintf
(
stderr
,
"A not allocated
\n
"
);
exit
(
1
);
}
__ASSERT
(
cudaMalloc
((
void
**
)
&
B
,
sizeof
(
*
B
)
*
(
size_t
)(
n
*
k
)));
if
(
B
==
NULL
)
{
fprintf
(
stderr
,
"B not allocated
\n
"
);
exit
(
1
);
}
__ASSERT
(
cudaMalloc
((
void
**
)
&
C
,
sizeof
(
*
C
)
*
(
size_t
)(
m
*
k
)));
struct
runtime
*
timer
;
__MALLOC
(
timer
,
sizeof
(
*
timer
));
__COMPLEX8__
*
A
;
__COMPLEX8__
*
B
;
__COMPLEX8__
*
C
;
__ASSERT
(
__PREFIX
(
Malloc
)((
void
**
)
&
A
,
sizeof
(
*
A
)
*
(
size_t
)(
m
*
n
)));
__ASSERT
(
__PREFIX
(
Malloc
)((
void
**
)
&
B
,
sizeof
(
*
B
)
*
(
size_t
)(
n
*
k
)));
__ASSERT
(
__PREFIX
(
Malloc
)((
void
**
)
&
C
,
sizeof
(
*
C
)
*
(
size_t
)(
m
*
k
)));
if
(
C
==
NULL
)
{
fprintf
(
stderr
,
"C not allocated
\n
"
);
exit
(
1
);
}
cuComplex
*
hA
=
malloc
(
sizeof
(
*
hA
)
*
(
size_t
)(
m
*
n
));
if
(
hA
==
NULL
)
{
fprintf
(
stderr
,
"hA not allocated
\n
"
);
exit
(
1
);
}
cuComplex
*
hB
=
malloc
(
sizeof
(
*
hB
)
*
(
size_t
)(
k
*
n
));
if
(
hB
==
NULL
)
{
fprintf
(
stderr
,
"hB not allocated
\n
"
);
exit
(
1
);
}
cuComplex
*
hC
=
malloc
(
sizeof
(
*
hC
)
*
(
size_t
)(
m
*
k
));
__COMPLEX8__
*
hA
;
__MALLOC
(
hA
,
sizeof
(
*
hA
)
*
(
size_t
)(
m
*
n
));
__COMPLEX8__
*
hB
;
__MALLOC
(
hB
,
sizeof
(
*
hB
)
*
(
size_t
)(
k
*
n
));
__COMPLEX8__
*
hC
;
__MALLOC
(
hC
,
sizeof
(
*
hC
)
*
(
size_t
)(
m
*
k
));
if
(
hC
==
NULL
)
{
fprintf
(
stderr
,
"hC not allocated
\n
"
);
...
...
@@ -248,14 +234,16 @@ run_test
// timer_stop(timer);
//timer_start(timer, "Memcopy");
__ASSERT
(
cuda
Memcpy
(
A
,
hA
,
sizeof
(
*
A
)
*
(
size_t
)(
m
*
n
),
cuda
MemcpyHostToDevice
));
__ASSERT
(
cuda
Memcpy
(
B
,
hB
,
sizeof
(
*
B
)
*
(
size_t
)(
k
*
n
),
cuda
MemcpyHostToDevice
));
__ASSERT
(
__PREFIX
(
Memcpy
)
(
A
,
hA
,
sizeof
(
*
A
)
*
(
size_t
)(
m
*
n
),
__PREFIX
(
MemcpyHostToDevice
))
)
;
__ASSERT
(
__PREFIX
(
Memcpy
)
(
B
,
hB
,
sizeof
(
*
B
)
*
(
size_t
)(
k
*
n
),
__PREFIX
(
MemcpyHostToDevice
))
)
;
// timer_stop(timer);
cudaSetDevice
(
0
);
cublasHandle_t
handle
;
//
cudaSetDevice(0);
__HANDLE__
handle
;
//timer_start(timer, "Create Handle");
cublasCreate
(
&
handle
);
//if(rocblas_create_handle(&handle) != rocblas_status_success) return EXIT_FAILURE;
__CREATE_HANDLE
(
&
handle
);
//timer_stop(timer);
for
(
unsigned
r
=
0
;
r
<
rep
;
r
++
)
...
...
@@ -294,16 +282,17 @@ run_test
+
k
*
m
*
sizeof
(
*
C
)
)
/
1.e+9
);
__ASSERT
(
cuda
Memcpy
(
hC
,
C
,
sizeof
(
*
hC
)
*
(
size_t
)(
k
*
m
),
cuda
MemcpyDeviceToHost
));
__ASSERT
(
__PREFIX
(
Memcpy
)
(
hC
,
C
,
sizeof
(
*
hC
)
*
(
size_t
)(
k
*
m
),
__PREFIX
(
MemcpyDeviceToHost
))
)
;
//print_result(hC);
// timer_start(timer, "Destroy Handle");
if
(
cublasDestroy
(
handle
)
!=
cudaSuccess
)
return
EXIT_FAILURE
;
//if(rocblas_destroy_handle(handle) != rocblas_status_success) return EXIT_FAILURE;
if
(
__DESTROY_HANDLE
(
handle
)
!=
__PREFIX
(
Success
))
return
EXIT_FAILURE
;
// timer_stop(timer);
cuda
Free
(
A
);
cuda
Free
(
B
);
cuda
Free
(
C
);
__PREFIX
(
Free
)
(
A
);
__PREFIX
(
Free
)
(
B
);
__PREFIX
(
Free
)
(
C
);
free
(
hA
);
free
(
hB
);
free
(
hC
);
...
...
@@ -320,12 +309,8 @@ main
int
min_dim
=
1
;
int
max_dim
=
14
;
float
*
res
=
malloc
(
sizeof
(
*
res
)
*
(
size_t
)((
max_dim
-
min_dim
)
*
rep
));
if
(
res
==
NULL
)
{
fprintf
(
stderr
,
"Couldn't allocate res
\n
"
);
exit
(
1
);
}
float
*
res
;
__MALLOC
(
res
,
sizeof
(
*
res
)
*
(
size_t
)((
max_dim
-
min_dim
)
*
rep
));
for
(
int
i
=
min_dim
;
i
<
max_dim
;
i
++
)
{
size_t
dim
=
1
<<
i
;
...
...
This diff is collapsed.
Click to expand it.
hardware_settings.h
0 → 100644
+
52
−
0
View file @
0626ce84
/*
* =====================================================================================
*
* Filename: hardware_settings.h
*
* Description:
*
* Version: 1.0
* Created: 28.01.2021 16:15:56
* Revision: none
* Compiler: gcc
*
* Author: Henning Fehrmann (), henning.fehrmann@aei.mpg.de
* Organization: AEI Hannover
* Copyright: Copyright (c) 2021, Henning Fehrmann
*
* =====================================================================================
*/
#ifdef ROC
#define __HIP_PLATFORM_HCC__
#include
<rocblas.h>
#include
<hip/hip_runtime.h>
#include
<hip/hip_vector_types.h>
#define __ASSERT(x) (assert((x)==hipSuccess))
#define __HANDLE__ rocblas_handle
#define __COMPLEX8__ rocblas_float_complex
#define __BLAS_OPERATION__ rocblas_operation
#define __NO_TRANSFORM__ rocblas_operation_none
#define __CT_TRANSFORM__ rocblas_operation_conjugate_transpose
#define __CGMEM__ rocblas_cgemm
#define __PREFIX(c) hip##c
#define __CREATE_HANDLE(h) rocblas_create_handle(h)
#define __DESTROY_HANDLE(h) rocblas_destroy_handle(h)
#endif
#ifdef CUDA
#include
<assert.h>
#include
<cuda_runtime.h>
#include
<cuda.h>
#include
<cublas_v2.h>
#define __ASSERT(x) (assert((x)==cudaSuccess))
#define __HANDLE__ cublasHandle_t
#define __COMPLEX8__ cuComplex
#define __BLAS_OPERATION__ cublasOperation_t
#define __NO_TRANSFORM__ CUBLAS_OP_N
#define __CT_TRANSFORM__ CUBLAS_OP_C
#define __CGMEM__ cublasCgemm
#define __PREFIX(c) cuda##c
#define __CREATE_HANDLE(h) cublasCreate(h)
#define __DESTROY_HANDLE(h) cublasDestroy(h)
#endif
This diff is collapsed.
Click to expand it.
rocmblas_AMD.c
deleted
100644 → 0
+
0
−
361
View file @
21c8a6d9
/*
* =====================================================================================
*
* Description: BLAS Benchmark
*
* Version: 1.0
* Created: 27.01.2021 12:45:18
* Revision: none
* Compiler: hipc
*
* Author: Henning Fehrmann (), henning.fehrmann@aei.mpg.de
* Organization: AEI Hannover
* License: GNU General Public License v2
*
* =====================================================================================
*/
#define __HIP_PLATFORM_HCC__
#include
<stdio.h>
#include
<stdlib.h>
#include
<math.h>
#include
<rocblas.h>
#include
<hip/hip_runtime.h>
#include
<hip/hip_vector_types.h>
#include
<time.h>
#include
<omp.h>
#define __ASSERT(x) (assert((x)==hipSuccess))
/*
 * Problem dimensions shared by the whole benchmark. run_test() overwrites
 * all three with the dimension under test before any matrix is touched.
 */
size_t m = 10000, n = 10000, k = 10000;
/* State words of the xorshf96 generator (file-local, shared by all callers). */
static unsigned long x = 123456789;
static unsigned long y = 362436069;
static unsigned long z = 521288629;

/*
 * Advances the three-word xorshift generator by one step and returns the
 * newly produced word (which is also stored in z).
 *
 * NOT thread safe: the state lives in the file-scope variables x, y and z
 * and is updated without any synchronisation.
 */
unsigned long
xorshf96(void)
{
	unsigned long scrambled = x;

	scrambled ^= scrambled << 16;
	scrambled ^= scrambled >> 5;
	scrambled ^= scrambled << 1;

	/* Rotate the state: old y becomes x, old z becomes y. */
	x = y;
	y = z;
	z = scrambled ^ x ^ y;

	return z;
}
struct
runtime
{
struct
timespec
start
;
struct
timespec
stop
;
char
tag
[
128
];
};
void
timer_start
(
struct
runtime
*
timer
,
char
tag
[
128
]
)
{
struct
timespec
start
;
sprintf
(
timer
->
tag
,
"%s"
,
tag
);
clock_gettime
(
CLOCK_REALTIME
,
&
start
);
timer
->
start
=
start
;
// printf("--------> start timer: %s\n", timer->tag);
}
double
timer_stop
(
struct
runtime
*
timer
)
{
struct
timespec
stop
;
clock_gettime
(
CLOCK_REALTIME
,
&
stop
);
timer
->
stop
=
stop
;
double
res
=
(
double
)
(
(
timer
->
stop
).
tv_sec
-
(
timer
->
start
).
tv_sec
)
*
1000
.
+
(
double
)
(
(
timer
->
stop
).
tv_nsec
-
(
timer
->
start
).
tv_nsec
)
/
1000000
.
;
// printf("--------> stop timer %s: %g ms\n", timer->tag, res );
return
res
;
}
/*
 * Runs one single-precision complex GEMM on the device:
 *
 *     C = A * B^H
 *
 * using the file-scope dimensions m, n and k. With transA = none and
 * transB = conjugate transpose, rocBLAS treats A as m x k, B as n x k and
 * C as m x n, all column-major.
 *
 * handle:  rocBLAS handle created by the caller.
 * A, B, C: device pointers to the operand and result matrices.
 *
 * Terminates the program if rocBLAS reports an error, since the timings
 * collected by the caller would otherwise be meaningless.
 */
void
multiplication(rocblas_handle handle,
               const rocblas_float_complex *A,
               const rocblas_float_complex *B,
               rocblas_float_complex *C)
{
	const rocblas_operation transA = rocblas_operation_none;
	const rocblas_operation transB = rocblas_operation_conjugate_transpose;
	const rocblas_float_complex alpha = {.x = 1.f, .y = 0.f};
	const rocblas_float_complex beta = {.x = 0.f, .y = 0.f};

	/* Leading dimension = number of rows of each stored matrix. */
	const rocblas_int lda = (rocblas_int)m; /* A is m x k */
	const rocblas_int ldb = (rocblas_int)n; /* B is n x k (used as B^H) */
	const rocblas_int ldc = (rocblas_int)m; /* C is m x n */

	const rocblas_status status = rocblas_cgemm(handle,
	                                            transA,
	                                            transB,
	                                            (rocblas_int)m,
	                                            (rocblas_int)n,
	                                            (rocblas_int)k,
	                                            &alpha,
	                                            A,
	                                            lda,
	                                            B,
	                                            ldb,
	                                            &beta,
	                                            C,
	                                            ldc);
	if (status != rocblas_status_success)
	{
		fprintf(stderr, "rocblas_cgemm failed with status %d\n", (int)status);
		exit(EXIT_FAILURE);
	}
}
/*
 * Fills the host matrices with pseudo-random complex values.
 *
 * hA: host buffer with room for m * n elements.
 * hB: host buffer with room for k * n elements.
 *
 * Every real and imaginary part is one xorshf96() draw scaled by `fact`,
 * which is derived from n and the generator state words x and y as they
 * are on entry.
 *
 * The fill is deliberately serial: xorshf96() updates shared file-scope
 * state without synchronisation, so calling it from an OpenMP parallel
 * loop is a data race. Running serially also makes the generated matrices
 * reproducible.
 */
void
prepare_matrices(rocblas_float_complex *hA, rocblas_float_complex *hB)
{
	const float fact = 1.f / (float)n / (float)x / (float)y / 20.f;
	const size_t count_a = m * n;
	const size_t count_b = k * n;

	/* Same element order as the former row/column loops (ind = j + m * i). */
	for (size_t ind = 0; ind < count_a; ind++)
	{
		hA[ind].x = (float)xorshf96() * fact;
		hA[ind].y = (float)xorshf96() * fact;
	}

	/* Same element order as the former row/column loops (ind = j + k * i). */
	for (size_t ind = 0; ind < count_b; ind++)
	{
		hB[ind].x = (float)xorshf96() * fact;
		hB[ind].y = (float)xorshf96() * fact;
	}
}
/*
 * Dumps the result matrix to stdout for debugging.
 *
 * hC: host copy of the result; element (row, col) is read from
 *     hC[col + k * row] for row < m and col < k.
 *
 * Output: a header line with m and k, one tab-separated line of
 * "real imag" pairs per row, and a closing separator line.
 */
void
print_result(rocblas_float_complex *hC)
{
	size_t row = 0;

	printf("-------- %zu %zu\n", m, k);
	while (row < m)
	{
		size_t col = 0;

		while (col < k)
		{
			const size_t offset = col + k * row;

			printf("%1.2f %1.2f\t", hC[offset].x, hC[offset].y);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("--------\n");
}
int
run_test
(
size_t
dim
,
unsigned
rep
,
float
*
res
)
{
m
=
dim
;
n
=
dim
;
k
=
dim
;
struct
runtime
*
timer
=
malloc
(
sizeof
(
*
timer
));
rocblas_float_complex
*
A
;
rocblas_float_complex
*
B
;
rocblas_float_complex
*
C
;
__ASSERT
(
hipMalloc
((
void
**
)
&
A
,
sizeof
(
*
A
)
*
(
size_t
)(
m
*
n
)));
if
(
A
==
NULL
)
{
fprintf
(
stderr
,
"A not allocated
\n
"
);
exit
(
1
);
}
__ASSERT
(
hipMalloc
((
void
**
)
&
B
,
sizeof
(
*
B
)
*
(
size_t
)(
n
*
k
)));
if
(
B
==
NULL
)
{
fprintf
(
stderr
,
"B not allocated
\n
"
);
exit
(
1
);
}
__ASSERT
(
hipMalloc
((
void
**
)
&
C
,
sizeof
(
*
C
)
*
(
size_t
)(
m
*
k
)));
if
(
C
==
NULL
)
{
fprintf
(
stderr
,
"C not allocated
\n
"
);
exit
(
1
);
}
rocblas_float_complex
*
hA
=
malloc
(
sizeof
(
*
hA
)
*
(
size_t
)(
m
*
n
));
if
(
hA
==
NULL
)
{
fprintf
(
stderr
,
"hA not allocated
\n
"
);
exit
(
1
);
}
rocblas_float_complex
*
hB
=
malloc
(
sizeof
(
*
hB
)
*
(
size_t
)(
k
*
n
));
if
(
hB
==
NULL
)
{
fprintf
(
stderr
,
"hB not allocated
\n
"
);
exit
(
1
);
}
rocblas_float_complex
*
hC
=
malloc
(
sizeof
(
*
hC
)
*
(
size_t
)(
m
*
k
));
if
(
hC
==
NULL
)
{
fprintf
(
stderr
,
"hC not allocated
\n
"
);
exit
(
1
);
}
// timer_start(timer, "Prepare matrices");
prepare_matrices
(
hA
,
hB
);
// timer_stop(timer);
//timer_start(timer, "Memcopy");
__ASSERT
(
hipMemcpy
(
A
,
hA
,
sizeof
(
*
A
)
*
(
size_t
)(
m
*
n
),
hipMemcpyHostToDevice
));
__ASSERT
(
hipMemcpy
(
B
,
hB
,
sizeof
(
*
B
)
*
(
size_t
)(
k
*
n
),
hipMemcpyHostToDevice
));
// timer_stop(timer);
rocblas_handle
handle
;
//timer_start(timer, "Create Handle");
if
(
rocblas_create_handle
(
&
handle
)
!=
rocblas_status_success
)
return
EXIT_FAILURE
;
//timer_stop(timer);
for
(
unsigned
r
=
0
;
r
<
rep
;
r
++
)
{
float
res_r
=
0
.
f
;
char
mes
[
128
];
sprintf
(
mes
,
"dim %zu run %d a"
,
dim
,
r
);
timer_start
(
timer
,
mes
);
multiplication
(
handle
,
A
,
B
,
C
);
res_r
+=
timer_stop
(
timer
);
sprintf
(
mes
,
"dim %zu run %d b"
,
dim
,
r
);
/*
timer_start(timer, mes);
multiplication
(
handle,
B,
A,
C
);
res_r += timer_stop(timer);
*/
res
[
r
]
=
res_r
/
1
.
f
;
}
printf
(
"dimensions: %zu %zu %zu
\t
-- "
,
n
,
m
,
k
);
printf
(
"required size: %f GB
\n
"
,
(
m
*
n
*
sizeof
(
*
A
)
+
k
*
n
*
sizeof
(
*
B
)
+
k
*
m
*
sizeof
(
*
C
)
)
/
1.e+9
);
__ASSERT
(
hipMemcpy
(
hC
,
C
,
sizeof
(
*
hC
)
*
(
size_t
)(
k
*
m
),
hipMemcpyDeviceToHost
));
//print_result(hC);
// timer_start(timer, "Destroy Handle");
if
(
rocblas_destroy_handle
(
handle
)
!=
rocblas_status_success
)
return
EXIT_FAILURE
;
// timer_stop(timer);
hipFree
(
A
);
hipFree
(
B
);
hipFree
(
C
);
free
(
hA
);
free
(
hB
);
free
(
hC
);
free
(
timer
);
return
0
;
}
/*
 * Benchmark driver.
 *
 * Runs run_test() for every power-of-two dimension 2^i with
 * min_dim <= i < max_dim, `rep` repetitions each, and writes the timings to
 * the file "runtimes": a header row with the dimensions followed by one row
 * per repetition holding the duration for each dimension.
 *
 * Returns 0 on success and EXIT_FAILURE if a benchmark run fails or the
 * output file cannot be written.
 */
int
main()
{
	int rep = 512;
	int min_dim = 1;
	int max_dim = 14;
	FILE *f;
	char name[128];

	/* One slot per (dimension, repetition) pair, grouped by dimension. */
	float *res = malloc(sizeof(*res) * (size_t)((max_dim - min_dim) * rep));
	if (res == NULL)
	{
		fprintf(stderr, "Couldn't allocate res\n");
		exit(1);
	}

	for (int i = min_dim; i < max_dim; i++)
	{
		size_t dim = (size_t)1 << i;
		int ind = (i - min_dim) * rep;

		if (run_test(dim, rep, &res[ind]) != 0)
		{
			/* The slots of this dimension were never filled in. */
			fprintf(stderr, "Benchmark failed for dimension %zu\n", dim);
			free(res);
			return EXIT_FAILURE;
		}
	}

	// store the results
	snprintf(name, sizeof(name), "runtimes");
	f = fopen(name, "w");
	if (f == NULL)
	{
		/* Writing to a NULL stream would crash, so stop here. */
		fprintf(stderr, "Couldn't open %s\n", name);
		free(res);
		return EXIT_FAILURE;
	}

	for (int i = min_dim; i < max_dim; i++)
	{
		size_t dim = (size_t)1 << i;
		fprintf(f, "%zu\t", dim);
	}
	fprintf(f, "\n");

	for (int r = 0; r < rep; r++)
	{
		for (int i = min_dim; i < max_dim; i++)
		{
			size_t pos = (i - min_dim) * rep + r;
			fprintf(f, "%1.6f\t", res[pos]);
		}
		fprintf(f, "\n");
	}

	if (fclose(f) != 0)
	{
		fprintf(stderr, "Couldn't write %s\n", name);
		free(res);
		return EXIT_FAILURE;
	}

	free(res);
	return 0;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment