diff --git a/blas.c b/blas.c
index 6dfd65e8fe20d3c77ab1003edb531ce2bad31884..6f2f5fefce5ac8e8ed250b16b3dc80948bb09f89 100644
--- a/blas.c
+++ b/blas.c
@@ -16,11 +16,11 @@
  */
 
 #include "hardware_settings.h"
+#include "profiler.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
-#include <time.h>
 #include <omp.h>
 
 #define __MALLOC(P, size) P =  malloc(size); \
@@ -34,73 +34,6 @@ size_t m = 10000;
 size_t n = 10000;
 size_t k = 10000;
 
-static unsigned long x=123456789, y=362436069, z=521288629;
-
-unsigned long
-xorshf96
-(
-	void
-)
-{
-	// NOT thread save
-	unsigned long t;
-	x ^= x << 16;
-	x ^= x >> 5;
-	x ^= x << 1;
-
-	t = x;
-	x = y;
-	y = z;
-	z = t ^ x ^ y;
-	return z;
-}
-
-struct runtime
-{
-	struct timespec start;
-	struct timespec stop;
-	char tag[128];
-};
-
-
-void
-timer_start
-(
-	struct runtime * timer,
-	char tag[128]
-)
-{
-	struct timespec start;
-	sprintf(timer->tag,"%s", tag);
-	clock_gettime(CLOCK_REALTIME , &start);
-	timer->start = start;
-//	printf("-------->  start timer: %s\n", timer->tag);
-}
-
-double
-timer_stop
-(
-	struct runtime * timer
-)
-{
-	struct timespec stop;
-	clock_gettime(CLOCK_REALTIME , &stop);
-	timer->stop = stop;
-	double res= (double)
-		(
-			(timer->stop).tv_sec - (timer->start).tv_sec
-		)*1000.
-		+
-		(double)
-		(
-			(timer->stop).tv_nsec - (timer->start).tv_nsec
-		)/1000000.
-		;
-	// printf("-------->  stop timer %s: %g ms\n", timer->tag, res );
-	return res;
-}
-
-
 void
 multiplication
 (