cloud-sat/m4ri-20140914/m4/ax_cache_size_tune.m4

197 lines
5.1 KiB
Plaintext
Raw Normal View History

2023-02-24 07:58:40 +00:00
# SYNOPSIS
#
# AX_CACHE_SIZE_TUNE
#
# DESCRIPTION
#
# Find L1, L2, L3 caches size by running some timing experiments.
# The results are available in the defines __M4RI_CPU_L1_CACHE,
# __M4RI_CPU_L2_CACHE and __M4RI_CPU_L3_CACHE.
#
# This macro depends on AC_PROG_SED, AC_PROG_CC.
#
# LAST MODIFICATION
#
# 2011-04-11
#
# COPYLEFT
#
# Copyright (c) 2009,2010 Martin Albrecht <martinralbrecht@googlemail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved.
AC_DEFUN([AX_CACHE_SIZE_TUNE],
[ AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([AC_PROG_SED])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for cache sizes, ax_cv_cache_sizes,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([[
#include <time.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
double walltime(double t0) {
double mic, time;
double mega = 0.000001;
struct timeval tp;
static long base_sec = 0;
static long base_usec = 0;
(void) gettimeofday(&tp,NULL);
if (base_sec == 0) {
base_sec = tp.tv_sec;
base_usec = tp.tv_usec;
}
time = (double) (tp.tv_sec - base_sec);
mic = (double) (tp.tv_usec - base_usec);
time = (time + mic * mega) - t0;
return(time);
}
double run_experiment(size_t size, size_t trials) {
size_t i,j;
unsigned long *a = (unsigned long*)malloc(size/4);
unsigned long *b = (unsigned long*)malloc(size/4);
unsigned long *c = (unsigned long*)malloc(size/4);
unsigned long *d = (unsigned long*)malloc(size/4);
size_t n = size/4/(sizeof(unsigned long));
/* we setup a lookup table with a random-ish pattern */
a[0] = 1337;
b[0] = 5345345;
for(j=1; j<n ; j++) {
a[j] = a[j-1] * 1073741827;
a[j-1] %= n;
b[j] = b[j-1] * 1073741827;
b[j-1] %= n;
}
a[n-1] %= n;
b[n-1] %= n;
double wt = walltime(0.0);
clock_t ct = clock();
for(i=0; i<trials; i++) {
for(j=0; j<n; j+=8) {
d[b[j+0]] = c[a[j+0]];
d[b[j+1]] = c[a[j+1]];
d[b[j+2]] = c[a[j+2]];
d[b[j+3]] = c[a[j+3]];
d[b[j+4]] = c[a[j+4]];
d[b[j+5]] = c[a[j+5]];
d[b[j+6]] = c[a[j+6]];
d[b[j+7]] = c[a[j+7]];
}
}
ct = clock() - ct;
wt = walltime(wt);
free(a);
free(b);
free(c);
free(d);
return (double)wt;
}
#define NUMBER_OF_EXPERIMENTS 8
size_t cache_size(const size_t *candidates, const size_t n, size_t trials) {
double times[NUMBER_OF_EXPERIMENTS][n];
double dtimes[NUMBER_OF_EXPERIMENTS][n];
size_t i,j;
double wt, result;
for(j=0; j<NUMBER_OF_EXPERIMENTS; j++) {
size_t mult = 1;
size_t _trials = trials;
run_experiment(candidates[0]*1024,_trials);
wt = walltime(0.0);
times[j][0] = run_experiment(candidates[0]*1024,_trials);
wt = walltime(wt);
dtimes[j][0] = 1.0;
printf("s: %5zu, rx: %6.2f, x: %6.2f, wt: %6.2f, dx: NaN\n",candidates[0],times[j][0],times[j][0],wt);
fflush(NULL);
for(i=1;i<n;i++) {
run_experiment(candidates[i]*1024,_trials);
wt = walltime(0.0);
result = run_experiment(candidates[i]*1024,_trials);
wt = walltime(wt);
times[j][i] = mult*result;
dtimes[j][i] = candidates[i-1]*times[j][i]/times[j][i-1]/candidates[i];
printf("s: %5zu, rx: %6.2f, x: %6.2f, wt: %6.2f, dx: %6.2f\n",candidates[i],result,times[j][i],wt,dtimes[j][i]);
fflush(NULL);
while(wt > 0.25) {
_trials = _trials/2;
mult = 2*mult;
wt /= 2.0;
result /= 2.0;
}
}
printf("\n");
}
for(i=0;i<n;i++) {
double tmp = 0.0;
for(j=0; j<NUMBER_OF_EXPERIMENTS; j++) {
tmp += dtimes[j][i];
}
dtimes[0][i] = tmp/NUMBER_OF_EXPERIMENTS;
}
size_t max = 1;
for(i=1;i<n;i++){
if (dtimes[0][i] > dtimes[0][max] ) {
max = i;
}
}
return candidates[max-1];
}
]],
[[
const size_t c1[] = { 4, 8, 16, 32, 64, 128};
const size_t c2[] = { 128, 256, 512};
const size_t c3[] = {1024,1536,2048,3072,4096,6144,8192,16384,32768};
FILE *f;
printf("\n");
size_t _l1 = cache_size(c1, 6, 1ULL<<15);
size_t _l2 = cache_size(c2, 3, 1ULL<<12);
size_t _l3 = cache_size(c3, 9, 1ULL<< 9);
f = fopen("conftest_cache_sizes", "w"); if (!f) return 1;
fprintf(f,"%lu:%lu:%lu\n",(unsigned long)(_l1*1024),(unsigned long)(_l2*1024),(unsigned long)(_l3*1024));
fclose(f);
return 0;
]])],
[ax_cv_cache_sizes=`cat conftest_cache_sizes`; rm -f conftest_cache_sizes],
[ax_cv_cache_sizes=unknown; rm -f conftest_cache_sizes],
[ax_cv_cache_sizes=unknown])])
AC_LANG_POP([C])
AC_MSG_CHECKING(the L1 cache size)
ax_l1_size=`echo $ax_cv_cache_sizes | cut -d ':' -f 1`
AC_MSG_RESULT( $ax_l1_size Bytes)
AC_MSG_CHECKING(the L2 cache size)
ax_l2_size=`echo $ax_cv_cache_sizes | cut -d ':' -f 2`
AC_MSG_RESULT( $ax_l2_size Bytes)
AC_MSG_CHECKING(the L3 cache size)
ax_l3_size=`echo $ax_cv_cache_sizes | cut -d ':' -f 3`
AC_MSG_RESULT( $ax_l3_size Bytes)
M4RI_CPU_L1_CACHE=${ax_l1_size}
M4RI_CPU_L2_CACHE=${ax_l2_size}
M4RI_CPU_L3_CACHE=${ax_l3_size}
AC_SUBST(M4RI_CPU_L1_CACHE)
AC_SUBST(M4RI_CPU_L2_CACHE)
AC_SUBST(M4RI_CPU_L3_CACHE)
])