|
@@ -24,8 +24,8 @@
|
|
|
static const char *length_str = "1MB";
|
|
|
static const char *routine = "default";
|
|
|
static int iterations = 1;
|
|
|
-static bool use_clock;
|
|
|
-static int clock_fd;
|
|
|
+static bool use_cycle;
|
|
|
+static int cycle_fd;
|
|
|
static bool only_prefault;
|
|
|
static bool no_prefault;
|
|
|
|
|
@@ -37,7 +37,7 @@ static const struct option options[] = {
|
|
|
"Specify routine to copy"),
|
|
|
OPT_INTEGER('i', "iterations", &iterations,
|
|
|
"repeat memcpy() invocation this number of times"),
|
|
|
- OPT_BOOLEAN('c', "clock", &use_clock,
|
|
|
+ OPT_BOOLEAN('c', "cycle", &use_cycle,
|
|
|
"Use cycles event instead of gettimeofday() for measuring"),
|
|
|
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
|
|
|
"Show only the result with page faults before memcpy()"),
|
|
@@ -76,27 +76,27 @@ static const char * const bench_mem_memcpy_usage[] = {
|
|
|
NULL
|
|
|
};
|
|
|
|
|
|
-static struct perf_event_attr clock_attr = {
|
|
|
+static struct perf_event_attr cycle_attr = {
|
|
|
.type = PERF_TYPE_HARDWARE,
|
|
|
.config = PERF_COUNT_HW_CPU_CYCLES
|
|
|
};
|
|
|
|
|
|
-static void init_clock(void)
|
|
|
+static void init_cycle(void)
|
|
|
{
|
|
|
- clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
|
|
|
+ cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0);
|
|
|
|
|
|
- if (clock_fd < 0 && errno == ENOSYS)
|
|
|
+ if (cycle_fd < 0 && errno == ENOSYS)
|
|
|
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
|
|
|
else
|
|
|
- BUG_ON(clock_fd < 0);
|
|
|
+ BUG_ON(cycle_fd < 0);
|
|
|
}
|
|
|
|
|
|
-static u64 get_clock(void)
|
|
|
+static u64 get_cycle(void)
|
|
|
{
|
|
|
int ret;
|
|
|
u64 clk;
|
|
|
|
|
|
- ret = read(clock_fd, &clk, sizeof(u64));
|
|
|
+ ret = read(cycle_fd, &clk, sizeof(u64));
|
|
|
BUG_ON(ret != sizeof(u64));
|
|
|
|
|
|
return clk;
|
|
@@ -119,9 +119,9 @@ static void alloc_mem(void **dst, void **src, size_t length)
|
|
|
die("memory allocation failed - maybe length is too large?\n");
|
|
|
}
|
|
|
|
|
|
-static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
|
|
|
+static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
|
|
|
{
|
|
|
- u64 clock_start = 0ULL, clock_end = 0ULL;
|
|
|
+ u64 cycle_start = 0ULL, cycle_end = 0ULL;
|
|
|
void *src = NULL, *dst = NULL;
|
|
|
int i;
|
|
|
|
|
@@ -130,14 +130,14 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
|
|
|
if (prefault)
|
|
|
fn(dst, src, len);
|
|
|
|
|
|
- clock_start = get_clock();
|
|
|
+ cycle_start = get_cycle();
|
|
|
for (i = 0; i < iterations; ++i)
|
|
|
fn(dst, src, len);
|
|
|
- clock_end = get_clock();
|
|
|
+ cycle_end = get_cycle();
|
|
|
|
|
|
free(src);
|
|
|
free(dst);
|
|
|
- return clock_end - clock_start;
|
|
|
+ return cycle_end - cycle_start;
|
|
|
}
|
|
|
|
|
|
static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
|
|
@@ -182,17 +182,17 @@ int bench_mem_memcpy(int argc, const char **argv,
|
|
|
int i;
|
|
|
size_t len;
|
|
|
double result_bps[2];
|
|
|
- u64 result_clock[2];
|
|
|
+ u64 result_cycle[2];
|
|
|
|
|
|
argc = parse_options(argc, argv, options,
|
|
|
bench_mem_memcpy_usage, 0);
|
|
|
|
|
|
- if (use_clock)
|
|
|
- init_clock();
|
|
|
+ if (use_cycle)
|
|
|
+ init_cycle();
|
|
|
|
|
|
len = (size_t)perf_atoll((char *)length_str);
|
|
|
|
|
|
- result_clock[0] = result_clock[1] = 0ULL;
|
|
|
+ result_cycle[0] = result_cycle[1] = 0ULL;
|
|
|
result_bps[0] = result_bps[1] = 0.0;
|
|
|
|
|
|
if ((s64)len <= 0) {
|
|
@@ -223,11 +223,11 @@ int bench_mem_memcpy(int argc, const char **argv,
|
|
|
|
|
|
if (!only_prefault && !no_prefault) {
|
|
|
/* show both of results */
|
|
|
- if (use_clock) {
|
|
|
- result_clock[0] =
|
|
|
- do_memcpy_clock(routines[i].fn, len, false);
|
|
|
- result_clock[1] =
|
|
|
- do_memcpy_clock(routines[i].fn, len, true);
|
|
|
+ if (use_cycle) {
|
|
|
+ result_cycle[0] =
|
|
|
+ do_memcpy_cycle(routines[i].fn, len, false);
|
|
|
+ result_cycle[1] =
|
|
|
+ do_memcpy_cycle(routines[i].fn, len, true);
|
|
|
} else {
|
|
|
result_bps[0] =
|
|
|
do_memcpy_gettimeofday(routines[i].fn,
|
|
@@ -237,9 +237,9 @@ int bench_mem_memcpy(int argc, const char **argv,
|
|
|
len, true);
|
|
|
}
|
|
|
} else {
|
|
|
- if (use_clock) {
|
|
|
- result_clock[pf] =
|
|
|
- do_memcpy_clock(routines[i].fn,
|
|
|
+ if (use_cycle) {
|
|
|
+ result_cycle[pf] =
|
|
|
+ do_memcpy_cycle(routines[i].fn,
|
|
|
len, only_prefault);
|
|
|
} else {
|
|
|
result_bps[pf] =
|
|
@@ -251,12 +251,12 @@ int bench_mem_memcpy(int argc, const char **argv,
|
|
|
switch (bench_format) {
|
|
|
case BENCH_FORMAT_DEFAULT:
|
|
|
if (!only_prefault && !no_prefault) {
|
|
|
- if (use_clock) {
|
|
|
- printf(" %14lf Clock/Byte\n",
|
|
|
- (double)result_clock[0]
|
|
|
+ if (use_cycle) {
|
|
|
+ printf(" %14lf Cycle/Byte\n",
|
|
|
+ (double)result_cycle[0]
|
|
|
/ (double)len);
|
|
|
- printf(" %14lf Clock/Byte (with prefault)\n",
|
|
|
- (double)result_clock[1]
|
|
|
+ printf(" %14lf Cycle/Byte (with prefault)\n",
|
|
|
+ (double)result_cycle[1]
|
|
|
/ (double)len);
|
|
|
} else {
|
|
|
print_bps(result_bps[0]);
|
|
@@ -265,9 +265,9 @@ int bench_mem_memcpy(int argc, const char **argv,
|
|
|
printf(" (with prefault)\n");
|
|
|
}
|
|
|
} else {
|
|
|
- if (use_clock) {
|
|
|
- printf(" %14lf Clock/Byte",
|
|
|
- (double)result_clock[pf]
|
|
|
+ if (use_cycle) {
|
|
|
+ printf(" %14lf Cycle/Byte",
|
|
|
+ (double)result_cycle[pf]
|
|
|
/ (double)len);
|
|
|
} else
|
|
|
print_bps(result_bps[pf]);
|
|
@@ -277,17 +277,17 @@ int bench_mem_memcpy(int argc, const char **argv,
|
|
|
break;
|
|
|
case BENCH_FORMAT_SIMPLE:
|
|
|
if (!only_prefault && !no_prefault) {
|
|
|
- if (use_clock) {
|
|
|
+ if (use_cycle) {
|
|
|
printf("%lf %lf\n",
|
|
|
- (double)result_clock[0] / (double)len,
|
|
|
- (double)result_clock[1] / (double)len);
|
|
|
+ (double)result_cycle[0] / (double)len,
|
|
|
+ (double)result_cycle[1] / (double)len);
|
|
|
} else {
|
|
|
printf("%lf %lf\n",
|
|
|
result_bps[0], result_bps[1]);
|
|
|
}
|
|
|
} else {
|
|
|
- if (use_clock) {
|
|
|
- printf("%lf\n", (double)result_clock[pf]
|
|
|
+ if (use_cycle) {
|
|
|
+ printf("%lf\n", (double)result_cycle[pf]
|
|
|
/ (double)len);
|
|
|
} else
|
|
|
printf("%lf\n", result_bps[pf]);
|