Table of
contents
12.1.2010
A simple random file read benchmark
I have mostly been using iozone to benchmark my file systems, but it is really more of a disk benchmark than a file system benchmark.
One night I was after a random read test with randomized files to benchmark an upgrade I had gotten to my Solaris ZFS file system.
The closest benchmark iozone had to this was the one where a big file is written, and accessed randomly.
Now, file systems have for ages been complicated (or smart) enough that a test like this does not tell you the whole story about how fast your storage system accesses random files.
For example, ZFS's raidz1/2 use stripe sizes that dynamically adjust to file sizes.
Reading one massive file, a small piece here and a small piece there, will not produce the same kind of load as accessing real individual files.
And this is an important workload, since most system data actually consists of piles and piles of small files.
I know there is an abundance of better benchmark suites out there which simulate this kind of load, but I just wanted to write this small piece of software myself since I knew exactly what I was after.
So, here it is. It prints a help message with -h, but the parameters you need to know are:
- -n <filecount> how many files to create
- -s <filesize> how big should one file be
- -l <loopcount> how many reading loops should we perform (each loop performs <filecount> reads of randomly chosen files; fractional values such as 0.5 are accepted)
- -d print debug information if you're confused about what the program is actually doing
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
// Largest number of bytes moved by a single fread/fwrite call (1 MB).
// The expansion is parenthesized so that expressions such as
// `x / MAXREAD` or `x % MAXREAD` evaluate as intended.
#define MAXREAD (1024*1024)
// Wall-clock timing state.
// t_start is stamped by starttime(); t_end is stamped by gettime_r().
struct timeval t_start;
struct timeval t_end;

// Record the current wall-clock time as the start of the measured interval.
void starttime()
{
    (void)gettimeofday(&t_start, NULL);
}
// Stamp the end time and print the wall-clock time elapsed since the moment
// stored in t_start.
//
// desc   - label printed in front of the figures
// repeat - number of repetitions the interval covered; the printed average is
//          the elapsed time divided by this value. Values below 1 are treated
//          as 1 so the average is never a division by zero.
//
// Prints "<desc> time: <seconds>, average: <seconds per repetition>".
void gettime_r(const char *desc, int repeat) {
    gettimeofday(&t_end, NULL);
    if (repeat < 1)
        repeat = 1;
    // Subtract the second and microsecond fields separately so the arithmetic
    // stays on small values instead of on microseconds since the epoch.
    double elapsed = (double)(t_end.tv_sec - t_start.tv_sec)
                   + (double)(t_end.tv_usec - t_start.tv_usec) / 1e6;
    printf("%s time: %.3f, average: %.4f\n", desc, elapsed, elapsed / (double)repeat);
}
// Report the elapsed time for an interval that covered a single repetition.
void gettime(char* desc)
{
    gettime_r(desc, 1);
}
// Print the command-line synopsis to `out`.
static void print_usage(FILE *out, const char *progname)
{
    fprintf(out, "Usage: %s [-h] [-s filesize] [-n filecount] [-l loopcount (float)] [-d]\n",
            progname);
}

// Build the name of benchmark file number `index` into `buf`:
// "rand" followed by the index as nine zero-padded digits.
static void benchmark_filename(char *buf, size_t buflen, int index)
{
    snprintf(buf, buflen, "rand%09d", index);
}

// Size of the next I/O request: whatever is left, capped at MAXREAD bytes.
static size_t next_chunk(int64_t left)
{
    const int64_t cap = MAXREAD;
    return (size_t)(left > cap ? cap : left);
}

// Create `filename` and fill it with `filesize` bytes taken from `data`,
// writing at most MAXREAD bytes per call.
// Returns 0 on success, -1 if the file could not be opened, written or closed.
static int write_benchmark_file(const char *filename, const unsigned char *data,
                                int64_t filesize, int debug)
{
    FILE *f = fopen(filename, "wb");
    if (f == NULL) {
        perror(filename);
        return -1;
    }

    int rc = 0;
    for (int64_t left = filesize; left > 0; ) {
        size_t chunk = next_chunk(left);
        if (debug) printf("\twriting %zu bytes\n", chunk);
        if (fwrite(data, sizeof(char), chunk, f) != chunk) {
            perror(filename);
            rc = -1;
            break;
        }
        left -= (int64_t)chunk;
    }

    // fclose flushes buffered data, so a failure here is a write failure too.
    if (fclose(f) != 0 && rc == 0) {
        perror(filename);
        rc = -1;
    }
    return rc;
}

// Read `filesize` bytes from `filename` into `buf`, at most MAXREAD bytes per
// call. The data itself is discarded; only the I/O matters.
// Returns 0 on success, -1 if the file could not be opened or came up short.
static int read_benchmark_file(const char *filename, unsigned char *buf,
                               int64_t filesize, int debug)
{
    FILE *f = fopen(filename, "rb");
    if (f == NULL) {
        perror(filename);
        return -1;
    }

    int rc = 0;
    for (int64_t left = filesize; left > 0; ) {
        size_t chunk = next_chunk(left);
        if (debug) printf("\treading %zu bytes\n", chunk);
        if (fread(buf, sizeof(char), chunk, f) != chunk) {
            fprintf(stderr, "%s: short read\n", filename);
            rc = -1;
            break;
        }
        left -= (int64_t)chunk;
    }

    fclose(f);
    return rc;
}

// Random file read benchmark.
//
// Creates `count` files of `filesize` bytes each in the current directory,
// then performs count*loop reads of randomly chosen files while timing them,
// prints the elapsed time and finally removes the files again.
//
// Options: -s filesize, -n filecount, -l loopcount (float), -d debug, -h help.
// Values that do not parse or are out of range are replaced by the defaults
// with a warning. An unknown option prints the usage and exits with failure.
//
// Returns 0 on success and EXIT_FAILURE if memory or file I/O fails.
int main(int argc, char **argv) {
    // Setting some defaults
    int64_t filesize = 4096;
    int count = 10;
    float loop = 1.0f;
    int debug = 0;
    int rvalue;

    while ((rvalue = getopt(argc, argv, "hs:n:l:d")) != -1) {
        switch (rvalue) {
        case 'h':
            print_usage(stdout, argv[0]);
            return 0;
        case 's': {
            char *end;
            long long v = strtoll(optarg, &end, 10);
            if (end == optarg || *end != '\0') {
                fprintf(stderr, "Filesize '%s' is not a number, resetting to 4096\n", optarg);
                filesize = 4096;
            } else if (v < 0 || v > 1000000000000LL) { // 0 .. 1 terabyte
                fprintf(stderr, "Filesize of %lld bytes is out of bounds, resetting to 4096\n", v);
                filesize = 4096;
            } else {
                filesize = (int64_t)v;
            }
            break;
        }
        case 'n': {
            char *end;
            long v = strtol(optarg, &end, 10);
            if (end == optarg || *end != '\0' || v < 0 || v > 1000000000L) { // 0 .. 1 billion
                fprintf(stderr, "Count out of bounds, resetting to 10\n");
                count = 10;
            } else {
                count = (int)v;
            }
            break;
        }
        case 'l': {
            char *end;
            float v = strtof(optarg, &end);
            // The range test is written positively so that NaN is rejected too.
            if (end == optarg || *end != '\0' || !(v >= 0.0f && v <= 1e6f)) { // 0 .. 1 million
                fprintf(stderr, "Loop out of bounds, resetting to 1.0\n");
                loop = 1.0f;
            } else {
                loop = v;
            }
            break;
        }
        case 'd':
            debug = 1;
            break;
        default:
            // getopt has already printed a diagnostic for the bad option.
            print_usage(stderr, argv[0]);
            return EXIT_FAILURE;
        }
    }

    printf("Creating %d files of size %lld B", count, (long long)filesize);
    // Multiply as doubles: filesize*count can exceed the int64_t range.
    printf(", total data %.2f MB\n", (double)filesize * (double)count / (1024.0*1024.0));

    unsigned char *data = malloc(MAXREAD);
    if (data == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }
    // Only the part of the buffer that is ever written out needs a pattern.
    for (int i = 0; i < MAXREAD && i < filesize; ++i)
        data[i] = (unsigned char)(i % 256);

    char filename[1024];
    int status = EXIT_SUCCESS;
    int created = 0; // number of files that may exist and must be removed

    while (created < count) {
        benchmark_filename(filename, sizeof filename, created);
        if (debug) printf("Creating file %s\n", filename);
        int rc = write_benchmark_file(filename, data, filesize, debug);
        ++created; // counted even on failure: a partial file may exist
        if (rc != 0) {
            status = EXIT_FAILURE;
            break;
        }
    }

    if (status == EXIT_SUCCESS) {
        printf("Reading\n");
        srand((unsigned)time(NULL));
        const double total_reads = (double)count * loop;
        starttime();
        for (double i = 0.0; i < total_reads; ++i) {
            // total_reads > 0 implies count > 0, so the modulo is safe.
            int index = rand() % count;
            benchmark_filename(filename, sizeof filename, index);
            if (debug)
                printf("Loop %lld/%lld: opening file %s\n",
                       (long long)i, (long long)total_reads, filename);
            if (read_benchmark_file(filename, data, filesize, debug) != 0) {
                status = EXIT_FAILURE;
                break;
            }
        }
        // A fractional loop count below 1 would truncate to 0 repetitions;
        // report it as a single repetition instead.
        if (status == EXIT_SUCCESS)
            gettime_r("File read", loop >= 1.0f ? (int)loop : 1);
    }

    printf("Removing\n");
    for (int i = 0; i < created; ++i) {
        benchmark_filename(filename, sizeof filename, i);
        remove(filename);
    }

    free(data);
    return status;
}
Comments