> > component partitions, e.g. /dev/sda7: 39MB/s
> > raid device /dev/md2: 31MB/s
> > lvm device /dev/main/media: 53MB/s
> >
> > (oldish system - but note that lvm device is *much* faster)
>
> Did you test component device and raid device speed using the
> read-ahead settings tuned for lvm reads? If so, that's not a fair
> comparison. :-)

I did an eval with a vendor who claimed that their lvm actually improved bandwidth because it somehow triggered better full-stripe operations, or readahead, or something. (That claim was filtered through a marketing person, of course ;( )

> Is there a way for me to simulate readahead in userspace, i.e. can
> I do lots of sequential asynchronous reads in parallel?

There is async IO, but I don't think this is going to help you much.

> Also, is there a way to disable caching of reads? Having to clear

Yes: O_DIRECT. I'm attaching a little program I wrote which basically just shows you incremental bandwidth. You can use it to show the zones on a disk (just run "iorate -r /dev/hda -l 9999" and plot the results), or to do normal r/w bandwidth tests without being confused by the page-cache. You can even use it as a filter to measure tape backup performance. It doesn't try to do anything with random seeks, and it doesn't do anything multi-stream.

Regards, mark hahn.
/* iorate.c - measure rates of sequential IO, showing incremental bandwidth written by Mark Hahn (hahn@xxxxxxxxxxx) 2003,2004,2005 the main point of this code is to illustrate the danger of running naive bandwidth tests on files that are small relative to the memory/disk bandwidth ratio of your system. that is, on any system, the incremental bandwidth will start out huge, since IO is purely to the page cache. once you exceed that size, bandwidth will be dominated by the real disk performance. but using the average of these two modes is a mistake, even if you use very large files. */ #define _LARGEFILE64_SOURCE 1 #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <errno.h> #include <sys/time.h> #include <sys/fcntl.h> #include <sys/stat.h> #include <stdarg.h> #include <string.h> #include <sys/mman.h> #ifdef O_LARGEFILE #define LF O_LARGEFILE #elif defined(_O_LARGEFILE) #define LF _O_LARGEFILE #else #define LF 0 #endif #ifndef O_DIRECT #define O_DIRECT 040000 #endif typedef unsigned long long u64; u64 bytes = 0, bytesLast = 0; double timeStart = 0, timeLast = 0; /* default reporting interval is every 2 seconds; in 2004, an entry-level desktop disk will sustain around 50 MB/s, so the default bytes interval is 100 MB. whichever comes first. 
*/ u64 byteInterval = 100; double timeInterval = 2; double gtod() { struct timeval tv; gettimeofday(&tv,0); return tv.tv_sec + 1e-6 * tv.tv_usec; } void dumpstats(int force) { u64 db = bytes - bytesLast; double now = gtod(); double dt; static int first = 1; if (timeLast == 0) timeStart = timeLast = now; dt = now - timeLast; if (!force && db < byteInterval && dt < timeInterval) return; if (first) { printf("#%7s %7s %7s %7s\n", "secs", "MB", "MB/sec", "MB/sec"); first = 0; } printf("%7.3f %7.3f %7.3f %7.3f\n", now - timeStart, 1e-6 * bytes, 1e-6 * db / dt, 1e-6 * bytes / (now-timeStart)); timeLast = now; bytesLast = bytes; } void usage() { fprintf(stderr,"iorate [-r/w filename] [-d] [-c chunksz][-b byteivl][-t ivl][-l szlim] [-r in] [-w out]\n"); fprintf(stderr,"-r in or -w out select which file is read or written ('-' for stdin/out)\n"); fprintf(stderr,"-c chunksz - size of chunks written (KB);\n"); fprintf(stderr,"-t timeinterval - collect rate each timeinterval seconds;\n"); fprintf(stderr,"-b byteinterval - collect rate each byteinterval MB;\n"); fprintf(stderr,"-l limit - total output size limit (MB);\n"); fprintf(stderr,"-d use O_DIRECT\n"); fprintf(stderr,"defaults are: '-c 8 -b 20 -t 10 -l 10'\n"); exit(1); } void fatal(char *format, ...) { va_list ap; va_start(ap,format); vfprintf(stderr,format,ap); fprintf(stderr,": errno=%d (%s)\n",errno,strerror(errno)); va_end(ap); dumpstats(1); exit(1); } /* allocate a buffer using mmap to ensure it's page-aligned. 
O_DIRECT *could* be more strict than that, but probably isn't */ void *myalloc(unsigned size) { unsigned s = (size + 4095) & ~4095U; void *p = mmap(0, s, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); if (p == MAP_FAILED) return 0; return p; } int main(int argc, char *argv[]) { unsigned size = 8; char *buffer; u64 limit = 10; char *fnameI = 0; char *fnameO = 0; int fdI = 0; int fdO = 1; int doRead = 0; int doWrite = 0; int doDirect = 0; int letter; while ((letter = getopt(argc,argv,"r:w:b:c:l:t:d")) != -1) { switch(letter) { case 'r': fnameI = optarg; doRead = 1; break; case 'w': fnameO = optarg; doWrite = 1; break; case 'b': byteInterval = atoi(optarg); break; case 'c': size = atoi(optarg); break; case 'l': limit = atoi(optarg); break; case 't': timeInterval = atof(optarg); break; case 'd': doDirect = 1; break; default: usage(); } } if (argc != optind) usage(); byteInterval *= 1e6; limit *= 1e6; size *= 1024; setbuf(stdout, 0); fprintf(stderr,"chunk %dK, byteInterval %uM, timeInterval %f, limit %uM\n", size>>10, (unsigned)(byteInterval>>20), timeInterval, (unsigned)(limit>>20)); if (doRead && fnameI && strcmp(fnameI,"-")) { fdI = open(fnameI, O_RDONLY | LF); if (fdI == -1) fatal("open(read) failed"); } if (doWrite && fnameO && strcmp(fnameO,"-")) { int flags = O_RDWR | O_CREAT | LF; if (doDirect) flags |= O_DIRECT; fdO = open(fnameO, flags, 0600); if (fdO == -1) fatal("open(write) failed"); } buffer = myalloc(size); memset(buffer,'m',size); timeStart = timeLast = gtod(); bytes = 0; while (bytes < limit) { int c = size; dumpstats(0); if (doRead) { c = read(fdI,buffer,c); if (c == -1) fatal("read failed"); } if (doWrite) { c = write(fdO,buffer,c); if (c == -1) fatal("write failed"); } bytes += c; /* short read/write means EOF. */ if (c < size) break; } dumpstats(1); return 0; }