Re: RAID-5 streaming read performance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> > component partitions, e.g. /dev/sda7: 39MB/s
> > raid device /dev/md2:                 31MB/s
> > lvm device /dev/main/media:           53MB/s
> >
> > (oldish system - but note that lvm device is *much* faster)
> 
> Did you test component device and raid device speed using the
> read-ahead settings tuned for lvm reads?  If so, that's not a fair
> comparison.  :-)

I did an eval with a vendor who claimed that their lvm actually
improved bandwidth because it somehow triggered better full-stripe
operations, or readahead, or something.  filtered through a marketing
person, of course ;(

> Is there a way for me to simulate readahead in userspace, i.e. can
> I do lots of sequential asynchronous reads in parallel?

there is async IO, but I don't think this is going to help you much.

> Also, is there a way to disable caching of reads?  Having to clear

yes: O_DIRECT.

I'm attaching a little program I wrote which basically just shows you
incremental bandwidth.  you can use it to show the zones on a disk
(just iorate -r /dev/hda -l 9999 and plot the results), or to
do normal r/w bandwidth without being confused by the page-cache.
you can even use it as a filter to measure tape backup performance.

it doesn't try to do anything with random seeks.  it doesn't do 
anything multi-stream.

regards, mark hahn.
/* iorate.c - measure rates of sequential IO, showing incremental
   bandwidth.  written by Mark Hahn (hahn@xxxxxxxxxxx) 2003,2004,2005.
   the main point of this code is to illustrate the danger of 
   running naive bandwidth tests on files that are small relative
   to the memory/disk bandwidth ratio of your system.  that is,
   on any system, the incremental bandwidth will start out huge,
   since IO is purely to the page cache.  once you exceed that size,
   bandwidth will be dominated by the real disk performance.  but 
   using the average of these two modes is a mistake, even if 
   you use very large files.
*/
#define _LARGEFILE64_SOURCE 1
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <string.h>
#include <sys/mman.h>

/* LF is OR'ed into the flags of every open() call in main().  it maps to
   whichever spelling of the large-file flag the headers provide, or to 0
   (no extra flag) when neither is defined. */
#ifdef O_LARGEFILE
#define LF O_LARGEFILE
#elif defined(_O_LARGEFILE)
#define LF _O_LARGEFILE
#else
#define LF 0
#endif

/* fallback for headers that do not expose O_DIRECT.
   NOTE(review): 040000 looks like the Linux x86 value; other architectures
   may assign a different bit to O_DIRECT - confirm before relying on this
   fallback anywhere else. */
#ifndef O_DIRECT
#define O_DIRECT 040000
#endif

/* unsigned type (at least 64 bits) used for all byte counters. */
typedef unsigned long long u64;

/* running state shared between main() and dumpstats():
   bytes     - total bytes transferred so far
   bytesLast - value of bytes when the previous report line was printed
   timeStart - gtod() time at which measurement started
   timeLast  - gtod() time of the previous report line (0 = not started) */
u64 bytes = 0, bytesLast = 0;
double timeStart = 0, timeLast = 0;

/* default reporting interval is every 2 seconds;
   in 2004, an entry-level desktop disk will sustain around 50 MB/s,
   so the default bytes interval is 100 MB.  whichever comes first.
   byteInterval is held in MB here; main() scales it to bytes before the
   transfer loop starts.  timeInterval is in seconds. */
u64 byteInterval = 100;
double timeInterval = 2;

/* current gettimeofday() reading collapsed into a single double:
   whole seconds plus the microsecond field scaled to seconds. */
double gtod() {
    struct timeval stamp;
    double whole;
    double fraction;

    gettimeofday(&stamp, 0);

    whole = stamp.tv_sec;
    fraction = 1e-6 * stamp.tv_usec;
    return whole + fraction;
}

/* print one line of statistics if a reporting interval has elapsed.

   a line is printed when 'force' is non-zero, or when at least
   byteInterval bytes have been transferred or timeInterval seconds have
   passed since the previous line.  each line shows: seconds since the
   start, total MB transferred, incremental MB/sec since the previous
   line, and average MB/sec since the start.  a '#'-prefixed header is
   printed before the first line.

   force - non-zero to print regardless of the intervals. */
void dumpstats(int force) {
    u64 db = bytes - bytesLast;
    double now = gtod();
    double dt;
    double elapsed;
    double rateNow;
    double rateAvg;
    static int first = 1;

    /* first call before main() has started the clock (e.g. via fatal()
       on an open failure): start timing from this moment. */
    if (timeLast == 0) timeStart = timeLast = now;

    dt = now - timeLast;

    if (!force && db < byteInterval && dt < timeInterval) return;

    if (first) {
        printf("#%7s %7s %7s %7s\n",
               "secs",
               "MB",
               "MB/sec",
               "MB/sec");
        first = 0;
    }

    elapsed = now - timeStart;

    /* a forced report can arrive with no measurable time elapsed;
       report a rate of 0 rather than dividing by zero and printing
       nan/inf. */
    rateNow = (dt > 0) ? 1e-6 * db / dt : 0.0;
    rateAvg = (elapsed > 0) ? 1e-6 * bytes / elapsed : 0.0;

    printf("%7.3f %7.3f %7.3f %7.3f\n",
           elapsed,
           1e-6 * bytes,
           rateNow,
           rateAvg);
    timeLast = now;
    bytesLast = bytes;
}

/* print the command-line summary to stderr and exit with status 1.
   the defaults line must agree with the initial values used by the
   program: chunk 8 KB, byteInterval 100 MB, timeInterval 2 s,
   limit 10 MB. */
void usage() {
    fprintf(stderr,"iorate [-r/w filename] [-d] [-c chunksz][-b byteivl][-t ivl][-l szlim] [-r in] [-w out]\n");
    fprintf(stderr,"-r in or -w out select which file is read or written ('-' for stdin/out)\n");
    fprintf(stderr,"-c chunksz - size of chunks written (KB);\n");
    fprintf(stderr,"-t timeinterval - collect rate each timeinterval seconds;\n");
    fprintf(stderr,"-b byteinterval - collect rate each byteinterval MB;\n");
    fprintf(stderr,"-l limit - total output size limit (MB);\n");
    fprintf(stderr,"-d use O_DIRECT\n");
    /* previously advertised '-b 20 -t 10', which did not match the code */
    fprintf(stderr,"defaults are: '-c 8 -b 100 -t 2 -l 10'\n");
    exit(1);
}

/* report a fatal error and terminate.

   prints the printf-style message to stderr, followed by the errno value
   (and its strerror text) that was current when fatal() was entered, then
   emits a final statistics line via dumpstats(1) and exits with status 1.

   format - printf-style format string; remaining arguments as for printf. */
void fatal(char *format, ...) {
    /* capture errno first: the stdio calls below are allowed to change it,
       which would make the report describe the wrong failure. */
    int savedErrno = errno;
    va_list ap;

    va_start(ap,format);
    vfprintf(stderr,format,ap);
    va_end(ap);

    fprintf(stderr,": errno=%d (%s)\n",savedErrno,strerror(savedErrno));
    dumpstats(1);
    exit(1);
}

/* obtain a buffer from an anonymous private mmap rather than malloc, so
   that its start is page-aligned (O_DIRECT *could* demand stricter
   alignment than that, but probably doesn't).  the request is rounded up
   to a whole number of 4096-byte units.  returns 0 if the mapping cannot
   be created. */
void *myalloc(unsigned size) {
    const unsigned unit = 4096U;
    unsigned rounded = ((size + (unit - 1U)) / unit) * unit;
    void *region = mmap(0, rounded,
                        PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS,
                        -1, 0);

    return (region != MAP_FAILED) ? region : 0;
}

/* MAX_OPTION_VALUE: largest value accepted for a numeric option.
   MAX_CHUNK_KB: largest -c value whose size in bytes still fits in an int,
   since the transfer loop keeps read()/write() results in an int. */
enum { MAX_OPTION_VALUE = 0x7fffffff, MAX_CHUNK_KB = 0x7fffffff / 1024 };

/* parse a non-negative decimal option value.
   empty input, trailing junk, negative or out-of-range values are usage
   errors: usage() is called and does not return. */
static unsigned long parse_count(const char *text) {
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE
        || value < 0 || value > MAX_OPTION_VALUE)
        usage();
    return (unsigned long)value;
}

/* iorate entry point.

   parses the options described by usage(), opens the input and/or output
   (stdin/stdout when the name is "-"), then moves data in fixed-size
   chunks until the size limit is reached or a short read/write occurs,
   calling dumpstats() along the way to print incremental bandwidth.
   returns 0 on normal completion; errors exit through fatal()/usage(). */
int main(int argc, char *argv[]) {
    unsigned size = 8;      /* chunk size: KB while parsing, bytes afterwards */
    char *buffer;
    u64 limit = 10;         /* size limit: MB while parsing, bytes afterwards */
    char *fnameI = 0;
    char *fnameO = 0;
    int fdI = 0;            /* stdin unless -r names a file */
    int fdO = 1;            /* stdout unless -w names a file */
    int doRead = 0;
    int doWrite = 0;
    int doDirect = 0;

    int letter;
    while ((letter = getopt(argc,argv,"r:w:b:c:l:t:d")) != -1) {
        switch(letter) {
        case 'r':
            fnameI = optarg;
            doRead = 1;
            break;
        case 'w':
            fnameO = optarg;
            doWrite = 1;
            break;
        case 'b':
            byteInterval = parse_count(optarg);
            break;
        case 'c': {
            unsigned long kb = parse_count(optarg);
            /* a zero-sized chunk never advances 'bytes', so the transfer
               loop would spin forever; an oversized one overflows below. */
            if (kb == 0 || kb > MAX_CHUNK_KB) usage();
            size = (unsigned)kb;
            break;
        }
        case 'l':
            limit = parse_count(optarg);
            break;
        case 't':
            timeInterval = atof(optarg);
            break;
        case 'd':
            doDirect = 1;
            break;
        default:
            usage();
        }
    }

    if (argc != optind) usage();

    /* options are given in MB (decimal) and KB; work in bytes from here on */
    byteInterval *= 1000000;
    limit *= 1000000;
    size *= 1024;

    setbuf(stdout, 0);

    /* report the settings in the same units they were given in */
    fprintf(stderr,"chunk %uK, byteInterval %uM, timeInterval %f, limit %uM\n",
            size>>10,
            (unsigned)(byteInterval / 1000000),
            timeInterval,
            (unsigned)(limit / 1000000));

    if (doRead && fnameI && strcmp(fnameI,"-")) {
        int flags = O_RDONLY | LF;
        /* -d applies to reads too, so read tests can bypass the page cache */
        if (doDirect) flags |= O_DIRECT;

        fdI = open(fnameI, flags);
        if (fdI == -1) fatal("open(read) failed");
    }
    if (doWrite && fnameO && strcmp(fnameO,"-")) {
        int flags = O_RDWR | O_CREAT | LF;
        if (doDirect) flags |= O_DIRECT;

        fdO = open(fnameO, flags, 0600);
        if (fdO == -1) fatal("open(write) failed");
    }

    buffer = myalloc(size);
    /* myalloc returns 0 on failure; don't hand that to memset */
    if (!buffer) fatal("buffer allocation failed");
    memset(buffer,'m',size);

    timeStart = timeLast = gtod();

    bytes = 0;

    while (bytes < limit) {
        int c = size;

        dumpstats(0);

        if (doRead) {
            c = read(fdI,buffer,c);
            if (c == -1) fatal("read failed");
        }

        if (doWrite) {
            c = write(fdO,buffer,c);
            if (c == -1) fatal("write failed");
        }

        bytes += c;

        /* short read/write means EOF.  c is non-negative here because
           the -1 cases above do not return. */
        if ((unsigned)c < size) break;
    }

    dumpstats(1);

    return 0;
}

[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux