Well, vdr with the recent cUnbufferedFile changes was flushing the data
buffers in huge bursts; this was even worse than slowly filling up the
caches -- the large (IIRC ~10M) bursts caused latency problems (apps
visibly freezing etc.).

This patch makes vdr use a much more aggressive disk access strategy.
Writes are flushed out almost immediately and the IO is more evenly
distributed. While recording and/or replaying, the caches do not grow,
and when vdr is done accessing a video file, all cached data from that
file is dropped.

I've tested this with both local disks and NFS-mounted ones, and it
seems to do the right thing. Writes get flushed every 1..2s at a rate
of 0.5..1M/s instead of the >10M bursts. For async-mounted NFS servers
the writes get collected by the NFS server and written out normally.
Local disks get an extra feature -- you can use the HD activity LED as
a "recording" indicator :^)

As posix_fadvise requires kernel v2.5.60 and glibc v2.2, you'll need at
least those versions to see any difference. (Without posix_fadvise you
will not get the fdatasyncs every 10M -- if somebody really wants them,
they should be controlled by a config option.)

Possible further improvements: switch from POSIX_FADV_SEQUENTIAL to
POSIX_FADV_RANDOM, since we're doing manual readahead anyway (or just
leave this as is and drop the readahead). Not using POSIX_FADV_RANDOM
is probably one of the causes of the "leaks", so some of the
workarounds could then go too. (Two standalone sketches -- one of the
write side, one of the read-side POSIX_FADV_RANDOM variant -- follow
after the patch.)

artur
-------------- next part --------------
--- vdr-1.3.36.org/tools.h	2005-11-05 11:54:39.000000000 +0100
+++ vdr-1.3.36/tools.h	2005-11-16 15:51:27.000000000 +0100
@@ -209,6 +209,9 @@ private:
   off_t end;
   off_t ahead;
   ssize_t written;
+  ssize_t totwritten;
+  size_t readahead;
+  size_t pendingreadahead;
 public:
   cUnbufferedFile(void);
   ~cUnbufferedFile();
--- vdr-1.3.36.org/tools.c	2005-11-04 17:33:18.000000000 +0100
+++ vdr-1.3.36/tools.c	2005-11-16 16:06:26.000000000 +0100
@@ -851,8 +851,7 @@ bool cSafeFile::Close(void)
 
 // --- cUnbufferedFile -------------------------------------------------------
 
-#define READ_AHEAD MEGABYTE(2)
-#define WRITE_BUFFER MEGABYTE(10)
+#define WRITE_BUFFER KILOBYTE(800)
 
 cUnbufferedFile::cUnbufferedFile(void)
 {
@@ -869,7 +868,13 @@ int cUnbufferedFile::Open(const char *Fi
   Close();
   fd = open(FileName, Flags, Mode);
   begin = end = ahead = -1;
+  readahead = 16*1024;
+  pendingreadahead = 0;
   written = 0;
+  totwritten = 0;
+  if (fd >= 0) {
+     posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+     }
   return fd;
 }
 
@@ -880,10 +885,10 @@ int cUnbufferedFile::Close(void)
      end = ahead;
      if (begin >= 0 && end > begin) {
         //dsyslog("close buffer: %d (flush: %d bytes, %ld-%ld)", fd, written, begin, end);
-        if (written)
+        if (0 && written)   // fdatasync intentionally disabled, see note above
           fdatasync(fd);
-        posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);
        }
+     posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
      begin = end = ahead = -1;
      written = 0;
      }
@@ -899,35 +904,92 @@ off_t cUnbufferedFile::Seek(off_t Offset
   return -1;
 }
 
+// When replaying and going e.g. FF->PLAY, the position jumps back 2..8M;
+// hence we might not want to drop that data at once.
+// Ignoring this for now to avoid making things even more complex, but we
+// could at least try to handle the common cases
+// (PLAY->FF->PLAY, small jumps, moving editing marks etc.).
+
 ssize_t cUnbufferedFile::Read(void *Data, size_t Size)
 {
   if (fd >= 0) {
      off_t pos = lseek(fd, 0, SEEK_CUR);
-     // jump forward - adjust end position
-     if (pos > end)
-        end = pos;
-     // after adjusting end - don't clear more than previously requested
-     if (end > ahead)
-        end = ahead;
-     // jump backward - drop read ahead of previous run
-     if (pos < begin)
-        end = ahead;
+     off_t jumped = end - pos; // nonzero means we're not at the last offset - some kind of jump happened
+     if (jumped) {
+        pendingreadahead += ahead - end + KILOBYTE(64);
+        // jumped forward? - treat it as if we had read all the way to the current pos
+        if (pos > end) {
+           end = pos;
+           // ...but clamp at ahead so we don't clear more than previously requested
+           // (would be mostly harmless anyway, unless we have more than one reader of this file);
+           // allow a little extra, just in case the kernel prefetched more than we requested
+           if (end > (ahead + KILOBYTE(512)))
+              end = ahead + KILOBYTE(512);
+           }
+        // jumped backward? - drop both the last read _and_ the readahead
+        if (pos < begin)
+           end = ahead + KILOBYTE(512);
+        // jumped backward, but still inside the previous read window? - pretend we read less
+        if ((pos >= begin) && (pos < end))
+           end = pos;
+        }
+
+     ssize_t bytesRead = safe_read(fd, Data, Size);
+
+     // now drop all data accessed during the _previous_ Read()
      if (begin >= 0 && end > begin)
-        posix_fadvise(fd, begin - KILOBYTE(200), end - begin + KILOBYTE(200), POSIX_FADV_DONTNEED);//XXX macros/parameters???
+        posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);
+
      begin = pos;
-     ssize_t bytesRead = safe_read(fd, Data, Size);
      if (bytesRead > 0) {
        pos += bytesRead;
-        end = pos;
        // this seems to trigger a non blocking read - this
        // may or may not have been finished when we will be called next time.
        // If it is not finished we can't release the not yet filled buffers.
        // So this is commented out till we find a better solution.
-        //posix_fadvise(fd, pos, READ_AHEAD, POSIX_FADV_WILLNEED);
-        ahead = pos + READ_AHEAD;
+
+        // Hmm, it's actually harmless if we're really going to read the data
+        // -- the whole point of read-ahead is to start the IO early...
+        // The comment above only applies when we jump somewhere else _before_
+        // the IO started here finishes. How common would that be? It could be
+        // handled e.g. by posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED) called
+        // some time after we detect a jump. Ignoring this for now. /AS
+
+        // Ugh, it seems to cause some "leaks" at every jump... Either the
+        // brute-force approach mentioned above should work (it's not much
+        // different from O_DIRECT), or we could keep notes about the ahead
+        // reads and flush them after some time. The latter seems like
+        // overkill though, so trying the former...
+
+        if (!jumped) {
+           if (readahead <= Size)   // automagically tune the readahead size
+              readahead = Size * 2;
+           posix_fadvise(fd, pos, readahead, POSIX_FADV_WILLNEED);
+           ahead = pos + readahead;
+           }
+        else {
+           // flush it all, mostly to get rid of unflushed readahead coming
+           // from _previous_ jumps; possibly rate-limit this in some way
+           // (bytes prefetched? number of jumps? time?)
+
+           //// first-jump-after-every-60s seemed as good as any :^)
+           //time_t t = time(NULL);
+           //if (t >= nextflush) {
+           //   posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+           //   nextflush = t + 60;
+           //   }
+
+           // the accounting is _very_ inaccurate; I've seen ~50M get flushed
+           // when the limit was set to 4M. As long as this triggers after
+           // _some_ jumps we should be OK though.
+           if (pendingreadahead > MEGABYTE(1)) {
+              posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+              pendingreadahead = 0;
+              }
+           ahead = pos;
+           }
        }
-     else
-        end = pos;
+     end = pos;
      return bytesRead;
      }
   return -1;
@@ -950,12 +1012,18 @@ ssize_t cUnbufferedFile::Write(const voi
        end = pos + bytesWritten;
        if (written > WRITE_BUFFER) {
           //dsyslog("flush buffer: %d (%d bytes, %ld-%ld)", fd, written, begin, end);
-           fdatasync(fd);
-           if (begin >= 0 && end > begin)
-              posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);
-           begin = end = -1;
+           if (begin >= 0 && end > begin) {
+              off_t headdrop = min((long)(begin & ~4095), (long)WRITE_BUFFER * 2);   // min(), not max(): the fadvise offset below must never go negative
+              posix_fadvise(fd, (begin & ~4095) - headdrop, end - begin + headdrop, POSIX_FADV_DONTNEED);   // drop the written range plus up to two write buffers before it
+              }
+           begin = end = -1;
+           totwritten += written;
           written = 0;
           }
+        if (totwritten > MEGABYTE(10)) {   // safety net: drop anything the per-range calls above may have missed
+           posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+           totwritten = 0;
+           }
        }
     return bytesWritten;
     }
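PS: for anyone who wants to play with the write-side idea outside of
vdr, here is a minimal standalone sketch (not part of the patch; the
usage, buffer sizes and file handling are made up for illustration).
It copies stdin to a file while repeatedly telling the kernel to drop
the cached pages behind the write position. Unlike the patch, which
leaves writeback to the kernel, the sketch calls fdatasync() before
POSIX_FADV_DONTNEED, since DONTNEED only discards pages that are
already clean:

/* streamwrite.c - standalone sketch, NOT part of vdr.
   Build: gcc -O2 -o streamwrite streamwrite.c */
#define _XOPEN_SOURCE 600           /* for posix_fadvise() (glibc >= 2.2) */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define WRITE_BUFFER (800 * 1024)   /* flush threshold, as in the patch */

int main(int argc, char **argv)
{
  if (argc != 2) {
     fprintf(stderr, "usage: %s <outfile> < data\n", argv[0]);
     return 1;
     }
  int fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC, 0666);
  if (fd < 0) {
     perror("open");
     return 1;
     }
  char buf[64 * 1024];
  off_t begin = 0, pos = 0;         /* begin..pos = written, not yet dropped */
  ssize_t n;
  while ((n = read(0, buf, sizeof(buf))) > 0) {
     if (write(fd, buf, n) != n) {
        perror("write");
        return 1;
        }
     pos += n;
     if (pos - begin > WRITE_BUFFER) {
        fdatasync(fd);              /* make the pages clean... */
        posix_fadvise(fd, begin, pos - begin, POSIX_FADV_DONTNEED);
        begin = pos;                /* ...and forget about them */
        }
     }
  posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);   /* drop the tail */
  close(fd);
  return 0;
}

Watching e.g. vmstat while this runs should show the page cache staying
flat instead of growing by the size of the output file.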
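And a rough sketch of the POSIX_FADV_RANDOM + manual readahead variant
mentioned under "possible further improvements" (again just an
illustration, not the patch; open_for_replay(), read_at() and the
readahead size are invented names/values). With kernel readahead
disabled, the only cached pages are the ones we explicitly asked for,
so the DONTNEED bookkeeping gets much simpler:

/* readsketch.c - standalone sketch, NOT part of vdr. */
#define _XOPEN_SOURCE 600           /* for posix_fadvise() and pread() */
#include <fcntl.h>
#include <unistd.h>

#define READAHEAD (256 * 1024)      /* arbitrary; the patch auto-tunes this */

int open_for_replay(const char *name)
{
  int fd = open(name, O_RDONLY);
  if (fd >= 0)
     posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);  /* no kernel readahead */
  return fd;
}

/* read Size bytes at pos; prefetch the next chunk and drop the current
   one. Since the kernel does no readahead of its own here, everything
   cached for this file is either the WILLNEED window or the chunk we
   just dropped. */
ssize_t read_at(int fd, void *Data, size_t Size, off_t pos)
{
  ssize_t n = pread(fd, Data, Size, pos);
  if (n > 0) {
     posix_fadvise(fd, pos + n, READAHEAD, POSIX_FADV_WILLNEED);  /* start the next IO early */
     posix_fadvise(fd, pos, n, POSIX_FADV_DONTNEED);              /* drop what we consumed */
     }
  return n;
}

A jump would then just mean calling read_at() with a different pos --
there is no stale readahead window to account for, so most of the jump
handling (and the pendingreadahead accounting) in the patch could go.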