Klaus Schmidinger wrote: > Do you really think that making Seek() inline actually makes things > faster? Can you show some hard numbers? no, i missed the change in tools.c. merged your changes w/ my current tree. Whitespace was the worst part, other than that the only diffs now are: - privatized FadviseDrop - a few more comments, hopefully making things more obvious - spelling fixes - simplified the readahead calculation - removed unnecessary Write() alignment handling - removed the readahead-grows-write-window hack; if necessary this should be done differently. patch vs .41 + vdr-1.3.41-fadvise.diff attached. artur -------------- next part -------------- --- ../vdr-1.3.41-klaus1/tools.c 2006-02-04 17:58:16.000000000 +0100 +++ tools.c 2006-02-04 20:00:21.000000000 +0100 @@ -1133,6 +1133,7 @@ #ifdef USE_FADVISE off_t jumped = curpos-lastpos; // nonzero means we're not at the last offset if ((cachedstart < cachedend) && (curpos < cachedstart || curpos > cachedend)) { + // current position is outside the cached window -- invalidate it. FadviseDrop(cachedstart, cachedend-cachedstart); cachedstart = curpos; cachedend = curpos; @@ -1151,7 +1152,7 @@ // Trigger the readahead IO, but only if we've used at least // 1/2 of the previously requested area. This avoids calling // fadvise() after every read() call. - if (ahead - curpos < (off_t)(readahead - readahead / 2)) {//XXX CLARIFY: isn't this just 'readahead / 2'??? + if (ahead - curpos < (off_t)(readahead / 2)) { posix_fadvise(fd, curpos, readahead, POSIX_FADV_WILLNEED); ahead = curpos + readahead; cachedend = max(cachedend, ahead); @@ -1161,15 +1162,17 @@ } } else - ahead = curpos; // jumped -> we really don't want any readahead. otherwise eg fast-rewind gets in trouble. + ahead = curpos; // jumped -> we really don't want any readahead. otherwise e.g. fast-rewind gets in trouble. } if (cachedstart < cachedend) { if (curpos - cachedstart > READCHUNK * 2) { + // current position has moved forward enough, shrink tail window. FadviseDrop(cachedstart, curpos - READCHUNK - cachedstart); cachedstart = curpos - READCHUNK; } else if (cachedend > ahead && cachedend - curpos > READCHUNK * 2) { + // current position has moved back enough, shrink head window. FadviseDrop(curpos + READCHUNK, cachedend - curpos + READCHUNK); cachedend = curpos + READCHUNK; } @@ -1193,21 +1196,34 @@ lastpos = max(lastpos, curpos); if (written > WRITE_BUFFER) { if (lastpos > begin) { + // Now do three things: + // 1) Start writeback of begin..lastpos range + // 2) Drop the already written range (by the previous fadvise call) + // 3) Handle nonpagealigned data. + // This is why we double the WRITE_BUFFER; the first time around the + // last (partial) page might be skipped, writeback will start only after + // second call; the third call will still include this page and finally + // drop it from cache. off_t headdrop = min(begin, WRITE_BUFFER * 2L); posix_fadvise(fd, begin - headdrop, lastpos - begin + headdrop, POSIX_FADV_DONTNEED); } - begin = lastpos = max(0L, curpos - (KILOBYTE(4) - 1)); + begin = lastpos = curpos; totwritten += written; written = 0; // The above fadvise() works when writing slowly (recording), but could - // leave cached data around when writing at a high rate (cutting). - // Also, it seems in some setups, the above does not trigger any I/O and - // the fdatasync() call below has to do all the work (reiserfs with some - // kind of write gathering enabled). - // We add 'readahead' to the threshold in an attempt to increase cutting - // speed; it's a tradeoff -- speed vs RAM-used. - if (totwritten > MEGABYTE(32) + readahead) { - //fdatasync(fd);//XXX CLARIFY: so what, is this necessary or not??? + // leave cached data around when writing at a high rate, e.g. when cutting, + // because by the time we try to flush the cached pages (above) the data + // can still be dirty - we are faster than the disk I/O. + // So we do another round of flushing, just like above, but at larger + // intervals -- this should catch any pages that couldn't be released + // earlier. + + if (totwritten > MEGABYTE(32)) { + // It seems in some setups, fadvise() does not trigger any I/O and + // a fdatasync() call would be required do all the work (reiserfs with some + // kind of write gathering enabled), but the syncs cause (io) load.. + // Uncomment the next line if you think you need them. + //fdatasync(fd); off_t headdrop = min(curpos - totwritten, totwritten * 2L); posix_fadvise(fd, curpos - totwritten - headdrop, totwritten + headdrop, POSIX_FADV_DONTNEED); totwritten = 0; --- ../vdr-1.3.41-klaus1/tools.h 2006-02-04 17:58:16.000000000 +0100 +++ tools.h 2006-02-04 20:01:20.000000000 +0100 @@ -251,13 +251,13 @@ size_t readahead; size_t written; size_t totwritten; + int FadviseDrop(off_t Offset, off_t Len); public: cUnbufferedFile(void); ~cUnbufferedFile(); int Open(const char *FileName, int Flags, mode_t Mode = DEFFILEMODE); int Close(void); void SetReadAhead(size_t ra); - int FadviseDrop(off_t Offset, off_t Len); off_t Seek(off_t Offset, int Whence); ssize_t Read(void *Data, size_t Size); ssize_t Write(const void *Data, size_t Size); --- ../vdr-1.3.41-klaus1/cutter.c 2006-02-04 17:58:16.000000000 +0100 +++ cutter.c 2006-01-30 06:57:07.000000000 +0100 @@ -125,7 +125,6 @@ error = "toFile 1"; break; } - toFile->SetReadAhead(MEGABYTE(20)); FileSize = 0; } LastIFrame = 0; @@ -166,7 +165,6 @@ error = "toFile 2"; break; } - toFile->SetReadAhead(MEGABYTE(20)); FileSize = 0; } }