Hi Jeff,

Attached is the sample program. It should compile as-is.
Let me know your findings or if you need additional info.

Thanks,
Qiuyang

-----Original Message-----
From: Jeff Moyer [mailto:jmoyer@xxxxxxxxxx]
Sent: Tuesday, March 03, 2009 10:10 AM
To: Qiuyang Wu
Cc: linux-nfs@xxxxxxxxxxxxxxx; linux-fsdevel@xxxxxxxxxxxxxxx
Subject: Re: pread/pwrite bug on linux?

Qiuyang Wu <Qiuyang.Wu@xxxxxxxxxxxx> writes:

> Hi All,
>
> Are there any known bugs with pread/pwrite on RHEL Linux when the disk
> partition is close to full?
>
> Reduced a complex application fatal down to a simple program just
> opens a regular file and performs 1M sequential pwrite() of size 8KB
> blocks; at every 100th write, does a pread() to load the very first
> 8KB block and validate its content still matching what was originally
> written.

Could you attach your test program, please?

> BEHAVIOR: pwrite() works for many iterations, then pread() suddenly
> returns data of the requested size but filled with 0's, and strerror
> shows errno="No such file or directory", condition
> if(size_to_read != pread(fd, buf, size_to_read, ...)) ...
> is not triggered and application has to check errno immediately after
> every call to pread.

The value of errno is undefined if the pread call did not return -1.

Cheers,
Jeff
/******************************************************************************
ABSTRACT:

  Simple program to test the behavior of pwrite/pread calls when there is
  insufficient disk space. The expectation is that an error is reported when
  pushing data to disk with pwrite, rather than a later pread returning bad
  data, which would imply data loss.

  The program does the following -
    - pwrite up to 1M chunks of 8KB each (total ~8GB if it finishes)
    - The very first 8KB chunk is filled with the byte value 13.
    - At every 100th write, pread the first 8KB chunk and validate
      (part of) its content.
    - Before and after each pread, also use fstat to sanity check that the
      program is in sync with the file.

  The process exits with status 0 only if every write and every read-back
  check succeeded; any failure (including a filled disk) gives a non-zero
  exit status.

  To compile with GCC (e.g. gcc-4.2.2) -
    >gcc io_test.c

  To run:
    The program requires one argument, which is the path to a directory in
    which a regular file is created for write/read testing.
    Example
    >a.out /path/to/writable/dir

  To demonstrate the problem
    Choose a disk partition that is almost full, for example, the following
    with ~1GB available space -
    > cd /remote/testdsk/qwu
    > df -k .
    Filesystem           1K-blocks      Used Available Use% Mounted on
    emcns202:/testdsk    318033408 316740544   1292864 100% /remote/testdsk
    > a.out /remote/testdsk/qwu

HISTORY:
  Feb 24 2009 - Qiuyang Wu : Created.
******************************************************************************/

/*
 * Feature-test macros; they must appear before the first #include.
 * _XOPEN_SOURCE makes <unistd.h> declare pread/pwrite even in strict ISO C
 * mode, and _FILE_OFFSET_BITS=64 keeps off_t wide enough for the ~8GB file
 * on 32-bit builds.
 */
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 700
#endif
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>

#define FILE_NAME "rw_test_file"

enum {
    TOTAL_WRITE = 1000000, /* total number of pwrites */
    CHUNK_SIZE  = 8192,    /* buf size per write */
    CMP_SIZE    = 10,      /* compare the first 10 bytes */
    READ_INTERV = 100      /* read at every 100th write */
};

static char *check_args(const char *dir);
static int pwrite_pread_test(const char *fname);
static void stat_file(const char *op, int fd, size_t size, off_t offset);
static int check_pread(int fd, const char *fname, char *buf, const char *refbuf);

/*
 * Entry point. Expects exactly one argument: a writable directory in which
 * the test file is created.
 *
 * Exit status: 0 when the whole test passed, 1 on a usage error, and
 * EXIT_FAILURE when the arguments are rejected or the test fails.
 */
int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("Usage: %s dir_path \n", argc > 0 ? argv[0] : "io_test");
        exit(1);
    }

    char *full_fname = check_args(argv[1]);
    if (full_fname == NULL) {
        return EXIT_FAILURE;
    }

    int ok = pwrite_pread_test(full_fname);
    free(full_fname);

    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Creates/opens `fname` and writes up to TOTAL_WRITE chunks of CHUNK_SIZE
 * bytes sequentially with pwrite. After every READ_INTERV-th write the first
 * chunk is read back and verified by check_pread().
 *
 * Returns 1 if every write, read-back check and the final close succeeded,
 * 0 otherwise. The file is left in place after the run.
 */
static int pwrite_pread_test(const char *fname)
{
    int ok = 1;
    char *buf = NULL;
    char *refbuf = NULL;
    off_t cur_offset = 0;

    int fd = open(fname, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
    if (fd == -1) {
        printf("Error (%s) opening '%s' for writing.\n", strerror(errno), fname);
        return 0;
    }

    buf = malloc(CHUNK_SIZE);
    refbuf = malloc(CHUNK_SIZE);
    if (buf == NULL || refbuf == NULL) {
        printf("Error: out of memory allocating I/O buffers.\n");
        ok = 0;
        goto out;
    }

    /* The first chunk written and the reference copy both hold byte 13. */
    memset(buf, 13, CHUNK_SIZE);
    memset(refbuf, 13, CHUNK_SIZE);

    for (int w_count = 1; w_count <= TOTAL_WRITE; ++w_count) {
        ssize_t written = pwrite(fd, buf, CHUNK_SIZE, cur_offset);

        if (written == -1) {
            /* Just print errno and bail out, a filled disk is expected at some point */
            printf("Error (%s) writing to file '%s'.\n", strerror(errno), fname);
            ok = 0;
            break;
        }
        if (written != CHUNK_SIZE) {
            /* errno is only meaningful when pwrite returns -1, so do not print it here. */
            printf("Error: short write (%zd of %d bytes) to file '%s'.\n",
                   written, CHUNK_SIZE, fname);
            ok = 0;
            break;
        }

        if (w_count % READ_INTERV == 0) {
            printf("writes=%9d ::", w_count);
            stat_file("after-PWRITE", fd, CHUNK_SIZE, cur_offset);
            ok = check_pread(fd, fname, buf, refbuf);
            if (!ok) {
                break;
            }
        }

        cur_offset += CHUNK_SIZE;
        /* check_pread() overwrites buf, so refill it for the next pwrite. */
        memset(buf, w_count % 10, CHUNK_SIZE);
    }

out:
    free(buf);
    free(refbuf);
    /* Write errors can be reported as late as close(), so its result counts too. */
    if (close(fd) == -1) {
        printf("Error (%s) closing file '%s'.\n", strerror(errno), fname);
        ok = 0;
    }
    return ok;
}

/*
 * Reads the first CHUNK_SIZE bytes of `fd` into `buf` and compares the first
 * CMP_SIZE bytes against `refbuf`. File size information is printed via
 * stat_file() before the read and again after a successful read.
 *
 * `buf` is overwritten. `fname` is used only in error messages.
 * Returns 1 when the data matches, 0 on a read error, short read or mismatch.
 */
static int check_pread(int fd, const char *fname, char *buf, const char *refbuf)
{
    int ok = 1;

    printf("read/check first chunk ...\n");
    memset(buf, -1, CHUNK_SIZE); /* preset the buf to sentinel values */

    stat_file("before-PREAD", fd, CHUNK_SIZE, 0);
    ssize_t got = pread(fd, buf, CHUNK_SIZE, 0);

    if (got == -1) {
        /* Problem bailing out on pread, prints errno */
        printf("Error (%s) reading from file '%s'.\n", strerror(errno), fname);
        ok = 0;
    } else if (got != CHUNK_SIZE) {
        /* errno is only meaningful when pread returns -1, so do not print it here. */
        printf("Error: short read (%zd of %d bytes) from file '%s'.\n",
               got, CHUNK_SIZE, fname);
        ok = 0;
    } else {
        stat_file("after-PREAD", fd, CHUNK_SIZE, 0);
        if (memcmp(buf, refbuf, CMP_SIZE) != 0) {
            /*
            ** key problem: pread appears successful but buf contains corrupted
            ** data. errno is deliberately not reported: pread did not fail, so
            ** its value says nothing about this read.
            */
            printf("... data BAD! (pread reported success)\n");
            for (int i = 0; i < CMP_SIZE; ++i) {
                printf("buf[%d]=%d ref[%d]=%d\n", i, buf[i], i, refbuf[i]);
            }
            ok = 0;
        } else {
            printf("... data OK!.\n");
        }
    }

    fflush(stdout);
    return ok;
}

/*
 * Prints the current size of `fd` as reported by fstat, together with the
 * offset/size of the I/O operation labelled `op` and their sum, so that the
 * two can be compared by eye. Output is flushed immediately.
 */
static void stat_file(const char *op, int fd, size_t size, off_t offset)
{
    struct stat stats;

    if (fstat(fd, &stats) == -1) {
        printf("%s::fstat failed!\n", op);
    } else {
        /* long long keeps offsets beyond 2GB intact where long is 32 bits. */
        printf("\t%s:: fd=%d, fstat.sz=%lld, off/sz=%lld/%zu => %lld!\n",
               op, fd, (long long)stats.st_size, (long long)offset, size,
               (long long)offset + (long long)size);
    }
    fflush(stdout);
}

/*
 * Validates that `dir` is an existing, writable directory and that the test
 * file does not exist in it yet.
 *
 * Returns a malloc'ed "<dir>/<FILE_NAME>" path that the caller must free,
 * or NULL (after printing a message) if any check or the allocation fails.
 */
static char *check_args(const char *dir)
{
    struct stat statbuf;
    int ok;

    if (stat(dir, &statbuf) == -1) {
        /* perror appends ": <reason>\n" itself, so no newline in the prefix. */
        perror("stat call failed");
        ok = 0;
    } else {
        ok = S_ISDIR(statbuf.st_mode) && access(dir, W_OK) == 0;
    }

    if (!ok) {
        printf("Error: directory '%s' not accessible\n", dir);
        return NULL;
    }

    /* +2: one byte for the '/' separator, one for the terminating NUL. */
    size_t len = strlen(dir) + strlen(FILE_NAME) + 2;
    char *full_fname = malloc(len);
    if (full_fname == NULL) {
        printf("Error: out of memory building the test file path.\n");
        return NULL;
    }
    snprintf(full_fname, len, "%s/%s", dir, FILE_NAME);

    if (access(full_fname, F_OK) == 0) {
        printf("Error: file '%s' already exists.\n", full_fname);
        free(full_fname);
        return NULL;
    }

    return full_fname;
}