RE: pread/pwrite bug on linux?

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Jeff,

Attached the sample program. Should be able to compile as-is.

Let me know your findings or need additional info.

Thanks,
Qiuyang

-----Original Message-----
From: Jeff Moyer [mailto:jmoyer@xxxxxxxxxx]
Sent: Tuesday, March 03, 2009 10:10 AM
To: Qiuyang Wu
Cc: linux-nfs@xxxxxxxxxxxxxxx; linux-fsdevel@xxxxxxxxxxxxxxx
Subject: Re: pread/pwrite bug on linux?

Qiuyang Wu <Qiuyang.Wu@xxxxxxxxxxxx> writes:

> Hi All,
>
> Are there any known bugs with pread/pwrite on RHEL Linux when the disk
> partition is close to full?
>
> Reduced a complex application fatal down to a simple program just
> opens a regular file and performs 1M sequential pwrite() of size 8KB
> blocks; at every 100th write, does a pread() to load the very first
> 8KB block and validate its content still matching what was originally
> written.

Could you attach your test program, please?

> BEHAVIOR: pwrite() works for many iterations, then pread() suddenly
> returns data of the requested size but filled with 0's, and strerror
> shows errno="No such file or directory", condition
>       if(size_to_read != pread(fd, buf, size_to_read, ...)) ...
> is not triggered and application has to check errno immediately after
> every call to pread.

The value of errno is undefined if the pread call did not return -1.

Cheers,
Jeff
/******************************************************************************

  ABSTRACT:   Simple program to test behavior of pwrite/pread calls under
              the condition of insufficient disk space. Expect to trigger
              error when pushing data to disk with pwrite, instead of failing
              on pread, which implies data loss.

              The program does the following -
                - pwrite up to 1M size 8KB chunks (total ~8GB if finishes)
                - Very first 8KB chunk is specially filled with int value 13.
                - at every 100th write, does pread to load the first 8KB
                  chunk and validate (part of) its content.
                - before and after each pread, also use fstat to sanity
                  check program is in sync with the file.     

              To compile with GCC (e.g. gcc-4.2.2) -
                >gcc io_test.c

              To run:
                The program requires one arg which is the path to a directory
                for creating a regular file for write/read testing. Example                   
              
                >a.out /path/to/writable/dir

              To demonstrate the problem
                 Choose a disk partition that is almost full, for example,
                 the following with ~1GB available space -

                 > cd /remote/testdsk/qwu
                 > df -k .
                Filesystem           1K-blocks      Used Available Use% Mounted on
                emcns202:/testdsk    318033408 316740544   1292864 100% /remote/testdsk
                 > a.out /remote/testdsk/qwu


  HISTORY:	
  Feb 24 2009 - Qiuyang Wu : Created.

******************************************************************************/

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>

/* Name of the scratch file created inside the directory given on the command line. */
#define FILE_NAME   "rw_test_file"
#define TOTAL_WRITE 1000000 // upper bound on the number of pwrite calls
#define CHUNK_SIZE  8192    // size in bytes of each pwrite/pread buffer
#define CMP_SIZE    10      // number of leading bytes compared after a pread
#define READ_INTERV 100     // read back and check after every 100th write

/* Forward declarations; the definitions follow main(). */
char* check_args(char *dir);
int   pwrite_pread_test(char *fname);
void  stat_file(char* op, int fd, size_t size, off_t offset);
int   check_pread(int fd, char* fname, char* buf, char* refbuf);

/*
 * Entry point. Expects exactly one argument: the path of a writable
 * directory in which the scratch file is created.
 *
 * Exit status is 0 only when the argument was accepted by check_args() and
 * pwrite_pread_test() reported success; any other outcome (bad usage,
 * rejected directory, failed write, failed read-back check) exits with 1 so
 * that a calling script can detect it.
 */
int main(int argc, char* *argv)
{
  char *full_fname=NULL;
  int   ok=0;

  if(argc != 2) {
    /* argv[0] may be NULL when argc is 0; never hand NULL to %s. */
    printf("Usage: %s dir_path \n", (argc > 0 && argv[0]) ? argv[0] : "a.out");
    exit(1);
  }

  /* check_args() returns a malloc'ed path that is freed here, or NULL. */
  full_fname = check_args(argv[1]);
  if(full_fname) {
    ok = pwrite_pread_test(full_fname);
    free(full_fname);
  }

  exit(ok ? 0 : 1);
}

/*
 * Create/open `fname` and write up to TOTAL_WRITE chunks of CHUNK_SIZE bytes
 * to it sequentially with pwrite(). The first chunk is filled with the byte
 * value 13; later chunks are filled with (write count % 10). After every
 * READ_INTERV-th write, check_pread() re-reads the first chunk and compares
 * it with a reference copy.
 *
 * fname: path of the file to write; the file is not removed afterwards.
 *
 * Returns 1 when every write and every read-back check succeeded and the
 * file was closed without error, 0 otherwise. Terminates the process with
 * exit(-1) if the file cannot be opened.
 */
int pwrite_pread_test(char *fname) {
  int     ok=1;
  int     fd=-1;
  int     w_count=0;
  off_t   cur_offset=0;
  ssize_t written=0;
  char    *buf=NULL;
  char    *refbuf=NULL;

  fd = open(fname, (O_CREAT|O_RDWR), (S_IRUSR|S_IWUSR) ) ;

  if(fd==-1) {
    printf("Error opening '%s' for writing.\n", fname);
    exit(-1);
  }

  buf = malloc(CHUNK_SIZE);
  refbuf = malloc(CHUNK_SIZE);
  if(buf==NULL || refbuf==NULL) {
    printf("Error allocating %d-byte buffers.\n", (int)CHUNK_SIZE);
    free(buf);
    free(refbuf);
    close(fd);
    return 0;
  }
  memset(buf, 13, CHUNK_SIZE);
  memset(refbuf, 13, CHUNK_SIZE);

  while(w_count<TOTAL_WRITE) {
    ++w_count;
    written = pwrite(fd, buf, CHUNK_SIZE, cur_offset);
    if(written == -1) {
      /* Just print errno and bail out, a filled disk is expected at some point */
      printf("Error (%s) writing to file '%s'.\n", strerror(errno), fname);
      ok = 0;
      break;
    }
    if(written != CHUNK_SIZE) {
      /* A short write does not set errno, so report the byte count instead. */
      printf("Short write (%ld of %d bytes) to file '%s'.\n",
             (long)written, (int)CHUNK_SIZE, fname);
      ok = 0;
      break;
    }

    if(0==(w_count%READ_INTERV)) {
      printf("writes=%9d ::", w_count);
      stat_file("after-PWRITE", fd, CHUNK_SIZE, cur_offset);
      /* check_pread() overwrites buf; it is refilled below before the next write. */
      ok = check_pread(fd, fname, buf, refbuf);
      if(!ok) break;
    }

    cur_offset += CHUNK_SIZE; // increment cur_offset
    memset(buf, (int)(w_count%10), CHUNK_SIZE); // fill buf with some data for next pwrite
  }

  free(buf);
  free(refbuf);

  /*
   * Errors from earlier writes may be reported only at close() time
   * (notably on NFS or with disk quotas), so the result must be checked.
   */
  if(close(fd) == -1) {
    printf("Error (%s) closing file '%s'.\n", strerror(errno), fname);
    ok = 0;
  }

  return ok;
}

/*
 * Read the first CHUNK_SIZE bytes of `fd` with pread() and compare the
 * leading CMP_SIZE bytes against `refbuf`.
 *
 * fd:     open descriptor of the file under test.
 * fname:  file name, used only in error messages.
 * buf:    scratch buffer of at least CHUNK_SIZE bytes; its contents are
 *         overwritten (sentinel fill, then the data read).
 * refbuf: expected content of the first chunk (at least CMP_SIZE bytes).
 *
 * File statistics are printed before the pread and, when the data does not
 * match, again after it.
 *
 * Returns 1 when the read succeeded and the data matches; 0 on a read
 * error, a short read, or a data mismatch.
 */
int check_pread(int fd, char* fname, char* buf, char* refbuf)
{
  int     ok=1;
  int     read_errno=0;
  ssize_t nread=0;

  printf("read/check first chunk ...\n");

  memset(buf, -1, CHUNK_SIZE); //preset the buf to sentinel values

  /* Take the "before" snapshot ahead of the read it is meant to precede. */
  stat_file("before-PREAD", fd, CHUNK_SIZE, 0);

  errno = 0;
  nread = pread(fd, buf, CHUNK_SIZE, 0);
  read_errno = errno; /* capture now: later library calls may overwrite errno */

  if(nread == -1) {
    /* Problem bailing out on pread, prints errno */
    printf("Error (%s) reading from file '%s'.\n", strerror(read_errno), fname);
    ok = 0;
  } else if(nread != CHUNK_SIZE) {
    /* errno is only defined when pread returns -1, so report the count. */
    printf("Short read (%ld of %d bytes) from file '%s'.\n",
           (long)nread, (int)CHUNK_SIZE, fname);
    ok = 0;
  } else if(memcmp(buf, refbuf, CMP_SIZE)) {
    /*
    ** key problem: pread appears successful but buf contain corrupted data.
    ** errno is shown for diagnostics only; it carries no defined meaning
    ** after a pread that did not return -1.
    */
    int i=0;
    printf("... data BAD! (errno=%s)\n", strerror(read_errno));
    for(i=0; i<CMP_SIZE; ++i) {
      printf("buf[%d]=%d ref[%d]=%d\n", i, buf[i], i, refbuf[i]);
    }
    stat_file("after-PREAD", fd, CHUNK_SIZE, 0);
    ok = 0;
  } else {
    printf("... data OK!.\n");
  }

  fflush(stdout);
  return ok;
}

/*
 * Print a one-line report for `fd`: the file size reported by fstat(), and
 * the caller-supplied offset, size and their sum (the end position of the
 * operation being reported).
 *
 * op:     label printed at the start of the line.
 * fd:     descriptor to fstat().
 * size:   byte count of the operation being reported.
 * offset: file offset of the operation being reported.
 *
 * Values are printed as long long so that sizes and offsets beyond 2GB are
 * not truncated where long is 32 bits wide. Flushes stdout before returning.
 */
void stat_file(char* op, int fd, size_t size, off_t offset)
{
  struct stat stats;

  if ( -1 == fstat(fd, &stats) ) {
    printf("%s::fstat failed!\n", op);
  } else {
    printf("\t%s:: fd=%d, fstat.sz=%lld, off/sz=%lld/%lld => %lld!\n",
           op, fd, (long long)stats.st_size,
           (long long)offset, (long long)size,
           (long long)offset+(long long)size);
  }
  fflush(stdout);
}

/*
 * Validate the directory argument and build the path of the test file.
 *
 * dir: path that must name an existing directory writable by the caller.
 *
 * Returns a malloc'ed string "<dir>/" FILE_NAME that the caller must free,
 * or NULL (after printing a message) when `dir` cannot be stat'ed, is not a
 * writable directory, memory cannot be allocated, or the test file already
 * exists.
 */
char* check_args(char *dir) {
  struct stat statbuf;
  size_t len=0;
  char   *full_fname=NULL;

  if(stat(dir, &statbuf)==-1) {
    /* perror appends ": <reason>\n" itself, so the prefix has no newline. */
    perror("stat call failed");
    printf("Error: directory '%s' not accessible\n", dir);
    return NULL;
  }

  if(!S_ISDIR(statbuf.st_mode) || access(dir, W_OK)!=0) {
    printf("Error: directory '%s' not accessible\n", dir);
    return NULL;
  }

  /* +2: one byte for the '/' separator and one for the terminating NUL. */
  len = strlen(dir) + strlen(FILE_NAME) + 2;
  full_fname = malloc(len);
  if(full_fname==NULL) {
    printf("Error: out of memory building file name in '%s'\n", dir);
    return NULL;
  }
  snprintf(full_fname, len, "%s/%s", dir, FILE_NAME);

  /* Refuse to reuse an existing file so the test always starts from empty. */
  if(access(full_fname, F_OK)==0) {
    printf("Error: file '%s' already exists.\n", full_fname);
    free(full_fname);
    return NULL;
  }

  return full_fname;
}


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux