On Fri, Mar 13, 2009 at 12:07 AM, Greg Freemyer <greg.freemyer@xxxxxxxxx> wrote: > On Thu, Mar 12, 2009 at 1:59 PM, Sandeep K Sinha > <sandeepksinha@xxxxxxxxx> wrote: >> On Thu, Mar 12, 2009 at 9:43 PM, Greg Freemyer <greg.freemyer@xxxxxxxxx> wrote: >>> On Thu, Mar 12, 2009 at 11:16 AM, Sandeep K Sinha >>> <sandeepksinha@xxxxxxxxx> wrote: >>>> Hi all, >>>> >>>> I am listing above two codes for re-allocating new blocks for a given inode. >>>> >>>> These code snippets are specific to ext2. >>>> The problem here is that it is performing slower than cp and dd. >>>> >>>> Can anyone help in determining what can be the possible reason(s) ? >>> >>> Excessive use of sync_dirty_buffer(); >>> >>> You're calling it for every single block you copy. Pretty sure it is a >>> very aggressive call that forces the data out to the disk immediately, >>> thus the benefits of caching and elevators are lost. And those are >>> big benefits. >>> >> >> probably the reason being that the ext2 has some issues with sync. >> http://kerneltrap.org/index.php?q=mailarchive/linux-fsdevel/2007/2/14/316836/thread >> >> Hence, if we don't do this we see a difference in the original and >> reallocated file. > > Sandeep, > > As I read that bug report, the issue would be resolved by doing: > > truncate(a) > fsync(a) > allocate_additional_blocks_for_b() > fsync(b) > > But for the reporter the issue is that the code manipulating file A is > independent of the code manipulating file B thus it needs to be fixed > in a different way. > > Since you have total control of both the files (inodes & data blocks) > in question, you should be able to find a better way than calling > sync_dirty_buffer() for every block you copy. > We are looking over it and expect to find a better way to do it. > Have you actually seen data corruption if you leave out the > sync_dirty_buffer() call, or is it a theoretical issue? Yes, It has been practically. We can see a data corruption. > > Have you tried calling ext2_sync_inode() at the end of the copy loop > instead of sync_dirty_buffer() on every iteration. I really can't see > why that would be any more dangerous. Yet it would at least allow the > elevators and caches to work for an entire file at a time. > Let me do this for you once again and get back to you. If I can recall, we did try that but were able to see data corruption. Will get back to you soon with the reply. > Greg > >>> Greg >>> >>> >>>> >>>> 1. MEMCPY >>>> ======= >>>> >>>> >>>> >>>> src_ind = ext2_iget (sb, inum); >>>> if (!src_ind) { >>>> printk (KERN_DEBUG "\n OHSM Source Inode Not Found "); >>>> goto out_err; >>>> } >>>> >>>> /* Getting the ghost inode */ >>>> dest_ind = ext2_new_inode (nd.path.dentry->d_inode, src_ind->i_mode); >>>> if (IS_ERR(dest_ind)) { >>>> printk (KERN_ALERT "\n OHSM destination inode unable to allocate. >>>> err = %ld", PTR_ERR(dest_ind)); >>>> goto out_err; >>>> } >>>> >>>> /* Locking the source inode using mutex */ >>>> mutex_lock (&src_ind->i_mutex); >>>> >>>> /* Get the source inode size and number of blocks */ >>>> i_size = i_size_read(src_ind); >>>> >>>> /* Calculating number of block to allocate for ghost inode */ >>>> nblocks = (i_size >> EXT2_BLOCK_SIZE_BITS(sb)) + >>>> ((i_size & (sb->s_blocksize - 1)) ? 1 : 0); >>>> >>>> memset(&dest_bh, 0, sizeof(struct buffer_head)); >>>> memset(&src_bh, 0, sizeof(struct buffer_head)); >>>> for (done = 0; done < nblocks; done++) { >>>> >>>> err = ext2_get_block (src_ind, done, &src_bh, 0); >>>> if (err < 0) { >>>> printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err); >>>> goto unlock; >>>> } >>>> >>>> dest_bh.b_state = 0; >>>> err = ext2_get_block (dest_ind, done, &dest_bh, 1); >>>> if (err < 0) { >>>> printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err); >>>> goto unlock; >>>> } >>>> >>>> if (!buffer_mapped(&src_bh)){ >>>> printk (KERN_DEBUG "\nHOLE "); >>>> continue; >>>> } >>>> >>>> src_bhptr = sb_bread(sb, src_bh.b_blocknr); >>>> if (!src_bhptr) >>>> goto unlock; >>>> >>>> dst_bhptr = sb_bread(sb, dest_bh.b_blocknr); >>>> if (!dst_bhptr) >>>> goto unlock; >>>> >>>> lock_buffer(dst_bhptr); >>>> memcpy(dst_bhptr->b_data,src_bhptr->b_data,src_bhptr->b_size); >>>> unlock_buffer(dst_bhptr); >>>> >>>> mark_buffer_dirty(dst_bhptr); >>>> sync_dirty_buffer(dst_bhptr); >>>> >>>> brelse(src_bhptr); >>>> brelse(dst_bhptr); >>>> >>>> } >>>> >>>> for (blk_index = 0; blk_index < 15; blk_index++) { >>>> if (EXT2_I (dest_ind)->i_data[blk_index]){ >>>> swap = EXT2_I (src_ind)->i_data[blk_index]; >>>> EXT2_I (src_ind)->i_data[blk_index] = >>>> EXT2_I (dest_ind)->i_data[blk_index]; >>>> EXT2_I (dest_ind)->i_data[blk_index] = swap; >>>> } >>>> } >>>> >>>> dest_ind->i_blocks = src_ind->i_blocks ; >>>> mark_inode_dirty (src_ind); >>>> sync_mapping_buffers(src_ind->i_mapping); >>>> ohsm_sync_inode(src_ind); >>>> iput(src_ind); >>>> >>>> dest_ind->i_nlink = 0; >>>> iput(dest_ind); >>>> mutex_unlock (&src_ind->i_mutex); >>>> goto out_err; >>>> >>>> unlock: >>>> brelse(src_bhptr); >>>> brelse(dst_bhptr); >>>> mutex_unlock (&src_ind->i_mutex); >>>> >>>> out_err: >>>> path_put (&nd.path); >>>> return; >>>> >>>> >>>> >>>> 2. PAGEFLIP >>>> ======== >>>> >>>> >>>> /* Get the source inode */ >>>> >>>> src_ind = ext2_iget (sb, inum); >>>> if (!src_ind) { >>>> printk (KERN_DEBUG "\n OHSM Source Inode Not Found "); >>>> goto out_err; >>>> } >>>> >>>> /* Getting the ghost inode */ >>>> dest_ind = ext2_new_inode (nd.path.dentry->d_inode, src_ind->i_mode); >>>> if (IS_ERR(dest_ind)) { >>>> printk (KERN_ALERT "\n destination inode unable to allocate. err = >>>> %ld", PTR_ERR(dest_ind)); >>>> goto out_err; >>>> } >>>> >>>> /* Locking the source inode using mutex */ >>>> mutex_lock (&src_ind->i_mutex); >>>> >>>> /* Get the source inode size and number of blocks */ >>>> i_size = i_size_read(src_ind); >>>> >>>> /* Calculating number of block to allocate for ghost inode */ >>>> nblocks = (i_size >> EXT2_BLOCK_SIZE_BITS(sb)) + >>>> ((i_size & (sb->s_blocksize - 1)) ? 1 : 0); >>>> >>>> >>>> memset(&dest_bh, 0, sizeof(struct buffer_head)); >>>> memset(&src_bh, 0, sizeof(struct buffer_head)); >>>> for (done = 0; done < nblocks; done++) { >>>> >>>> err = ext2_get_block (src_ind, done, &src_bh, 0); >>>> if (err < 0) { >>>> printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err); >>>> goto unlock; >>>> } >>>> >>>> dest_bh.b_state = 0; >>>> err = ext2_get_block (dest_ind, done, &dest_bh, 1); >>>> if (err < 0) { >>>> printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err); >>>> goto unlock; >>>> } >>>> >>>> if (!buffer_mapped(&src_bh)){ >>>> printk (KERN_DEBUG "\nHOLE "); >>>> continue; >>>> } >>>> >>>> src_bhptr = sb_bread(sb, src_bh.b_blocknr); >>>> if (!src_bhptr) >>>> goto unlock; >>>> >>>> dst_bhptr = sb_bread(sb, dest_bh.b_blocknr); >>>> if (!dst_bhptr) >>>> goto unlock; >>>> >>>> lock_buffer(dst_bhptr); >>>> oldpage = dst_bhptr->b_page; >>>> olddata = dst_bhptr->b_data; >>>> dst_bhptr->b_data = src_bhptr->b_data; >>>> dst_bhptr->b_page = src_bhptr->b_page; >>>> flush_dcache_page(dst_bhptr->b_page); >>>> unlock_buffer(dst_bhptr); >>>> >>>> mark_buffer_dirty(dst_bhptr); >>>> sync_dirty_buffer(dst_bhptr); >>>> >>>> if(buffer_uptodate(dst_bhptr)) { >>>> dst_bhptr->b_page = oldpage; >>>> dst_bhptr->b_data = olddata; >>>> } >>>> >>>> brelse(src_bhptr); >>>> brelse(dst_bhptr); >>>> } >>>> >>>> for (blk_index = 0; blk_index < 15; blk_index++) { >>>> if (EXT2_I (dest_ind)->i_data[blk_index]){ >>>> swap = EXT2_I (src_ind)->i_data[blk_index]; >>>> EXT2_I (src_ind)->i_data[blk_index] = >>>> EXT2_I (dest_ind)->i_data[blk_index]; >>>> EXT2_I (dest_ind)->i_data[blk_index] = swap; >>>> } >>>> } >>>> >>>> dest_ind->i_blocks = src_ind->i_blocks ; >>>> mark_inode_dirty (src_ind); >>>> sync_mapping_buffers(src_ind->i_mapping); >>>> ohsm_sync_inode(src_ind); >>>> iput(src_ind); >>>> >>>> dest_ind->i_nlink = 0; >>>> iput(dest_ind); >>>> mutex_unlock (&src_ind->i_mutex); >>>> goto out_err; >>>> >>>> unlock: >>>> brelse(src_bhptr); >>>> brelse(dst_bhptr); >>>> mutex_unlock (&src_ind->i_mutex); >>>> >>>> out_err: >>>> path_put (&nd.path); >>>> return; >>>> } >>>> >>>> >>>> >>>> -- >>>> Regards, >>>> Sandeep. >>>> >>>> >>>> >>>> >>>> >>>> >>>> “To learn is to change. Education is a process that changes the learner.” >>>> >>> >>> >>> >>> -- >>> Greg Freemyer >>> Litigation Triage Solutions Specialist >>> http://www.linkedin.com/in/gregfreemyer >>> First 99 Days Litigation White Paper - >>> http://www.norcrossgroup.com/forms/whitepapers/99%20Days%20whitepaper.pdf >>> >>> The Norcross Group >>> The Intersection of Evidence & Technology >>> http://www.norcrossgroup.com >>> >> >> >> >> -- >> Regards, >> Sandeep. >> >> >> >> >> >> >> “To learn is to change. Education is a process that changes the learner.” >> > > > > -- > Greg Freemyer > Head of EDD Tape Extraction and Processing team > Litigation Triage Solutions Specialist > http://www.linkedin.com/in/gregfreemyer > First 99 Days Litigation White Paper - > http://www.norcrossgroup.com/forms/whitepapers/99%20Days%20whitepaper.pdf > > The Norcross Group > The Intersection of Evidence & Technology > http://www.norcrossgroup.com > -- Regards, Sandeep. “To learn is to change. Education is a process that changes the learner.” -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html