Re: [ext2/ext3] Re-allocation of blocks for an inode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 12, 2009 at 1:59 PM, Sandeep K Sinha
<sandeepksinha@xxxxxxxxx> wrote:
> On Thu, Mar 12, 2009 at 9:43 PM, Greg Freemyer <greg.freemyer@xxxxxxxxx> wrote:
>> On Thu, Mar 12, 2009 at 11:16 AM, Sandeep K Sinha
>> <sandeepksinha@xxxxxxxxx> wrote:
>>> Hi all,
>>>
>>> I am listing above two codes for re-allocating new blocks for a given inode.
>>>
>>> These code snippets are specific to  ext2.
>>> The problem here is that it is performing slower than cp and dd.
>>>
>>> Can anyone help in determining what can be the possible reason(s) ?
>>
>> Excessive use of sync_dirty_buffer();
>>
>> You're calling it for every single block you copy. Pretty sure it is a
>> very aggressive call that forces the data out to the disk immediately,
>> thus the benefits of caching and elevators are lost.  And those are
>> big benefits.
>>
>
> probably the reason being that the ext2 has some issues with sync.
> http://kerneltrap.org/index.php?q=mailarchive/linux-fsdevel/2007/2/14/316836/thread
>
> Hence, if we don't do this we see a difference in the original and
> reallocated file.

Sandeep,

As I read that bug report, the issue would be resolved by doing:

truncate(a)
fsync(a)
allocate_additional_blocks_for_b()
fsync(b)

But for the reporter the issue is that the code manipulating file A is
independent of the code manipulating file B thus it needs to be fixed
in a different way.

Since you have total control of both the files (inodes & data blocks)
in question, you should be able to find a better way than calling
sync_dirty_buffer() for every block you copy.

Have you actually seen data corruption if you leave out the
sync_dirty_buffer() call, or is it a theoretical issue?

Have you tried calling ext2_sync_inode() at the end of the copy loop
instead of sync_dirty_buffer() on every iteration.  I really can't see
why that would be any more dangerous.  Yet it would at least allow the
elevators and caches to work for an entire file at a time.

Greg

>> Greg
>>
>>
>>>
>>> 1. MEMCPY
>>> =======
>>>
>>>
>>>
>>>                src_ind = ext2_iget (sb, inum);
>>>        if (!src_ind) {
>>>                printk (KERN_DEBUG "\n OHSM Source Inode Not Found ");
>>>                goto out_err;
>>>        }
>>>
>>>        /* Getting the ghost inode */
>>>        dest_ind = ext2_new_inode (nd.path.dentry->d_inode, src_ind->i_mode);
>>>        if (IS_ERR(dest_ind)) {
>>>                printk (KERN_ALERT "\n OHSM destination inode unable to allocate.
>>> err = %ld", PTR_ERR(dest_ind));
>>>                goto out_err;
>>>        }
>>>
>>>        /* Locking the source inode using mutex */
>>>        mutex_lock (&src_ind->i_mutex);
>>>
>>>        /* Get the source inode size and number of blocks */
>>>        i_size = i_size_read(src_ind);
>>>
>>>        /* Calculating number of block to allocate for ghost inode */
>>>        nblocks = (i_size >> EXT2_BLOCK_SIZE_BITS(sb)) +
>>>                   ((i_size & (sb->s_blocksize - 1)) ? 1 : 0);
>>>
>>>        memset(&dest_bh, 0, sizeof(struct buffer_head));
>>>        memset(&src_bh, 0, sizeof(struct buffer_head));
>>>        for (done = 0; done < nblocks; done++) {
>>>
>>>             err = ext2_get_block (src_ind, done, &src_bh, 0);
>>>                if (err < 0) {
>>>                        printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err);
>>>                        goto unlock;
>>>                }
>>>
>>>                dest_bh.b_state = 0;
>>>                err = ext2_get_block (dest_ind, done, &dest_bh, 1);
>>>                if (err < 0) {
>>>                        printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err);
>>>                        goto unlock;
>>>                }
>>>
>>>                if (!buffer_mapped(&src_bh)){
>>>                        printk (KERN_DEBUG "\nHOLE ");
>>>                        continue;
>>>                }
>>>
>>>                src_bhptr = sb_bread(sb, src_bh.b_blocknr);
>>>                if (!src_bhptr)
>>>                        goto unlock;
>>>
>>>                dst_bhptr = sb_bread(sb, dest_bh.b_blocknr);
>>>                if (!dst_bhptr)
>>>                        goto unlock;
>>>
>>>                lock_buffer(dst_bhptr);
>>>                memcpy(dst_bhptr->b_data,src_bhptr->b_data,src_bhptr->b_size);
>>>                unlock_buffer(dst_bhptr);
>>>
>>>                mark_buffer_dirty(dst_bhptr);
>>>                sync_dirty_buffer(dst_bhptr);
>>>
>>>                brelse(src_bhptr);
>>>                brelse(dst_bhptr);
>>>
>>>        }
>>>
>>>        for (blk_index = 0; blk_index < 15; blk_index++) {
>>>                if (EXT2_I (dest_ind)->i_data[blk_index]){
>>>                        swap = EXT2_I (src_ind)->i_data[blk_index];
>>>                        EXT2_I (src_ind)->i_data[blk_index] =
>>>                                EXT2_I (dest_ind)->i_data[blk_index];
>>>                        EXT2_I (dest_ind)->i_data[blk_index] = swap;
>>>                }
>>>        }
>>>
>>>        dest_ind->i_blocks = src_ind->i_blocks ;
>>>        mark_inode_dirty (src_ind);
>>>        sync_mapping_buffers(src_ind->i_mapping);
>>>        ohsm_sync_inode(src_ind);
>>>        iput(src_ind);
>>>
>>>        dest_ind->i_nlink = 0;
>>>        iput(dest_ind);
>>>        mutex_unlock (&src_ind->i_mutex);
>>>        goto out_err;
>>>
>>> unlock:
>>>        brelse(src_bhptr);
>>>        brelse(dst_bhptr);
>>>        mutex_unlock (&src_ind->i_mutex);
>>>
>>> out_err:
>>>        path_put (&nd.path);
>>>        return;
>>>
>>>
>>>
>>> 2. PAGEFLIP
>>> ========
>>>
>>>
>>> /* Get the source inode */
>>>
>>>        src_ind = ext2_iget (sb, inum);
>>>        if (!src_ind) {
>>>                printk (KERN_DEBUG "\n OHSM Source Inode Not Found ");
>>>                goto out_err;
>>>        }
>>>
>>>        /* Getting the ghost inode */
>>>        dest_ind = ext2_new_inode (nd.path.dentry->d_inode, src_ind->i_mode);
>>>        if (IS_ERR(dest_ind)) {
>>>                printk (KERN_ALERT "\n destination inode unable to allocate. err =
>>> %ld", PTR_ERR(dest_ind));
>>>                goto out_err;
>>>        }
>>>
>>>        /* Locking the source inode using mutex */
>>>        mutex_lock (&src_ind->i_mutex);
>>>
>>>        /* Get the source inode size and number of blocks */
>>>        i_size = i_size_read(src_ind);
>>>
>>>        /* Calculating number of block to allocate for ghost inode */
>>>        nblocks = (i_size >> EXT2_BLOCK_SIZE_BITS(sb)) +
>>>                   ((i_size & (sb->s_blocksize - 1)) ? 1 : 0);
>>>
>>>
>>>        memset(&dest_bh, 0, sizeof(struct buffer_head));
>>>        memset(&src_bh, 0, sizeof(struct buffer_head));
>>>        for (done = 0; done < nblocks; done++) {
>>>
>>>             err = ext2_get_block (src_ind, done, &src_bh, 0);
>>>                if (err < 0) {
>>>                        printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err);
>>>                        goto unlock;
>>>                }
>>>
>>>                dest_bh.b_state = 0;
>>>                err = ext2_get_block (dest_ind, done, &dest_bh, 1);
>>>                if (err < 0) {
>>>                        printk (KERN_DEBUG "\n error getting blocks ret_val = %d",err);
>>>                        goto unlock;
>>>                }
>>>
>>>                if (!buffer_mapped(&src_bh)){
>>>                        printk (KERN_DEBUG "\nHOLE ");
>>>                        continue;
>>>                }
>>>
>>>                src_bhptr = sb_bread(sb, src_bh.b_blocknr);
>>>                if (!src_bhptr)
>>>                        goto unlock;
>>>
>>>                dst_bhptr = sb_bread(sb, dest_bh.b_blocknr);
>>>                if (!dst_bhptr)
>>>                        goto unlock;
>>>
>>>               lock_buffer(dst_bhptr);
>>>               oldpage = dst_bhptr->b_page;
>>>               olddata = dst_bhptr->b_data;
>>>               dst_bhptr->b_data = src_bhptr->b_data;
>>>               dst_bhptr->b_page = src_bhptr->b_page;
>>>               flush_dcache_page(dst_bhptr->b_page);
>>>               unlock_buffer(dst_bhptr);
>>>
>>>               mark_buffer_dirty(dst_bhptr);
>>>               sync_dirty_buffer(dst_bhptr);
>>>
>>>               if(buffer_uptodate(dst_bhptr)) {
>>>                dst_bhptr->b_page  = oldpage;
>>>                dst_bhptr->b_data = olddata;
>>>               }
>>>
>>>             brelse(src_bhptr);
>>>             brelse(dst_bhptr);
>>>        }
>>>
>>>        for (blk_index = 0; blk_index < 15; blk_index++) {
>>>                if (EXT2_I (dest_ind)->i_data[blk_index]){
>>>                        swap = EXT2_I (src_ind)->i_data[blk_index];
>>>                        EXT2_I (src_ind)->i_data[blk_index] =
>>>                                EXT2_I (dest_ind)->i_data[blk_index];
>>>                        EXT2_I (dest_ind)->i_data[blk_index] = swap;
>>>                }
>>>        }
>>>
>>>        dest_ind->i_blocks = src_ind->i_blocks ;
>>>        mark_inode_dirty (src_ind);
>>>        sync_mapping_buffers(src_ind->i_mapping);
>>>        ohsm_sync_inode(src_ind);
>>>        iput(src_ind);
>>>
>>>        dest_ind->i_nlink = 0;
>>>        iput(dest_ind);
>>>        mutex_unlock (&src_ind->i_mutex);
>>>        goto out_err;
>>>
>>> unlock:
>>>        brelse(src_bhptr);
>>>        brelse(dst_bhptr);
>>>        mutex_unlock (&src_ind->i_mutex);
>>>
>>> out_err:
>>>        path_put (&nd.path);
>>>        return;
>>> }
>>>
>>>
>>>
>>> --
>>> Regards,
>>> Sandeep.
>>>
>>>
>>>
>>>
>>>
>>>
>>> “To learn is to change. Education is a process that changes the learner.”
>>>
>>
>>
>>
>> --
>> Greg Freemyer
>> Litigation Triage Solutions Specialist
>> http://www.linkedin.com/in/gregfreemyer
>> First 99 Days Litigation White Paper -
>> http://www.norcrossgroup.com/forms/whitepapers/99%20Days%20whitepaper.pdf
>>
>> The Norcross Group
>> The Intersection of Evidence & Technology
>> http://www.norcrossgroup.com
>>
>
>
>
> --
> Regards,
> Sandeep.
>
>
>
>
>
>
> “To learn is to change. Education is a process that changes the learner.”
>



-- 
Greg Freemyer
Head of EDD Tape Extraction and Processing team
Litigation Triage Solutions Specialist
http://www.linkedin.com/in/gregfreemyer
First 99 Days Litigation White Paper -
http://www.norcrossgroup.com/forms/whitepapers/99%20Days%20whitepaper.pdf

The Norcross Group
The Intersection of Evidence & Technology
http://www.norcrossgroup.com
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux