Re: Question on fallocate/ftruncate sequence

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sorry for taking so long to finish this. Here is the new patch based on
Andreas's suggestions. The patch now clears the EXT4_EOFBLOCKS_FL
flag when we allocate beyond the maximum allocated block. I also
made the EOFBLOCKS flag user visible and added the handling
in ext4_ioctl, as Andreas suggested.

Index: linux-2.6.30.5/fs/ext4/inode.c
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/inode.c    2009-08-31 12:08:10.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/inode.c    2009-09-23 21:42:33.000000000 -0700
@@ -3973,6 +3973,8 @@ void ext4_truncate(struct inode *inode)
     if (!ext4_can_truncate(inode))
         return;

+    inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
+
     if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
         ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;

@@ -4285,8 +4287,8 @@ void ext4_get_inode_flags(struct ext4_in
 {
     unsigned int flags = ei->vfs_inode.i_flags;

-    ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
-            EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
+    ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|EXT4_IMMUTABLE_FL|
+            EXT4_NOATIME_FL|EXT4_DIRSYNC_FL|EXT4_EOFBLOCKS_FL);
     if (flags & S_SYNC)
         ei->i_flags |= EXT4_SYNC_FL;
     if (flags & S_APPEND)
@@ -4297,6 +4299,8 @@ void ext4_get_inode_flags(struct ext4_in
         ei->i_flags |= EXT4_NOATIME_FL;
     if (flags & S_DIRSYNC)
         ei->i_flags |= EXT4_DIRSYNC_FL;
+    if (flags & FS_EOFBLOCKS_FL)
+        ei->i_flags |= EXT4_EOFBLOCKS_FL;
 }
 static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
                     struct ext4_inode_info *ei)
@@ -4807,7 +4811,9 @@ int ext4_setattr(struct dentry *dentry,
     }

     if (S_ISREG(inode->i_mode) &&
-        attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
+        attr->ia_valid & ATTR_SIZE &&
+        (attr->ia_size < inode->i_size ||
+         (inode->i_flags & EXT4_EOFBLOCKS_FL))) {
         handle_t *handle;

         handle = ext4_journal_start(inode, 3);
@@ -4838,6 +4844,11 @@ int ext4_setattr(struct dentry *dentry,
                 goto err_out;
             }
         }
+        if ((inode->i_flags & EXT4_EOFBLOCKS_FL)) {
+            rc = vmtruncate(inode, attr->ia_size);
+            if (rc)
+                goto err_out;
+        }
     }

     rc = inode_setattr(inode, attr);
Index: linux-2.6.30.5/include/linux/fs.h
===================================================================
--- linux-2.6.30.5.orig/include/linux/fs.h    2009-08-31 12:08:10.000000000 -0700
+++ linux-2.6.30.5/include/linux/fs.h    2009-09-10 21:27:30.000000000 -0700
@@ -343,9 +343,10 @@ struct inodes_stat_t {
 #define FS_TOPDIR_FL            0x00020000 /* Top of directory hierarchies*/
 #define FS_EXTENT_FL            0x00080000 /* Extents */
 #define FS_DIRECTIO_FL            0x00100000 /* Use direct i/o */
+#define FS_EOFBLOCKS_FL            0x00200000 /* Blocks allocated beyond EOF */
 #define FS_RESERVED_FL            0x80000000 /* reserved for ext2 lib */

-#define FS_FL_USER_VISIBLE        0x0003DFFF /* User visible flags */
+#define FS_FL_USER_VISIBLE        0x0023DFFF /* User visible flags */
 #define FS_FL_USER_MODIFIABLE        0x000380FF /* User modifiable flags */


Index: linux-2.6.30.5/fs/ext4/ext4.h
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/ext4.h    2009-08-31 12:08:10.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/ext4.h    2009-09-10 21:28:14.000000000 -0700
@@ -235,9 +235,10 @@ struct flex_groups {
 #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
 #define EXT4_EXTENTS_FL            0x00080000 /* Inode uses extents */
 #define EXT4_EXT_MIGRATE        0x00100000 /* Inode is migrating */
+#define EXT4_EOFBLOCKS_FL        0x00200000 /* Blocks allocated beyond EOF (bit reserved in fs.h) */
 #define EXT4_RESERVED_FL        0x80000000 /* reserved for ext4 lib */

-#define EXT4_FL_USER_VISIBLE        0x000BDFFF /* User visible flags */
+#define EXT4_FL_USER_VISIBLE        0x002BDFFF /* User visible flags */
 #define EXT4_FL_USER_MODIFIABLE        0x000B80FF /* User modifiable flags */

 /* Flags that should be inherited by new inodes from their parent. */
Index: linux-2.6.30.5/fs/ext4/extents.c
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/extents.c    2009-09-01 18:14:58.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/extents.c    2009-09-23 22:12:22.000000000 -0700
@@ -2788,7 +2788,7 @@ int ext4_ext_get_blocks(handle_t *handle
 {
     struct ext4_ext_path *path = NULL;
     struct ext4_extent_header *eh;
-    struct ext4_extent newex, *ex;
+    struct ext4_extent newex, *ex, *last_ex;
     ext4_fsblk_t newblock;
     int err = 0, depth, ret, cache_type;
     unsigned int allocated = 0;
@@ -2968,6 +2968,14 @@ int ext4_ext_get_blocks(handle_t *handle
     newex.ee_len = cpu_to_le16(ar.len);
     if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
         ext4_ext_mark_uninitialized(&newex);
+
+    if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
+        BUG_ON(!eh->eh_entries);
+        last_ex = EXT_LAST_EXTENT(eh);
+        if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+                    + ext4_ext_get_actual_len(last_ex))
+            inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
+    }
     err = ext4_ext_insert_extent(handle, inode, path, &newex);
     if (err) {
         /* free data blocks we just allocated */
@@ -3095,6 +3103,13 @@ static void ext4_falloc_update_inode(str
             i_size_write(inode, new_size);
         if (new_size > EXT4_I(inode)->i_disksize)
             ext4_update_i_disksize(inode, new_size);
+    } else {
+        /*
+         * Mark that we allocate beyond EOF so the subsequent truncate
+         * can proceed even if the new size is the same as i_size.
+         */
+        if (new_size > i_size_read(inode))
+            inode->i_flags |= EXT4_EOFBLOCKS_FL;
     }
 }

Index: linux-2.6.30.5/fs/ext4/ioctl.c
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/ioctl.c    2009-08-16 14:19:38.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/ioctl.c    2009-09-23 22:04:47.000000000 -0700
@@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsig
             flags &= ~EXT4_EXTENTS_FL;
         }

+        if (flags & EXT4_EOFBLOCKS_FL) {
+            /* we don't support adding EOFBLOCKS flag */
+            if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+                err = -EOPNOTSUPP;
+                goto flags_out;
+            }
+        } else if (oldflags & EXT4_EOFBLOCKS_FL)
+            /* free the space reserved with fallocate KEEPSIZE */
+            vmtruncate(inode, inode->i_size);
+
         handle = ext4_journal_start(inode, 1);
         if (IS_ERR(handle)) {
             err = PTR_ERR(handle);


Jiaying

On Wed, Sep 2, 2009 at 10:20 PM, Jiaying Zhang <jiayingz@xxxxxxxxxx> wrote:
>
> On Wed, Sep 2, 2009 at 1:41 AM, Andreas Dilger<adilger@xxxxxxx> wrote:
> > On Aug 31, 2009  16:33 -0700, Jiaying Zhang wrote:
> >> > EXT4_KEEPSIZE_FL should only be cleared if there were writes to
> >> > the end of the fallocated space.  In that regard, I think the name
> >> > of this flag should be changed to something like "EXT4_EOFBLOCKS_FL"
> >> > to indicate that blocks are allocated beyond the end of file (i_size).
> >>
> >> Thanks for catching this! I changed the patch to only clear the flag
> >> when the new_size is larger than i_size and changed the flag name
> >> as you suggested. It would be nice if we only clear the flag when we
> >> write beyond the fallocated space, but this seems hard to detect
> >> because we no longer have the allocated size once that keepsize
> >> fallocate call returns.
> >
> > The problem is that if e2fsck depends on the EXT4_EOFBLOCKS_FL set
> > for fallocate-beyond-EOF then it is worse to clear it than to leave
> > it set.  At worst, leaving the flag set results in too many truncates
> > on the file.  Clearing the flag when not correct may result in user
> > visible data corruption if the file size is extended...
> >
> >> Here is the new patch:
> >>
> >> --- .pc/fallocate_keepsizse.patch/fs/ext4/extents.c   2009-08-31 12:08:10.000000000 -0700
> >> +++ fs/ext4/extents.c 2009-08-31 15:51:13.000000000 -0700
> >> @@ -3091,11 +3091,19 @@ static void ext4_falloc_update_inode(str
> >>        * the file size.
> >>        */
> >>       if (!(mode & FALLOC_FL_KEEP_SIZE)) {
> >> +             if (new_size > i_size_read(inode)) {
> >>                       i_size_write(inode, new_size);
> >> +                     inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> >
> > This again isn't quite correct, since the EOFBLOCKS_FL shouldn't
> > be cleared unless new_size is beyond the allocated size.  The
> > allocation code itself might be a better place to clear this,
> > since it knows whether there were new blocks being added beyond
> > the current max allocated block.
>
> We were thinking of clearing this flag when we need to allocate new
> blocks, but I was not sure how to get the current max allocated
> block -- mostly because I have only just started working on the ext4
> code. After digging into the ext4 allocation code today, I think we
> can put the check-and-clear in ext4_ext_get_blocks:
>
> @@ -2968,6 +2968,14 @@ int ext4_ext_get_blocks(handle_t *handle
>        newex.ee_len = cpu_to_le16(ar.len);
>        if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
>                ext4_ext_mark_uninitialized(&newex);
> +
> +       if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> +               BUG_ON(!eh->eh_entries);
> +               last_ex = EXT_LAST_EXTENT(eh);
> +               if (iblock + max_blocks > le32_to_cpu(last_ex->ee_block)
> +                                       + ext4_ext_get_actual_len(last_ex))
> +                       inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> +       }
>        err = ext4_ext_insert_extent(handle, inode, path, &newex);
>        if (err) {
>                /* free data blocks we just allocated */
>
> Again, I just started looking at this part of code, so please let me know
> if I am in the right direction.
>
> Another thing I am not sure about is whether we can allocate a non-data
> block, such as one for extended attributes, beyond the current max block
> without changing the i_size. In that case, clearing the EOFBLOCKS flag
> will be wrong.
>
> >>  #define FS_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
> >
> > It probably isn't a bad idea to make this flag user-visible, since it
> > would allow scanning for files that have excess space reserved (e.g.
> > if the filesystem is getting full).  Making it user-settable (i.e.
> > clearable) would essentially mean truncating the file to i_size without
> > updating the timestamps so that the reserved space is discarded.  I
> > don't think there is any value in allowing a user to turn this flag on
> > for a file.
>
> So to make it user-settable, we need to add handling in ext4_ioctl
> that calls vmtruncate when the flag is to be cleared. But how can we get
> the right size to truncate to in that case? Can we just set it to the
> max initialized block shifted by the block size? But that may also truncate
> the blocks reserved without the KEEP_SIZE flag.
>
> Jiaying
>
> >
> > Cheers, Andreas
> > --
> > Andreas Dilger
> > Sr. Staff Engineer, Lustre Group
> > Sun Microsystems of Canada, Inc.
> >
> >
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux