On Mar 25, 2015, at 9:07 AM, Jens Axboe <axboe@xxxxxx> wrote: > > Writing on flash devices can be much more efficient, if we can > inform the device what kind of data can be grouped together. If > the device is able to group data together with similar lifetimes, > then it can be more efficient in garbage collection. This, in turn, > leads to lower write amplification, which is a win on both device > wear and performance. > > Add a new fadvise hint, POSIX_FADV_STREAMID, which sets the file > and inode streamid. The file streamid is used if we have the file > available at the time of the write (O_DIRECT), we use the inode > streamid if not (buffered writeback). The fadvise hint uses the > 'offset' field to specify a stream ID. > > Signed-off-by: Jens Axboe <axboe@xxxxxx> > --- > fs/inode.c | 1 + > fs/open.c | 1 + > include/linux/fs.h | 23 +++++++++++++++++++++++ > include/uapi/linux/fadvise.h | 2 ++ > mm/fadvise.c | 17 +++++++++++++++++ > 5 files changed, 44 insertions(+) > > diff --git a/fs/inode.c b/fs/inode.c > index f00b16f45507..41885322ba64 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -149,6 +149,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) > inode->i_blocks = 0; > inode->i_bytes = 0; > inode->i_generation = 0; > + inode->i_streamid = 0; > inode->i_pipe = NULL; > inode->i_bdev = NULL; > inode->i_cdev = NULL; > diff --git a/fs/open.c b/fs/open.c > index 33f9cbf2610b..4a9b2be1a674 100644 > --- a/fs/open.c > +++ b/fs/open.c > @@ -743,6 +743,7 @@ static int do_dentry_open(struct file *f, > f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); > > file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); > + f->f_streamid = 0; > > return 0; > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index b4d71b5e1ff2..43dde70c1d0d 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -631,6 +631,7 @@ struct inode { > }; > > __u32 i_generation; > + unsigned int i_streamid; Since there are only 8 bits of streamid being passed from userspace, is it possible to declare this as a char and pack it into a hole so that it doesn't increase the inode size for a functionality that most people won't be using? Maybe after i_bytes? That could be increased to unsigned short if needed without increasing the size of the inode. Cheers, Andreas > #ifdef CONFIG_FSNOTIFY > __u32 i_fsnotify_mask; /* all events this inode cares about */ > @@ -640,6 +641,14 @@ struct inode { > void *i_private; /* fs or device private pointer */ > }; > > +static inline unsigned int inode_streamid(struct inode *inode) > +{ > + if (inode) > + return inode->i_streamid; > + > + return 0; > +} > + > static inline int inode_unhashed(struct inode *inode) > { > return hlist_unhashed(&inode->i_hash); > @@ -820,6 +829,8 @@ struct file { > const struct cred *f_cred; > struct file_ra_state f_ra; > > + unsigned int f_streamid; > u64 f_version; > #ifdef CONFIG_SECURITY > void *f_security; > @@ -842,6 +853,18 @@ struct file_handle { > unsigned char f_handle[0]; > }; > > +/* > + * If the file doesn't have a stream ID set, return the inode stream ID > + * in case that has been set. > + */ > +static inline unsigned int file_streamid(struct file *f) > +{ > + if (f->f_streamid) > + return f->f_streamid; > + > + return inode_streamid(f->f_inode); > +} > + > static inline struct file *get_file(struct file *f) > { > atomic_long_inc(&f->f_count); > diff --git a/include/uapi/linux/fadvise.h b/include/uapi/linux/fadvise.h > index e8e747139b9a..3dc8a1ff1422 100644 > --- a/include/uapi/linux/fadvise.h > +++ b/include/uapi/linux/fadvise.h > @@ -18,4 +18,6 @@ > #define POSIX_FADV_NOREUSE 5 /* Data will be accessed once. */ > #endif > > +#define POSIX_FADV_STREAMID 8 /* associate stream ID with file */ > + > #endif /* FADVISE_H_INCLUDED */ > diff --git a/mm/fadvise.c b/mm/fadvise.c > index 4a3907cf79f8..b111a8899fb7 100644 > --- a/mm/fadvise.c > +++ b/mm/fadvise.c > @@ -60,6 +60,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) > case POSIX_FADV_WILLNEED: > case POSIX_FADV_NOREUSE: > case POSIX_FADV_DONTNEED: > + case POSIX_FADV_STREAMID: > /* no bad return value, but ignore advice */ > break; > default: > @@ -144,6 +145,22 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) > } > } > break; > + case POSIX_FADV_STREAMID: > + /* > + * streamid is stored in offset... we don't limit or check > + * if the device supports streams, or if it does, if the > + * stream nr is within the limits. 1 is the lowest valid > + * stream id, 0 is "don't care/know". > + */ > + if (offset != (unsigned int) offset) > + ret = EINVAL; > + else { > + f.file->f_streamid = offset; > + spin_lock(&inode->i_lock); > + inode->i_streamid = offset; > + spin_unlock(&inode->i_lock); > + } > + break; > default: > ret = -EINVAL; > } > -- > 1.9.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html Cheers, Andreas -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html