Hisashi Hifumi a écrit : > Hi, > > Following patch removes i_mutex from generic_file_llseek. > I think the reason of protecting lseek with i_mutex is just > touching i_size (and f_pos) atomically. > So i_mutex is no longer needed here by introducing i_size_read > to read i_size. > I also made f_pos access atomic here without i_mutex regarding > former i_mutex holders by introducing file_pos_read/file_pos_write > that use seqlock to preserve atomicity. > > Following patch removes i_mutex from generic_file_llseek, and deletes > generic_file_llseek_nolock totally. > > Currently there is i_mutex contention not only around lseek, but also fsync or write. > So, I think we can mitigate i_mutex contention between fsync lseek and write by > removing i_mutex. > > Thanks. One problem with this patch is : Currently on x86_32, filp kmem_cache uses 128 bytes per object, because sizeof(struct file) = 128 Adding a seqlock_t makes sizeof(struct file) = 136, and filp kmem_cache uses 192 or 256 bytes per object, a 50% or 100 % increase, depending on processor (64 or 128 bytes L1 cache lines) Maybe just use existing f_lock (spinlock) ? A seqlock is nice whe you have many readers on SMP. But on every file syscall (mentioned in your changelog) every "seqlock reader" will have to take a reference on f_count any way, so the cache line is already dirtied : We already lost seqlock benefit. f_pos is not a "read mostly" field, its quite the reverse in the real world. > > Signed-off-by: Hisashi Hifumi <hifumi.hisashi@xxxxxxxxxxxxx> > > diff -Nrup linux-2.6.30-rc5.org/fs/cifs/cifsfs.c linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c > --- linux-2.6.30-rc5.org/fs/cifs/cifsfs.c 2009-05-11 10:07:14.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c 2009-05-11 10:10:36.000000000 +0900 > @@ -635,7 +635,7 @@ static loff_t cifs_llseek(struct file *f > if (retval < 0) > return (loff_t)retval; > } > - return generic_file_llseek_unlocked(file, offset, origin); > + return generic_file_llseek(file, offset, origin); > } > > #ifdef CONFIG_CIFS_EXPERIMENTAL > diff -Nrup linux-2.6.30-rc5.org/fs/file_table.c linux-2.6.30-rc5.lseek/fs/file_table.c > --- linux-2.6.30-rc5.org/fs/file_table.c 2009-05-11 10:07:14.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/fs/file_table.c 2009-05-11 10:10:36.000000000 +0900 > @@ -126,6 +126,7 @@ struct file *get_empty_filp(void) > > INIT_LIST_HEAD(&f->f_u.fu_list); > atomic_long_set(&f->f_count, 1); > + f_pos_ordered_init(f); > rwlock_init(&f->f_owner.lock); > f->f_cred = get_cred(cred); > spin_lock_init(&f->f_lock); > diff -Nrup linux-2.6.30-rc5.org/fs/gfs2/ops_file.c linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c > --- linux-2.6.30-rc5.org/fs/gfs2/ops_file.c 2009-05-11 10:07:15.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c 2009-05-11 10:10:36.000000000 +0900 > @@ -63,11 +63,11 @@ static loff_t gfs2_llseek(struct file *f > error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, > &i_gh); > if (!error) { > - error = generic_file_llseek_unlocked(file, offset, origin); > + error = generic_file_llseek(file, offset, origin); > gfs2_glock_dq_uninit(&i_gh); > } > } else > - error = generic_file_llseek_unlocked(file, offset, origin); > + error = generic_file_llseek(file, offset, origin); > > return error; > } > diff -Nrup linux-2.6.30-rc5.org/fs/ncpfs/file.c linux-2.6.30-rc5.lseek/fs/ncpfs/file.c > --- linux-2.6.30-rc5.org/fs/ncpfs/file.c 2009-03-24 08:12:14.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/fs/ncpfs/file.c 2009-05-11 10:10:36.000000000 +0900 > @@ -286,7 +286,7 @@ static loff_t ncp_remote_llseek(struct f > { > loff_t ret; > lock_kernel(); > - ret = generic_file_llseek_unlocked(file, offset, origin); > + ret = generic_file_llseek(file, offset, origin); > unlock_kernel(); > return ret; > } > diff -Nrup linux-2.6.30-rc5.org/fs/nfs/file.c linux-2.6.30-rc5.lseek/fs/nfs/file.c > --- linux-2.6.30-rc5.org/fs/nfs/file.c 2009-05-11 10:07:15.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/fs/nfs/file.c 2009-05-11 10:10:36.000000000 +0900 > @@ -188,10 +188,10 @@ static loff_t nfs_file_llseek(struct fil > return (loff_t)retval; > > spin_lock(&inode->i_lock); > - loff = generic_file_llseek_unlocked(filp, offset, origin); > + loff = generic_file_llseek(filp, offset, origin); > spin_unlock(&inode->i_lock); > } else > - loff = generic_file_llseek_unlocked(filp, offset, origin); > + loff = generic_file_llseek(filp, offset, origin); > return loff; > } > > diff -Nrup linux-2.6.30-rc5.org/fs/read_write.c linux-2.6.30-rc5.lseek/fs/read_write.c > --- linux-2.6.30-rc5.org/fs/read_write.c 2009-05-11 10:07:15.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/fs/read_write.c 2009-05-11 10:10:36.000000000 +0900 > @@ -31,23 +31,61 @@ const struct file_operations generic_ro_ > > EXPORT_SYMBOL(generic_ro_fops); > > +static inline loff_t file_pos_read(struct file *file) > +{ > +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) > + loff_t pos; > + unsigned int seq; > + > + do { > + seq = read_seqbegin(&file->f_pos_seqlock); > + pos = file->f_pos; > + } while (read_seqretry(&file->f_pos_seqlock, seq)); > + return pos; > +#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT) > + loff_t pos; > + > + preempt_disable(); > + pos = file->f_pos; > + preempt_enable(); > + return pos; > +#else > + return file->f_pos; > +#endif > +} > + > +static inline void file_pos_write(struct file *file, loff_t pos) > +{ > +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) > + write_seqlock(&file->f_pos_seqlock); > + file->f_pos = pos; > + write_sequnlock(&file->f_pos_seqlock); > +#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT) > + preempt_disable(); > + file->f_pos = pos; > + preempt_enable(); > +#else > + file->f_pos = pos; > +#endif > +} > + > /** > - * generic_file_llseek_unlocked - lockless generic llseek implementation > + * generic_file_llseek - generic llseek implementation for regular files > * @file: file structure to seek on > * @offset: file offset to seek to > * @origin: type of seek > * > - * Updates the file offset to the value specified by @offset and @origin. > - * Locking must be provided by the caller. > + * This is a generic implemenation of ->llseek useable for all normal local > + * filesystems. It just updates the file offset to the value specified by > + * @offset and @origin. > */ > -loff_t > -generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) > +loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) > { > struct inode *inode = file->f_mapping->host; > > switch (origin) { > case SEEK_END: > - offset += inode->i_size; > + offset += i_size_read(inode); > break; > case SEEK_CUR: > /* > @@ -57,8 +95,8 @@ generic_file_llseek_unlocked(struct file > * write() or lseek() might have altered it > */ > if (offset == 0) > - return file->f_pos; > - offset += file->f_pos; > + return file_pos_read(file); > + offset += file_pos_read(file); > break; > } > > @@ -66,35 +104,13 @@ generic_file_llseek_unlocked(struct file > return -EINVAL; > > /* Special lock needed here? */ > - if (offset != file->f_pos) { > - file->f_pos = offset; > + if (offset != file_pos_read(file)) { > + file_pos_write(file, offset); > file->f_version = 0; > } > > return offset; > } > -EXPORT_SYMBOL(generic_file_llseek_unlocked); > - > -/** > - * generic_file_llseek - generic llseek implementation for regular files > - * @file: file structure to seek on > - * @offset: file offset to seek to > - * @origin: type of seek > - * > - * This is a generic implemenation of ->llseek useable for all normal local > - * filesystems. It just updates the file offset to the value specified by > - * @offset and @origin under i_mutex. > - */ > -loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) > -{ > - loff_t rval; > - > - mutex_lock(&file->f_dentry->d_inode->i_mutex); > - rval = generic_file_llseek_unlocked(file, offset, origin); > - mutex_unlock(&file->f_dentry->d_inode->i_mutex); > - > - return rval; > -} > EXPORT_SYMBOL(generic_file_llseek); > > loff_t no_llseek(struct file *file, loff_t offset, int origin) > @@ -359,16 +375,6 @@ ssize_t vfs_write(struct file *file, con > > EXPORT_SYMBOL(vfs_write); > > -static inline loff_t file_pos_read(struct file *file) > -{ > - return file->f_pos; > -} > - > -static inline void file_pos_write(struct file *file, loff_t pos) > -{ > - file->f_pos = pos; > -} > - > SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) > { > struct file *file; > diff -Nrup linux-2.6.30-rc5.org/fs/smbfs/file.c linux-2.6.30-rc5.lseek/fs/smbfs/file.c > --- linux-2.6.30-rc5.org/fs/smbfs/file.c 2009-03-24 08:12:14.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/fs/smbfs/file.c 2009-05-11 10:10:36.000000000 +0900 > @@ -426,7 +426,7 @@ static loff_t smb_remote_llseek(struct f > { > loff_t ret; > lock_kernel(); > - ret = generic_file_llseek_unlocked(file, offset, origin); > + ret = generic_file_llseek(file, offset, origin); > unlock_kernel(); > return ret; > } > diff -Nrup linux-2.6.30-rc5.org/include/linux/fs.h linux-2.6.30-rc5.lseek/include/linux/fs.h > --- linux-2.6.30-rc5.org/include/linux/fs.h 2009-05-11 10:07:17.000000000 +0900 > +++ linux-2.6.30-rc5.lseek/include/linux/fs.h 2009-05-11 10:10:36.000000000 +0900 > @@ -896,6 +896,16 @@ static inline int ra_has_index(struct fi > #define FILE_MNT_WRITE_TAKEN 1 > #define FILE_MNT_WRITE_RELEASED 2 > > +/* > + * Use sequence lock to get consistent f_pos on 32-bit processors. > + */ > +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) > +#define __NEED_F_POS_ORDERED > +#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock) > +#else > +#define f_pos_ordered_init(file) do { } while (0) > +#endif > + > struct file { > /* > * fu_list becomes invalid after file_free is called and queued via > @@ -914,6 +924,9 @@ struct file { > unsigned int f_flags; > fmode_t f_mode; > loff_t f_pos; > +#ifdef __NEED_F_POS_ORDERED > + seqlock_t f_pos_seqlock; > +#endif > struct fown_struct f_owner; > const struct cred *f_cred; > struct file_ra_state f_ra; > @@ -2215,8 +2228,6 @@ extern void > file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); > extern loff_t no_llseek(struct file *file, loff_t offset, int origin); > extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); > -extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, > - int origin); > extern int generic_file_open(struct inode * inode, struct file * filp); > extern int nonseekable_open(struct inode * inode, struct file * filp); > > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html