On Sun, 2014-03-23 at 15:31 -0500, Steve French wrote: > I merged it into cifs-2.6.git for-next branch but corrected the build > warning (sparse warning) below. See > http://git.samba.org/?p=sfrench/cifs-2.6.git;a=commit;h=b6d1cf24fd70518858f8576cd68781ffc846bed1 > for the (trivially) updated version of the patch. > > CHECK fs/cifs/smb1ops.c > fs/cifs/smb1ops.c:376:1: warning: symbol 'cifs_downgrade_oplock' was > not declared. Should it be static? > > CHECK fs/cifs/smb2ops.c > fs/cifs/smb2ops.c:908:1: warning: symbol 'smb2_downgrade_oplock' was > not declared. Should it be static? > > by adding static to the declaration of those two downgrade_oplock > functions. Let me know if you wanted to fix those differently. That is fine. Thanks Sachin Prabhu > > On Tue, Mar 11, 2014 at 11:11 AM, Sachin Prabhu <sprabhu@xxxxxxxxxx> wrote: > > Problem reported in Red Hat bz 1040329 for strict writes where we cache > > only when we hold oplock and write direct to the server when we don't. > > > > When we receive an oplock break, we first change the oplock value for > > the inode in cifsInodeInfo->oplock to indicate that we no longer hold > > the oplock before we enqueue a task to flush changes to the backing > > device. Once we have completed flushing the changes, we return the > > oplock to the server. > > > > There are 2 ways here where we can have data corruption > > 1) While we flush changes to the backing device as part of the oplock > > break, we can have processes write to the file. These writes check for > > the oplock, find none and attempt to write directly to the server. > > These direct writes made while we are flushing from cache could be > > overwritten by data being flushed from the cache causing data > > corruption. > > 2) While a thread runs in cifs_strict_writev, the machine could receive > > and process an oplock break after the thread has checked the oplock and > > found that it allows us to cache and before we have made changes to the > > cache. In that case, we end up with a dirty page in cache when we > > shouldn't have any. This will be flushed later and will overwrite all > > subsequent writes to the part of the file represented by this page. > > > > Before making any writes to the server, we need to confirm that we are > > not in the process of flushing data to the server and if we are, we > > should wait until the process is complete before we attempt the write. > > We should also wait for existing writes to complete before we process > > an oplock break request which changes oplock values. > > > > We add a version specific downgrade_oplock() operation to allow for > > differences in the oplock values set for the different smb versions. > > > > Cc: stable@xxxxxxxxxxxxxxx > > Signed-off-by: Sachin Prabhu <sprabhu@xxxxxxxxxx> > > --- > > fs/cifs/cifsfs.c | 14 +++++++++- > > fs/cifs/cifsglob.h | 8 ++++++ > > fs/cifs/cifsproto.h | 3 +++ > > fs/cifs/file.c | 31 +++++++++++++++++++--- > > fs/cifs/misc.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > > fs/cifs/smb1ops.c | 11 ++++++++ > > fs/cifs/smb2misc.c | 18 ++++++++++--- > > fs/cifs/smb2ops.c | 14 ++++++++++ > > 8 files changed, 164 insertions(+), 9 deletions(-) > > > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > > index 849f613..7c6b73c 100644 > > --- a/fs/cifs/cifsfs.c > > +++ b/fs/cifs/cifsfs.c > > @@ -253,6 +253,11 @@ cifs_alloc_inode(struct super_block *sb) > > cifs_set_oplock_level(cifs_inode, 0); > > cifs_inode->delete_pending = false; > > cifs_inode->invalid_mapping = false; > > + clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags); > > + clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags); > > + clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags); > > + spin_lock_init(&cifs_inode->writers_lock); > > + cifs_inode->writers = 0; > > cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ > > cifs_inode->server_eof = 0; > > cifs_inode->uniqueid = 0; > > @@ -731,19 +736,26 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, > > unsigned long nr_segs, loff_t pos) > > { > > struct inode *inode = file_inode(iocb->ki_filp); > > + struct cifsInodeInfo *cinode = CIFS_I(inode); > > ssize_t written; > > int rc; > > > > + written = cifs_get_writer(cinode); > > + if (written) > > + return written; > > + > > written = generic_file_aio_write(iocb, iov, nr_segs, pos); > > > > if (CIFS_CACHE_WRITE(CIFS_I(inode))) > > - return written; > > + goto out; > > > > rc = filemap_fdatawrite(inode->i_mapping); > > if (rc) > > cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n", > > rc, inode); > > > > +out: > > + cifs_put_writer(cinode); > > return written; > > } > > > > diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h > > index cf32f03..8885ce8 100644 > > --- a/fs/cifs/cifsglob.h > > +++ b/fs/cifs/cifsglob.h > > @@ -228,6 +228,8 @@ struct smb_version_operations { > > /* verify the message */ > > int (*check_message)(char *, unsigned int); > > bool (*is_oplock_break)(char *, struct TCP_Server_Info *); > > + void (*downgrade_oplock)(struct TCP_Server_Info *, > > + struct cifsInodeInfo *, bool); > > /* process transaction2 response */ > > bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, > > char *, int); > > @@ -1113,6 +1115,12 @@ struct cifsInodeInfo { > > unsigned int epoch; /* used to track lease state changes */ > > bool delete_pending; /* DELETE_ON_CLOSE is set */ > > bool invalid_mapping; /* pagecache is invalid */ > > + unsigned long flags; > > +#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ > > +#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ > > +#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */ > > + spinlock_t writers_lock; > > + unsigned int writers; /* Number of writers on this inode */ > > unsigned long time; /* jiffies of last update of inode */ > > u64 server_eof; /* current file size on server -- protected by i_lock */ > > u64 uniqueid; /* server inode number */ > > diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h > > index acc4ee8..ca7980a 100644 > > --- a/fs/cifs/cifsproto.h > > +++ b/fs/cifs/cifsproto.h > > @@ -127,6 +127,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec); > > extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, > > int offset); > > extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); > > +extern int cifs_get_writer(struct cifsInodeInfo *cinode); > > +extern void cifs_put_writer(struct cifsInodeInfo *cinode); > > +extern void cifs_done_oplock_break(struct cifsInodeInfo *cinode); > > extern int cifs_unlock_range(struct cifsFileInfo *cfile, > > struct file_lock *flock, const unsigned int xid); > > extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); > > diff --git a/fs/cifs/file.c b/fs/cifs/file.c > > index 53c1507..9d07950 100644 > > --- a/fs/cifs/file.c > > +++ b/fs/cifs/file.c > > @@ -2620,12 +2620,20 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, > > struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); > > ssize_t written; > > > > + written = cifs_get_writer(cinode); > > + if (written) > > + return written; > > + > > if (CIFS_CACHE_WRITE(cinode)) { > > if (cap_unix(tcon->ses) && > > (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) > > - && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) > > - return generic_file_aio_write(iocb, iov, nr_segs, pos); > > - return cifs_writev(iocb, iov, nr_segs, pos); > > + && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { > > + written = generic_file_aio_write( > > + iocb, iov, nr_segs, pos); > > + goto out; > > + } > > + written = cifs_writev(iocb, iov, nr_segs, pos); > > + goto out; > > } > > /* > > * For non-oplocked files in strict cache mode we need to write the data > > @@ -2645,6 +2653,8 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, > > inode); > > cinode->oplock = 0; > > } > > +out: > > + cifs_put_writer(cinode); > > return written; > > } > > > > @@ -3656,6 +3666,13 @@ static int cifs_launder_page(struct page *page) > > return rc; > > } > > > > +static int > > +cifs_pending_writers_wait(void *unused) > > +{ > > + schedule(); > > + return 0; > > +} > > + > > void cifs_oplock_break(struct work_struct *work) > > { > > struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, > > @@ -3663,8 +3680,15 @@ void cifs_oplock_break(struct work_struct *work) > > struct inode *inode = cfile->dentry->d_inode; > > struct cifsInodeInfo *cinode = CIFS_I(inode); > > struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); > > + struct TCP_Server_Info *server = tcon->ses->server; > > int rc = 0; > > > > + wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, > > + cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE); > > + > > + server->ops->downgrade_oplock(server, cinode, > > + test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags)); > > + > > if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && > > cifs_has_mand_locks(cinode)) { > > cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", > > @@ -3701,6 +3725,7 @@ void cifs_oplock_break(struct work_struct *work) > > cinode); > > cifs_dbg(FYI, "Oplock release rc = %d\n", rc); > > } > > + cifs_done_oplock_break(cinode); > > } > > > > /* > > diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c > > index 2f9f379..3b0c62e 100644 > > --- a/fs/cifs/misc.c > > +++ b/fs/cifs/misc.c > > @@ -466,8 +466,22 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) > > cifs_dbg(FYI, "file id match, oplock break\n"); > > pCifsInode = CIFS_I(netfile->dentry->d_inode); > > > > - cifs_set_oplock_level(pCifsInode, > > - pSMB->OplockLevel ? OPLOCK_READ : 0); > > + set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, > > + &pCifsInode->flags); > > + > > + /* > > + * Set flag if the server downgrades the oplock > > + * to L2 else clear. > > + */ > > + if (pSMB->OplockLevel) > > + set_bit( > > + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, > > + &pCifsInode->flags); > > + else > > + clear_bit( > > + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, > > + &pCifsInode->flags); > > + > > queue_work(cifsiod_wq, > > &netfile->oplock_break); > > netfile->oplock_break_cancelled = false; > > @@ -551,6 +565,62 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) > > cinode->oplock = 0; > > } > > > > +static int > > +cifs_oplock_break_wait(void *unused) > > +{ > > + schedule(); > > + return signal_pending(current) ? -ERESTARTSYS : 0; > > +} > > + > > +/* > > + * We wait for oplock breaks to be processed before we attempt to perform > > + * writes. > > + */ > > +int cifs_get_writer(struct cifsInodeInfo *cinode) > > +{ > > + int rc; > > + > > +start: > > + rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK, > > + cifs_oplock_break_wait, TASK_KILLABLE); > > + if (rc) > > + return rc; > > + > > + spin_lock(&cinode->writers_lock); > > + if (!cinode->writers) > > + set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); > > + cinode->writers++; > > + /* Check to see if we have started servicing an oplock break */ > > + if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) { > > + cinode->writers--; > > + if (cinode->writers == 0) { > > + clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); > > + wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS); > > + } > > + spin_unlock(&cinode->writers_lock); > > + goto start; > > + } > > + spin_unlock(&cinode->writers_lock); > > + return 0; > > +} > > + > > +void cifs_put_writer(struct cifsInodeInfo *cinode) > > +{ > > + spin_lock(&cinode->writers_lock); > > + cinode->writers--; > > + if (cinode->writers == 0) { > > + clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); > > + wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS); > > + } > > + spin_unlock(&cinode->writers_lock); > > +} > > + > > +void cifs_done_oplock_break(struct cifsInodeInfo *cinode) > > +{ > > + clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); > > + wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK); > > +} > > + > > bool > > backup_cred(struct cifs_sb_info *cifs_sb) > > { > > diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c > > index 526fb89..2cc7d78 100644 > > --- a/fs/cifs/smb1ops.c > > +++ b/fs/cifs/smb1ops.c > > @@ -372,6 +372,16 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) > > return 0; > > } > > > > +void > > +cifs_downgrade_oplock(struct TCP_Server_Info *server, > > + struct cifsInodeInfo *cinode, bool set_level2) > > +{ > > + if (set_level2) > > + cifs_set_oplock_level(cinode, OPLOCK_READ); > > + else > > + cifs_set_oplock_level(cinode, 0); > > +} > > + > > static bool > > cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server, > > char *buf, int malformed) > > @@ -1019,6 +1029,7 @@ struct smb_version_operations smb1_operations = { > > .clear_stats = cifs_clear_stats, > > .print_stats = cifs_print_stats, > > .is_oplock_break = is_valid_oplock_break, > > + .downgrade_oplock = cifs_downgrade_oplock, > > .check_trans2 = cifs_check_trans2, > > .need_neg = cifs_need_neg, > > .negotiate = cifs_negotiate, > > diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c > > index fb39662..b8021fd 100644 > > --- a/fs/cifs/smb2misc.c > > +++ b/fs/cifs/smb2misc.c > > @@ -575,9 +575,21 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) > > else > > cfile->oplock_break_cancelled = false; > > > > - server->ops->set_oplock_level(cinode, > > - rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0, > > - 0, NULL); > > + set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, > > + &cinode->flags); > > + > > + /* > > + * Set flag if the server downgrades the oplock > > + * to L2 else clear. > > + */ > > + if (rsp->OplockLevel) > > + set_bit( > > + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, > > + &cinode->flags); > > + else > > + clear_bit( > > + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, > > + &cinode->flags); > > > > queue_work(cifsiod_wq, &cfile->oplock_break); > > > > diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c > > index 192f51a..ab8964d 100644 > > --- a/fs/cifs/smb2ops.c > > +++ b/fs/cifs/smb2ops.c > > @@ -904,6 +904,17 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, > > return rc; > > } > > > > +void > > +smb2_downgrade_oplock(struct TCP_Server_Info *server, > > + struct cifsInodeInfo *cinode, bool set_level2) > > +{ > > + if (set_level2) > > + server->ops->set_oplock_level(cinode, SMB2_OPLOCK_LEVEL_II, > > + 0, NULL); > > + else > > + server->ops->set_oplock_level(cinode, 0, 0, NULL); > > +} > > + > > static void > > smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, > > unsigned int epoch, bool *purge_cache) > > @@ -1110,6 +1121,7 @@ struct smb_version_operations smb20_operations = { > > .clear_stats = smb2_clear_stats, > > .print_stats = smb2_print_stats, > > .is_oplock_break = smb2_is_valid_oplock_break, > > + .downgrade_oplock = smb2_downgrade_oplock, > > .need_neg = smb2_need_neg, > > .negotiate = smb2_negotiate, > > .negotiate_wsize = smb2_negotiate_wsize, > > @@ -1184,6 +1196,7 @@ struct smb_version_operations smb21_operations = { > > .clear_stats = smb2_clear_stats, > > .print_stats = smb2_print_stats, > > .is_oplock_break = smb2_is_valid_oplock_break, > > + .downgrade_oplock = smb2_downgrade_oplock, > > .need_neg = smb2_need_neg, > > .negotiate = smb2_negotiate, > > .negotiate_wsize = smb2_negotiate_wsize, > > @@ -1259,6 +1272,7 @@ struct smb_version_operations smb30_operations = { > > .print_stats = smb2_print_stats, > > .dump_share_caps = smb2_dump_share_caps, > > .is_oplock_break = smb2_is_valid_oplock_break, > > + .downgrade_oplock = smb2_downgrade_oplock, > > .need_neg = smb2_need_neg, > > .negotiate = smb2_negotiate, > > .negotiate_wsize = smb2_negotiate_wsize, > > -- > > 1.8.4.2 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-cifs" in > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > -- To unsubscribe from this list: send the line "unsubscribe linux-cifs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html