This is actually pretty easy since fs/dlm already handles the bulk of the work. The Ocfs2 userspace cluster stack module already uses fs/dlm as the underlying lock manager, so I only had to add the right calls. Cluster-aware POSIX locks ("plocks") can be turned off by the same means as UNIX locks - mount with 'localflocks', or create a local-only Ocfs2 volume. Internally, the file system uses two sets of file_operations, depending on whether cluster-aware plocks are required. This turns out to be easier than implementing local-only versions of ->lock. Signed-off-by: Mark Fasheh <mfasheh@xxxxxxxx> --- fs/ocfs2/file.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/file.h | 2 + fs/ocfs2/inode.c | 15 ++++++++++++- fs/ocfs2/locks.c | 15 ++++++++++++++ fs/ocfs2/locks.h | 1 + fs/ocfs2/stack_user.c | 33 +++++++++++++++++++++++++++++++ fs/ocfs2/stackglue.c | 20 +++++++++++++++++++ fs/ocfs2/stackglue.h | 19 ++++++++++++++++++ 8 files changed, 154 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ec2ed15..60232b1 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2236,6 +2236,10 @@ const struct inode_operations ocfs2_special_file_iops = { .permission = ocfs2_permission, }; +/* + * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with + * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! + */ const struct file_operations ocfs2_fops = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -2250,6 +2254,7 @@ const struct file_operations ocfs2_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif + .lock = ocfs2_lock, .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, .splice_write = ocfs2_file_splice_write, @@ -2266,5 +2271,51 @@ const struct file_operations ocfs2_dops = { #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif + .lock = ocfs2_lock, + .flock = ocfs2_flock, +}; + +/* + * POSIX-lockless variants of our file_operations. 
+ * + * These will be used if the underlying cluster stack does not support + * posix file locking, if the user passes the "localflocks" mount + * option, or if we have a local-only fs. + * + * ocfs2_flock is in here because all stacks handle UNIX file locks, + * so we still want it in the case of no stack support for + * plocks. Internally, it will do the right thing when asked to ignore + * the cluster. + */ +const struct file_operations ocfs2_fops_no_plocks = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .mmap = ocfs2_mmap, + .fsync = ocfs2_sync_file, + .release = ocfs2_file_release, + .open = ocfs2_file_open, + .aio_read = ocfs2_file_aio_read, + .aio_write = ocfs2_file_aio_write, + .unlocked_ioctl = ocfs2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ocfs2_compat_ioctl, +#endif + .flock = ocfs2_flock, + .splice_read = ocfs2_file_splice_read, + .splice_write = ocfs2_file_splice_write, +}; + +const struct file_operations ocfs2_dops_no_plocks = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = ocfs2_readdir, + .fsync = ocfs2_sync_file, + .release = ocfs2_dir_release, + .open = ocfs2_dir_open, + .unlocked_ioctl = ocfs2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ocfs2_compat_ioctl, +#endif .flock = ocfs2_flock, }; diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 1e27b4d..5a6d3e4 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -28,6 +28,8 @@ extern const struct file_operations ocfs2_fops; extern const struct file_operations ocfs2_dops; +extern const struct file_operations ocfs2_fops_no_plocks; +extern const struct file_operations ocfs2_dops_no_plocks; extern const struct inode_operations ocfs2_file_iops; extern const struct inode_operations ocfs2_special_file_iops; struct ocfs2_alloc_context; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7e9e4c7..99f012a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -219,6 +219,7 @@ int ocfs2_populate_inode(struct inode *inode, struct 
ocfs2_dinode *fe, struct super_block *sb; struct ocfs2_super *osb; int status = -EINVAL; + int use_plocks = 1; mlog_entry("(0x%p, size:%llu)\n", inode, (unsigned long long)le64_to_cpu(fe->i_size)); @@ -226,6 +227,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, sb = inode->i_sb; osb = OCFS2_SB(sb); + if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || + ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) + use_plocks = 0; + /* this means that read_inode cannot create a superblock inode * today. change if needed. */ if (!OCFS2_IS_VALID_DINODE(fe) || @@ -295,13 +300,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, switch (inode->i_mode & S_IFMT) { case S_IFREG: - inode->i_fop = &ocfs2_fops; + if (use_plocks) + inode->i_fop = &ocfs2_fops; + else + inode->i_fop = &ocfs2_fops_no_plocks; inode->i_op = &ocfs2_file_iops; i_size_write(inode, le64_to_cpu(fe->i_size)); break; case S_IFDIR: inode->i_op = &ocfs2_dir_iops; - inode->i_fop = &ocfs2_dops; + if (use_plocks) + inode->i_fop = &ocfs2_dops; + else + inode->i_fop = &ocfs2_dops_no_plocks; i_size_write(inode, le64_to_cpu(fe->i_size)); break; case S_IFLNK: diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 203f871..544ac62 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -24,6 +24,7 @@ */ #include <linux/fs.h> +#include <linux/fcntl.h> #define MLOG_MASK_PREFIX ML_INODE #include <cluster/masklog.h> @@ -32,6 +33,7 @@ #include "dlmglue.h" #include "file.h" +#include "inode.h" #include "locks.h" static int ocfs2_do_flock(struct file *file, struct inode *inode, @@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) else return ocfs2_do_flock(file, inode, cmd, fl); } + +int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct inode *inode = file->f_mapping->host; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (!(fl->fl_flags & FL_POSIX)) + return -ENOLCK; + if (__mandatory_lock(inode)) + return 
-ENOLCK; + + return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); +} diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h index 9743ef2..496d488 100644 --- a/fs/ocfs2/locks.h +++ b/fs/ocfs2/locks.h @@ -27,5 +27,6 @@ #define OCFS2_LOCKS_H int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); +int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl); #endif /* OCFS2_LOCKS_H */ diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 353fc35..faec2d8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -28,6 +28,7 @@ #include "ocfs2.h" /* For struct ocfs2_lock_res */ #include "stackglue.h" +#include <linux/dlm_plock.h> /* * The control protocol starts with a handshake. Until the handshake @@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) { } +static int user_plock(struct ocfs2_cluster_connection *conn, + u64 ino, + struct file *file, + int cmd, + struct file_lock *fl) +{ + /* + * This more or less just demuxes the plock request into any + * one of three dlm calls. + * + * Internally, fs/dlm will pass these to a misc device, which + * a userspace daemon will read and write to. + * + * For now, cancel requests (which happen internally only) + * are turned into unlocks. Most of this function is taken from + * gfs2_lock. + */ + + if (cmd == F_CANCELLK) { + cmd = F_SETLK; + fl->fl_type = F_UNLCK; + } + + if (IS_GETLK(cmd)) + return dlm_posix_get(conn->cc_lockspace, ino, file, fl); + else if (fl->fl_type == F_UNLCK) + return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); + else + return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); +} + /* * Compare a requested locking protocol version against the current one. 
* @@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .dlm_unlock = user_dlm_unlock, .lock_status = user_dlm_lock_status, .lock_lvb = user_dlm_lvb, + .plock = user_plock, .dump_lksb = user_dlm_dump_lksb, }; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 07f348b..7150f5d 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -288,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) } EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); +int ocfs2_stack_supports_plocks(void) +{ + return !!(active_stack && active_stack->sp_ops->plock); +} +EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks); + +/* + * ocfs2_plock() can only be safely called if + * ocfs2_stack_supports_plocks() returned true + */ +int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, + struct file *file, int cmd, struct file_lock *fl) +{ + WARN_ON_ONCE(active_stack->sp_ops->plock == NULL); + if (active_stack->sp_ops->plock) + return active_stack->sp_ops->plock(conn, ino, file, cmd, fl); + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(ocfs2_plock); + int ocfs2_cluster_connect(const char *stack_name, const char *group, int grouplen, diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index db56281..c571af3 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -28,6 +28,10 @@ #include "dlm/dlmapi.h" #include <linux/dlm.h> +/* Needed for plock-related prototypes */ +struct file; +struct file_lock; + /* * dlmconstants.h does not have a LOCAL flag. We hope to remove it * some day, but right now we need it. Let's fake it. This value is larger @@ -187,6 +191,17 @@ struct ocfs2_stack_operations { void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); /* + * Cluster-aware posix locks + * + * This is NULL for stacks which do not support posix locks. + */ + int (*plock)(struct ocfs2_cluster_connection *conn, + u64 ino, + struct file *file, + int cmd, + struct file_lock *fl); + + /* * This is an optoinal debugging hook. 
If provided, the * stack can dump debugging information about this lock. */ @@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); +int ocfs2_stack_supports_plocks(void); +int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, + struct file *file, int cmd, struct file_lock *fl); + void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); -- 1.5.4.5 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html