Hi,
I am replying to my own thread. If you want to test these commits, you
will probably need my tcmu-runner patch, which I have attached; it works
fine with PR register, reserve, clear, release and preempt. Alternatively,
you can use my repo directly: https://github.com/ls-zhu/tcmu-runner/tree/rbd_v2_pr
It only works with Ceph RBD as the backend for now, because other TCMU
storage backends, such as qcow or a plain file, do not have anything like
metadata. If we want such support for devices other than RBD, we could
change their handlers in tcmu-runner, for example by allocating some
sectors that work like metadata. I know gluster may support this; we can
implement it after the kernel side has stabilized.
Thanks,
BR
Zhu Lingshan
On 2018/6/16 2:23, Zhu Lingshan wrote:
These commits and the following intend to implement Persistent
Reservation operations for TCMU devices.
This series of commits would implement such PR operations:
PR_Out_Register, PR_Out_Reserve, PR_Out_Clear, PR_Out_Preempt,
PR_Out_Release and PR_In_ReadKeys.
Next wave of patches will contain the other PR operations.
This patch adds a struct tcmu_pr_info to store PR information
for the handling functions, and adds command codes and attrs for
the netlink interfaces.
Design note:
In order to get consistent Persistent Reservation results from
multiple targets hosting the same TCMU device (like Ceph RBD),
this solution stores a string on the device itself (like RBD metadata).
Every time the kernel receives a PR request against a TCMU device,
it queries this string (a netlink attr carried by a netlink cmd),
then decides whether the PR request should be performed. After
processing, it updates this string.
For example:
When it receives a PR Reserve request, the kernel sends a netlink
message to tcmu-runner to get the string; tcmu-runner responds by
sending the PR info string to the kernel. The kernel then decodes
the string, finds information such as the key and reservation holder,
and processes the request. After processing, it updates the
string and sends the updated string to tcmu-runner, so that tcmu-runner
writes it back to the device (like RBD metadata).
So we make the device itself a "single" response point (with lock
protection), and we get a consistent result even when more than one
initiator sends multiple PR requests via multiple targets.
Signed-off-by: Zhu Lingshan <lszhu@xxxxxxxx>
---
include/uapi/linux/target_core_user.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 0be80f72646b..2d5c3e55d3f8 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -132,9 +132,13 @@ enum tcmu_genl_cmd {
TCMU_CMD_ADDED_DEVICE,
TCMU_CMD_REMOVED_DEVICE,
TCMU_CMD_RECONFIG_DEVICE,
+ TCMU_CMD_GET_PR_INFO,
+ TCMU_CMD_SET_PR_INFO,
TCMU_CMD_ADDED_DEVICE_DONE,
TCMU_CMD_REMOVED_DEVICE_DONE,
TCMU_CMD_RECONFIG_DEVICE_DONE,
+ TCMU_CMD_GET_PR_INFO_DONE,
+ TCMU_CMD_SET_PR_INFO_DONE,
TCMU_CMD_SET_FEATURES,
__TCMU_CMD_MAX,
};
@@ -151,8 +155,23 @@ enum tcmu_genl_attr {
TCMU_ATTR_CMD_STATUS,
TCMU_ATTR_DEVICE_ID,
TCMU_ATTR_SUPP_KERN_CMD_REPLY,
+ TCMU_ATTR_PR_INFO,
__TCMU_ATTR_MAX,
};
#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
+/*
+ * Holds the Persistent Reservation (PR) state that is currently being
+ * handled. It is encoded to, or decoded from, the string buffer in
+ * "struct tcmu_dev_pr_info".
+ *
+ * NOTE(review): "struct tcmu_dev_pr_info", "struct tcmu_scsi2_rsv" and
+ * "struct tcmu_pr_rsv" are not defined in this hunk -- confirm they are
+ * declared where this struct is used.
+ * NOTE(review): this lives in a uapi header, but u32 and struct list_head
+ * look like kernel-internal types -- confirm the struct belongs here.
+ */
+struct tcmu_pr_info {
+ u32 vers; /* on disk format version number */
+ u32 seq; /* sequence number bumped every xattr write */
+ struct tcmu_scsi2_rsv *scsi2_rsv; /* SCSI2 reservation if any */
+ u32 gen; /* PR generation number */
+ struct tcmu_pr_rsv *rsv; /* SCSI3 reservation if any */
+ u32 num_regs; /* number of registrations */
+ struct list_head regs; /* list of registrations */
+};
+
#endif
From 6a7029f03d092a86c9126bb4a72ab7c44b5abd6c Mon Sep 17 00:00:00 2001
From: Zhu Lingshan <lszhu@xxxxxxxx>
Date: Fri, 11 May 2018 16:54:51 +0800
Subject: [PATCH] pr_out_register and pr_in_readkeys can work
Signed-off-by: Zhu Lingshan <lszhu@xxxxxxxx>
---
libtcmu.c | 98 +++++++++++++++++++++++++++++++++++++++-
libtcmu.h | 4 ++
main.c | 2 +
rbd.c | 36 +++++++++++++++
target_core_user_local.h | 7 ++-
tcmu-runner.h | 3 ++
6 files changed, 147 insertions(+), 3 deletions(-)
diff --git a/libtcmu.c b/libtcmu.c
index d413020..826f482 100644
--- a/libtcmu.c
+++ b/libtcmu.c
@@ -84,6 +84,20 @@ static struct genl_cmd tcmu_cmds[] = {
.c_maxattr = TCMU_ATTR_MAX,
.c_attr_policy = tcmu_attr_policy,
},
+ {
+ .c_id = TCMU_CMD_GET_PR_INFO,
+ .c_name = "GET PR_INFO",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = TCMU_ATTR_MAX,
+ .c_attr_policy = tcmu_attr_policy,
+ },
+ {
+ .c_id = TCMU_CMD_SET_PR_INFO,
+ .c_name = "SET PR_INFO",
+ .c_msg_parser = handle_netlink,
+ .c_maxattr = TCMU_ATTR_MAX,
+ .c_attr_policy = tcmu_attr_policy,
+ },
};
static struct genl_ops tcmu_ops = {
@@ -93,7 +107,7 @@ static struct genl_ops tcmu_ops = {
};
static int send_netlink_reply(struct tcmulib_context *ctx, int reply_cmd,
- uint32_t dev_id, int status)
+ uint32_t dev_id, int status, char *data)
{
struct nl_sock *sock = ctx->nl_sock;
struct nl_msg *msg;
@@ -117,6 +131,12 @@ static int send_netlink_reply(struct tcmulib_context *ctx, int reply_cmd,
if (ret < 0)
goto free_msg;
+ if (reply_cmd == TCMU_CMD_GET_PR_INFO_DONE) {
+ ret = nla_put_string(msg, TCMU_ATTR_PR_INFO, data);
+ if (ret < 0)
+ goto free_msg;
+ }
+
/* Ignore ack. There is nothing we can do. */
ret = nl_send_auto(sock, msg);
free_msg:
@@ -197,12 +217,77 @@ static int reconfig_device(struct tcmulib_context *ctx, char *dev_name,
return 0;
}
+
+/*
+ * Hand the PR info string carried in the TCMU_ATTR_PR_INFO netlink
+ * attribute to the handler of the device named dev_name.
+ *
+ * Returns 0 on success, -ENODEV if the device is not found, -EOPNOTSUPP
+ * if the handler has no set_pr_info callback, -ENODATA if the attribute
+ * is missing, or the negative value returned by the callback.
+ */
+static int set_pr_info(struct tcmulib_context *ctx, char *dev_name,
+		       struct genl_info *info)
+{
+	struct tcmu_device *dev;
+	int idx, rc;
+
+	dev = lookup_dev_by_name(ctx, dev_name, &idx);
+	if (!dev) {
+		tcmu_err("Could not set PR info, device %s: not found.\n",
+			 dev_name);
+		return -ENODEV;
+	}
+
+	if (!dev->handler->set_pr_info) {
+		tcmu_dev_err(dev, "Setting PR info is not supported with this device.\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Without the attribute there is nothing to store. */
+	if (!info->attrs[TCMU_ATTR_PR_INFO]) {
+		tcmu_dev_err(dev, "Failed to obtain PR info from netlink.\n");
+		return -ENODATA;
+	}
+
+	rc = dev->handler->set_pr_info(dev,
+			nla_get_string(info->attrs[TCMU_ATTR_PR_INFO]));
+	if (rc >= 0)
+		return 0;
+
+	tcmu_dev_err(dev, "Failed to store PR info with error %d.\n", rc);
+	return rc;
+}
+
+/*
+ * Ask the handler of the device named dev_name for its PR info string;
+ * the handler's get_pr_info callback is given buf to fill in.
+ *
+ * Returns 0 on success, -ENODEV if the device is not found, -EOPNOTSUPP
+ * if the handler has no get_pr_info callback, or the negative value
+ * returned by the callback.
+ */
+static int get_pr_info(struct tcmulib_context *ctx, char *dev_name, char **buf)
+{
+	struct tcmu_device *dev;
+	int idx, rc;
+
+	dev = lookup_dev_by_name(ctx, dev_name, &idx);
+	if (!dev) {
+		tcmu_err("Could not get PR info, device %s: not found.\n", dev_name);
+		return -ENODEV;
+	}
+
+	if (!dev->handler->get_pr_info) {
+		tcmu_dev_err(dev, "Getting PR info is not supported with this device.\n");
+		return -EOPNOTSUPP;
+	}
+
+	rc = dev->handler->get_pr_info(dev, buf);
+	if (rc >= 0)
+		return 0;
+
+	tcmu_dev_err(dev, "Failed to get PR info with error %d.\n", rc);
+	return rc;
+}
+
static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd,
struct genl_info *info, void *arg)
{
struct tcmulib_context *ctx = arg;
int ret, reply_cmd, version = info->genlhdr->version;
char buf[32];
+ char *data = NULL;
tcmu_dbg("cmd %d. Got header version %d. Supported %d.\n",
cmd->c_id, info->genlhdr->version, TCMU_NL_VERSION);
@@ -237,6 +322,15 @@ static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd,
reply_cmd = TCMU_CMD_RECONFIG_DEVICE_DONE;
ret = reconfig_device(ctx, buf, info);
break;
+ case TCMU_CMD_GET_PR_INFO:
+ reply_cmd = TCMU_CMD_GET_PR_INFO_DONE;
+ ret = get_pr_info(ctx, buf, &data);
+ break;
+ case TCMU_CMD_SET_PR_INFO:
+ reply_cmd = TCMU_CMD_SET_PR_INFO_DONE;
+ ret = set_pr_info(ctx, buf, info);
+ break;
+
default:
tcmu_err("Unknown netlink command %d. Netlink header received version %d. libtcmu supports %d\n",
cmd->c_id, version, TCMU_NL_VERSION);
@@ -246,7 +340,7 @@ static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd,
if (version > 1)
ret = send_netlink_reply(ctx, reply_cmd,
nla_get_u32(info->attrs[TCMU_ATTR_DEVICE_ID]),
- ret);
+ ret, data);
return ret;
}
diff --git a/libtcmu.h b/libtcmu.h
index ee7f63d..a43bbbe 100644
--- a/libtcmu.h
+++ b/libtcmu.h
@@ -76,6 +76,10 @@ struct tcmulib_handler {
int (*added)(struct tcmu_device *dev);
void (*removed)(struct tcmu_device *dev);
+ int (*set_pr_info)(struct tcmu_device *dev, char *pr_info);
+ int (*get_pr_info)(struct tcmu_device *dev, char **buf);
+
+
void *hm_private; /* private ptr for handler module */
void *connection; /* private, dbus connection for this subtype */
};
diff --git a/main.c b/main.c
index 2e33741..3b12e21 100644
--- a/main.c
+++ b/main.c
@@ -1084,6 +1084,8 @@ int main(int argc, char **argv)
tmp_handler.subtype = (*tmp_r_handler)->subtype;
tmp_handler.cfg_desc = (*tmp_r_handler)->cfg_desc;
tmp_handler.check_config = (*tmp_r_handler)->check_config;
+ tmp_handler.set_pr_info = (*tmp_r_handler)->set_pr_info;
+ tmp_handler.get_pr_info = (*tmp_r_handler)->get_pr_info;
tmp_handler.reconfig = dev_reconfig;
tmp_handler.added = dev_added;
tmp_handler.removed = dev_removed;
diff --git a/rbd.c b/rbd.c
index 0d1ffe9..bc722e1 100644
--- a/rbd.c
+++ b/rbd.c
@@ -78,6 +78,9 @@
#define TCMU_RBD_LOCKER_TAG_FMT "tcmu_tag=%hu,rbd_client=%s"
#define TCMU_RBD_LOCKER_BUF_LEN 256
+#define TCMU_RBD_PR_INFO_MAX_SIZE 8192
+#define TCMU_PR_INFO_KEY "pr_info"
+
struct tcmu_rbd_state {
rados_t cluster;
rados_ioctx_t io_ctx;
@@ -801,6 +804,36 @@ static int tcmu_rbd_check_image_size(struct tcmu_device *dev, uint64_t new_size)
return 0;
}
+static rbd_image_t tcmu_dev_to_image(struct tcmu_device *dev);
+
+/*
+ * Store the PR info string buf in the metadata of the RBD image behind
+ * dev, under the key TCMU_PR_INFO_KEY.
+ *
+ * Returns whatever rbd_metadata_set() returns.
+ */
+static int tcmu_rbd_pr_set(struct tcmu_device *dev, char *buf)
+{
+	return rbd_metadata_set(tcmu_dev_to_image(dev), TCMU_PR_INFO_KEY, buf);
+}
+
+/*
+ * Read the PR info string stored under TCMU_PR_INFO_KEY in the metadata
+ * of the RBD image behind dev.
+ *
+ * On return *buf points to a newly allocated, zero-initialised buffer of
+ * TCMU_RBD_PR_INFO_MAX_SIZE bytes that rbd_metadata_get() was asked to
+ * fill. The buffer is handed back even when rbd_metadata_get() fails,
+ * because the netlink reply path forwards *buf regardless of the status.
+ *
+ * NOTE(review): no free() of this buffer is visible in the netlink path
+ * of this patch -- confirm the caller releases it.
+ *
+ * Returns -ENOMEM if the buffer cannot be allocated (*buf is then left
+ * untouched), otherwise the return value of rbd_metadata_get().
+ */
+static int tcmu_rbd_pr_get(struct tcmu_device *dev, char **buf)
+{
+	int ret;
+	size_t len = TCMU_RBD_PR_INFO_MAX_SIZE;
+	rbd_image_t image = tcmu_dev_to_image(dev);
+	char *pr_info_str;
+
+	/*
+	 * calloc() zero-fills the buffer; the result must be checked before
+	 * it is touched (the old memset() ran ahead of the NULL check).
+	 */
+	pr_info_str = calloc(1, TCMU_RBD_PR_INFO_MAX_SIZE);
+	if (!pr_info_str) {
+		tcmu_err("Not enough memory for getting PR info.\n");
+		return -ENOMEM;
+	}
+
+	ret = rbd_metadata_get(image, TCMU_PR_INFO_KEY, pr_info_str, &len);
+	*buf = pr_info_str;
+
+	return ret;
+}
+
static int tcmu_rbd_open(struct tcmu_device *dev, bool reopen)
{
rbd_image_info_t image_info;
@@ -904,6 +937,7 @@ static int tcmu_rbd_open(struct tcmu_device *dev, bool reopen)
tcmu_set_dev_write_cache_enabled(dev, 0);
free(dev_cfg_dup);
+ tcmu_rbd_pr_set(dev, "");
return 0;
stop_image:
@@ -1431,6 +1465,8 @@ struct tcmur_handler tcmu_rbd_handler = {
.read = tcmu_rbd_read,
.write = tcmu_rbd_write,
.reconfig = tcmu_rbd_reconfig,
+ .set_pr_info = tcmu_rbd_pr_set,
+ .get_pr_info = tcmu_rbd_pr_get,
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
.flush = tcmu_rbd_flush,
#endif
diff --git a/target_core_user_local.h b/target_core_user_local.h
index 2cdb3e5..aefa688 100644
--- a/target_core_user_local.h
+++ b/target_core_user_local.h
@@ -4,7 +4,7 @@
/* This header will be used by application too */
#include <linux/types.h>
-#include <linux/uio.h>
+//#include <linux/uio.h>
#define TCMU_VERSION "2.0"
@@ -131,9 +131,13 @@ enum tcmu_genl_cmd {
TCMU_CMD_ADDED_DEVICE,
TCMU_CMD_REMOVED_DEVICE,
TCMU_CMD_RECONFIG_DEVICE,
+ TCMU_CMD_GET_PR_INFO,
+ TCMU_CMD_SET_PR_INFO,
TCMU_CMD_ADDED_DEVICE_DONE,
TCMU_CMD_REMOVED_DEVICE_DONE,
TCMU_CMD_RECONFIG_DEVICE_DONE,
+ TCMU_CMD_GET_PR_INFO_DONE,
+ TCMU_CMD_SET_PR_INFO_DONE,
TCMU_CMD_SET_FEATURES,
__TCMU_CMD_MAX,
};
@@ -150,6 +154,7 @@ enum tcmu_genl_attr {
TCMU_ATTR_CMD_STATUS,
TCMU_ATTR_DEVICE_ID,
TCMU_ATTR_SUPP_KERN_CMD_REPLY,
+ TCMU_ATTR_PR_INFO,
__TCMU_ATTR_MAX,
};
#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
diff --git a/tcmu-runner.h b/tcmu-runner.h
index b423fc5..792ba7f 100644
--- a/tcmu-runner.h
+++ b/tcmu-runner.h
@@ -133,6 +133,9 @@ struct tcmur_handler {
* indicating success/failure.
*/
int (*get_lock_tag)(struct tcmu_device *dev, uint16_t *tag);
+ int (*set_pr_info)(struct tcmu_device *dev, char *pr_info);
+ int (*get_pr_info)(struct tcmu_device *dev, char **buf);
+
/*
* internal field, don't touch this
--
2.17.0