[PATCH 01/11] rbd: access snapshot context and mapping size safely

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



These fields may both change while the image is mapped if a snapshot
is created or deleted or the image is resized.  They are guarded by
rbd_dev->header_rwsem, so hold that while reading them, and store a
local copy to refer to outside of the critical section. The local copy
will stay consistent since the snapshot context is reference counted,
and the mapping size is just a u64. This prevents torn loads from
giving us inconsistent values.

Move reading header.snapc into the caller of rbd_img_request_create()
so that we only need to take the semaphore once. The read-only caller,
rbd_parent_request_create() can just pass NULL for snapc, since the
snapshot context is only relevant for writes.

Signed-off-by: Josh Durgin <josh.durgin@xxxxxxxxxxx>
---
 drivers/block/rbd.c |   31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4c95b50..b6676b6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1961,7 +1961,8 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
 static struct rbd_img_request *rbd_img_request_create(
 					struct rbd_device *rbd_dev,
 					u64 offset, u64 length,
-					bool write_request)
+					bool write_request,
+					struct ceph_snap_context *snapc)
 {
 	struct rbd_img_request *img_request;
 
@@ -1969,12 +1970,6 @@ static struct rbd_img_request *rbd_img_request_create(
 	if (!img_request)
 		return NULL;
 
-	if (write_request) {
-		down_read(&rbd_dev->header_rwsem);
-		ceph_get_snap_context(rbd_dev->header.snapc);
-		up_read(&rbd_dev->header_rwsem);
-	}
-
 	img_request->rq = NULL;
 	img_request->rbd_dev = rbd_dev;
 	img_request->offset = offset;
@@ -1982,7 +1977,7 @@ static struct rbd_img_request *rbd_img_request_create(
 	img_request->flags = 0;
 	if (write_request) {
 		img_request_write_set(img_request);
-		img_request->snapc = rbd_dev->header.snapc;
+		img_request->snapc = snapc;
 	} else {
 		img_request->snap_id = rbd_dev->spec->snap_id;
 	}
@@ -2038,8 +2033,8 @@ static struct rbd_img_request *rbd_parent_request_create(
 	rbd_assert(obj_request->img_request);
 	rbd_dev = obj_request->img_request->rbd_dev;
 
-	parent_request = rbd_img_request_create(rbd_dev->parent,
-						img_offset, length, false);
+	parent_request = rbd_img_request_create(rbd_dev->parent, img_offset,
+						length, false, NULL);
 	if (!parent_request)
 		return NULL;
 
@@ -3065,8 +3060,10 @@ static void rbd_request_fn(struct request_queue *q)
 	while ((rq = blk_fetch_request(q))) {
 		bool write_request = rq_data_dir(rq) == WRITE;
 		struct rbd_img_request *img_request;
+		struct ceph_snap_context *snapc = NULL;
 		u64 offset;
 		u64 length;
+		u64 mapping_size;
 
 		/* Ignore any non-FS requests that filter through. */
 
@@ -3120,8 +3117,16 @@ static void rbd_request_fn(struct request_queue *q)
 			goto end_request;	/* Shouldn't happen */
 		}
 
+		down_read(&rbd_dev->header_rwsem);
+		mapping_size = rbd_dev->mapping.size;
+		if (op_type != OBJ_OP_READ) {
+			snapc = rbd_dev->header.snapc;
+			ceph_get_snap_context(snapc);
+		}
+		up_read(&rbd_dev->header_rwsem);
+
 		result = -EIO;
-		if (offset + length > rbd_dev->mapping.size) {
+		if (offset + length > mapping_size) {
 			rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n",
 				offset, length, rbd_dev->mapping.size);
 			goto end_request;
@@ -3129,7 +3134,7 @@ static void rbd_request_fn(struct request_queue *q)
 
 		result = -ENOMEM;
 		img_request = rbd_img_request_create(rbd_dev, offset, length,
-							write_request);
+							write_request, snapc);
 		if (!img_request)
 			goto end_request;
 
@@ -3147,6 +3152,8 @@ end_request:
 			rbd_warn(rbd_dev, "%s %llx at %llx result %d\n",
 				write_request ? "write" : "read",
 				length, offset, result);
+			if (snapc)
+				ceph_put_snap_context(snapc);
 
 			__blk_end_request_all(rq, result);
 		}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux