Re: discard and barriers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jens, do you think the following patch is still okay for 3.6.36?
It fixes the massive performance regression due to the full barriers
for discard and also makes the barrier removal in 2.6.37 a lot simpler.

---

From: Christoph Hellwig <hch@xxxxxx>
Subject: [PATCH] block: do not send discards as barriers

There is no reason to send discards as barriers.  The rationale for that
is easy:  The current barriers do two things, flushing caches and provide
ordering.  There is no reason for a cache flush before a discard because
the discard doesn't care for ordering vs writes to other regions than
the one it discards, and there is no reason for a cache flush afterwards
as a discard doesn't store data.  The ordering semantics aren't used
currently because all discards are done synchronously and thus we
order explicitly.

Even more until very late in the 2.6.35 cycle we didn't send DISCARD_BARRIER
requests as real hardbarrier but as an elevator only barrier which doesn't
provide the above semantics, and the switch to real barriers causes masssive
performance regressions especially for the swap code.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: linux-2.6/block/blk-lib.c
===================================================================
--- linux-2.6.orig/block/blk-lib.c	2010-08-23 15:16:37.656033352 +0200
+++ linux-2.6/block/blk-lib.c	2010-08-23 15:16:45.913012470 +0200
@@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_de
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
-	int type = flags & BLKDEV_IFL_BARRIER ?
-		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int max_discard_sectors;
 	struct bio *bio;
 	int ret = 0;
@@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_de
 	if (flags & BLKDEV_IFL_SECURE) {
 		if (!blk_queue_secdiscard(q))
 			return -EOPNOTSUPP;
-		type |= DISCARD_SECURE;
+		type |= REQ_SECURE;
 	}
 
 	while (nr_sects && !ret) {
@@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_de
 	bb.wait = &wait;
 	bb.end_io = NULL;
 
-	if (flags & BLKDEV_IFL_BARRIER) {
-		/* issue async barrier before the data */
-		ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
-		if (ret)
-			return ret;
-	}
 submit:
 	ret = 0;
 	while (nr_sects != 0) {
@@ -199,13 +192,6 @@ submit:
 		issued++;
 		submit_bio(WRITE, bio);
 	}
-	/*
-	 * When all data bios are in flight. Send final barrier if requeted.
-	 */
-	if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
-		ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
-					flags & BLKDEV_IFL_WAIT);
-
 
 	if (flags & BLKDEV_IFL_WAIT)
 		/* Wait for bios in-flight */
Index: linux-2.6/fs/btrfs/extent-tree.c
===================================================================
--- linux-2.6.orig/fs/btrfs/extent-tree.c	2010-08-23 15:16:37.677004089 +0200
+++ linux-2.6/fs/btrfs/extent-tree.c	2010-08-23 15:16:45.918004368 +0200
@@ -1696,7 +1696,7 @@ static void btrfs_issue_discard(struct b
 				u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
-			BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+			BLKDEV_IFL_WAIT);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
Index: linux-2.6/fs/gfs2/rgrp.c
===================================================================
--- linux-2.6.orig/fs/gfs2/rgrp.c	2010-08-23 15:16:37.684004298 +0200
+++ linux-2.6/fs/gfs2/rgrp.c	2010-08-23 15:16:45.927004298 +0200
@@ -854,8 +854,7 @@ static void gfs2_rgrp_send_discards(stru
 				if ((start + nr_sects) != blk) {
 					rv = blkdev_issue_discard(bdev, start,
 							    nr_sects, GFP_NOFS,
-							    BLKDEV_IFL_WAIT |
-							    BLKDEV_IFL_BARRIER);
+							    BLKDEV_IFL_WAIT);
 					if (rv)
 						goto fail;
 					nr_sects = 0;
@@ -870,7 +869,7 @@ start_new_extent:
 	}
 	if (nr_sects) {
 		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
-					 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+					 BLKDEV_IFL_WAIT);
 		if (rv)
 			goto fail;
 	}
Index: linux-2.6/fs/nilfs2/the_nilfs.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/the_nilfs.c	2010-08-23 15:16:37.692003949 +0200
+++ linux-2.6/fs/nilfs2/the_nilfs.c	2010-08-23 15:16:45.928014774 +0200
@@ -775,8 +775,7 @@ int nilfs_discard_segments(struct the_ni
 						   start * sects_per_block,
 						   nblocks * sects_per_block,
 						   GFP_NOFS,
-						   BLKDEV_IFL_WAIT |
-						   BLKDEV_IFL_BARRIER);
+						   BLKDEV_IFL_WAIT);
 			if (ret < 0)
 				return ret;
 			nblocks = 0;
@@ -787,7 +786,7 @@ int nilfs_discard_segments(struct the_ni
 					   start * sects_per_block,
 					   nblocks * sects_per_block,
 					   GFP_NOFS,
-					  BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+					  BLKDEV_IFL_WAIT);
 	return ret;
 }
 
Index: linux-2.6/include/linux/blkdev.h
===================================================================
--- linux-2.6.orig/include/linux/blkdev.h	2010-08-23 15:16:37.700013168 +0200
+++ linux-2.6/include/linux/blkdev.h	2010-08-23 15:16:45.931003739 +0200
@@ -921,11 +921,9 @@ static inline struct request *blk_map_qu
 }
 enum{
 	BLKDEV_WAIT,	/* wait for completion */
-	BLKDEV_BARRIER,	/* issue request with barrier */
 	BLKDEV_SECURE,	/* secure discard */
 };
 #define BLKDEV_IFL_WAIT		(1 << BLKDEV_WAIT)
-#define BLKDEV_IFL_BARRIER	(1 << BLKDEV_BARRIER)
 #define BLKDEV_IFL_SECURE	(1 << BLKDEV_SECURE)
 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
 			unsigned long);
@@ -939,7 +937,7 @@ static inline int sb_issue_discard(struc
 	block <<= (sb->s_blocksize_bits - 9);
 	nr_blocks <<= (sb->s_blocksize_bits - 9);
 	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
-				   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				    BLKDEV_IFL_WAIT);
 }
 
 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2010-08-23 15:16:37.709012819 +0200
+++ linux-2.6/include/linux/fs.h	2010-08-23 15:16:45.938005835 +0200
@@ -159,14 +159,6 @@ struct inodes_stat_t {
 #define WRITE_BARRIER		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
 				 REQ_HARDBARRIER)
 
-/*
- * These aren't really reads or writes, they pass down information about
- * parts of device that are now unused by the file system.
- */
-#define DISCARD_NOBARRIER	(WRITE | REQ_DISCARD)
-#define DISCARD_BARRIER		(WRITE | REQ_DISCARD | REQ_HARDBARRIER)
-#define DISCARD_SECURE		(DISCARD_NOBARRIER | REQ_SECURE)
-
 #define SEL_IN		1
 #define SEL_OUT		2
 #define SEL_EX		4
Index: linux-2.6/mm/swapfile.c
===================================================================
--- linux-2.6.orig/mm/swapfile.c	2010-08-23 15:16:37.724015124 +0200
+++ linux-2.6/mm/swapfile.c	2010-08-23 15:16:45.948004996 +0200
@@ -142,7 +142,7 @@ static int discard_swap(struct swap_info
 	if (nr_blocks) {
 		err = blkdev_issue_discard(si->bdev, start_block,
 				nr_blocks, GFP_KERNEL,
-				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				BLKDEV_IFL_WAIT);
 		if (err)
 			return err;
 		cond_resched();
@@ -154,7 +154,7 @@ static int discard_swap(struct swap_info
 
 		err = blkdev_issue_discard(si->bdev, start_block,
 				nr_blocks, GFP_KERNEL,
-				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				BLKDEV_IFL_WAIT);
 		if (err)
 			break;
 
@@ -193,8 +193,7 @@ static void discard_swap_cluster(struct
 			start_block <<= PAGE_SHIFT - 9;
 			nr_blocks <<= PAGE_SHIFT - 9;
 			if (blkdev_issue_discard(si->bdev, start_block,
-				    nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT |
-							BLKDEV_IFL_BARRIER))
+				    nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT))
 				break;
 		}
 
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux