So instead of cracking my head on the relaxed barriers I've decided to do the easiest part first: relaxing the explicit cache flushes done by blkdev_issue_flush. These days these are handled as an empty barrier, which is complete overkill for a plain cache flush. Instead, take advantage of the way we now handle flushes, that is as REQ_FLUSH FS requests: update the block layer to handle REQ_FLUSH correctly, and make blkdev_issue_flush submit such requests directly.

All request-based block drivers should just work with this, but bio-based remappers will need some additional work; the sketch below shows roughly what that means. The next patch will do this for DM, but I haven't quite grasped the barrier code in MD yet. Despite doing a lot of REQ_HARDBARRIER tests, DRBD doesn't actually advertise any ordered mode, so it's not affected. The barrier handling in the loop driver is currently broken anyway, and I'm still undecided whether I want to fix it before or after this conversion.
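To illustrate the kind of additional work a bio-based remapper needs, here is a minimal sketch. This is not the actual DM patch: struct remap_dev, remap_make_request and remap_flush_endio are made-up names, and the completion/error bookkeeping for the cloned flushes is omitted. The point is just that an empty REQ_FLUSH bio carries no data, so it can't be remapped to a single device; it has to be replicated to every device the driver sits on top of:

#include <linux/bio.h>
#include <linux/blkdev.h>

/* made-up per-device state, for illustration only */
struct remap_dev {
        int                     nr_devs;
        struct block_device     *devs[8];
};

/* free the cloned flush; real code would also propagate errors */
static void remap_flush_endio(struct bio *clone, int error)
{
        bio_put(clone);
}

static int remap_make_request(struct request_queue *q, struct bio *bio)
{
        struct remap_dev *rd = q->queuedata;
        int i;

        if (bio->bi_rw & REQ_FLUSH) {
                /*
                 * An empty flush bio has no data to remap, so it must
                 * be replicated to every underlying device.  A real
                 * driver has to wait for all clones to complete before
                 * ending the original bio; that is omitted here.
                 */
                for (i = 0; i < rd->nr_devs; i++) {
                        struct bio *clone = bio_clone(bio, GFP_NOIO);

                        clone->bi_bdev = rd->devs[i];
                        clone->bi_end_io = remap_flush_endio;
                        submit_bio(WRITE_SYNC | REQ_FLUSH, clone);
                }
                bio_endio(bio, 0);
                return 0;
        }

        /* regular I/O: remap to the right device and resubmit */
        bio->bi_bdev = rd->devs[0];
        generic_make_request(bio);
        return 0;
}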
Index: linux-2.6/block/blk-barrier.c
===================================================================
--- linux-2.6.orig/block/blk-barrier.c	2010-08-03 20:26:50.259005954 +0200
+++ linux-2.6/block/blk-barrier.c	2010-08-03 20:33:39.580266216 +0200
@@ -151,25 +151,7 @@ static inline bool start_ordered(struct
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
-	/*
-	 * For an empty barrier, there's no actual BAR request, which
-	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
-	 */
-	if (!blk_rq_sectors(rq)) {
-		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
-				QUEUE_ORDERED_DO_POSTFLUSH);
-		/*
-		 * Empty barrier on a write-through device w/ ordered
-		 * tag has no command to issue and without any command
-		 * to issue, ordering by tag can't be used.  Drain
-		 * instead.
-		 */
-		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
-		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
-			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
-			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
-		}
-	}
+	BUG_ON(!blk_rq_sectors(rq));
 
 	/* stash away the original request */
 	blk_dequeue_request(rq);
@@ -311,6 +293,9 @@ int blkdev_issue_flush(struct block_devi
 	if (!q)
 		return -ENXIO;
 
+	if (!(q->next_ordered & QUEUE_ORDERED_DO_PREFLUSH))
+		return 0;
+
 	/*
 	 * some block devices may not have their queue correctly set up here
 	 * (e.g. loop device without a backing file) and so issuing a flush
@@ -327,7 +312,7 @@ int blkdev_issue_flush(struct block_devi
 		bio->bi_private = &wait;
 
 	bio_get(bio);
-	submit_bio(WRITE_BARRIER, bio);
+	submit_bio(WRITE_SYNC | REQ_FLUSH, bio);
 	if (test_bit(BLKDEV_WAIT, &flags)) {
 		wait_for_completion(&wait);
 		/*
Index: linux-2.6/block/elevator.c
===================================================================
--- linux-2.6.orig/block/elevator.c	2010-08-03 20:26:50.268024322 +0200
+++ linux-2.6/block/elevator.c	2010-08-03 20:32:11.949256478 +0200
@@ -423,7 +423,8 @@ void elv_dispatch_sort(struct request_qu
 	q->nr_sorted--;
 
 	boundary = q->end_sector;
-	stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED;
+	stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED | \
+		     REQ_FLUSH;
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
Index: linux-2.6/include/linux/bio.h
===================================================================
--- linux-2.6.orig/include/linux/bio.h	2010-08-03 20:26:50.298255570 +0200
+++ linux-2.6/include/linux/bio.h	2010-08-03 20:46:48.367257736 +0200
@@ -153,6 +153,7 @@ enum rq_flag_bits {
 	__REQ_META,		/* metadata io request */
 	__REQ_DISCARD,		/* request to discard sectors */
 	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
+	__REQ_FLUSH,		/* request for cache flush */
 
 	/* bio only flags */
 	__REQ_UNPLUG,		/* unplug the immediately after submission */
@@ -174,7 +175,6 @@ enum rq_flag_bits {
 	__REQ_ALLOCED,		/* request came from our alloc pool */
 	__REQ_COPY_USER,	/* contains copies of user pages */
 	__REQ_INTEGRITY,	/* integrity metadata has been remapped */
-	__REQ_FLUSH,		/* request for cache flush */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_NR_BITS,		/* stops here */
@@ -189,12 +189,13 @@ enum rq_flag_bits {
 #define REQ_META		(1 << __REQ_META)
 #define REQ_DISCARD		(1 << __REQ_DISCARD)
 #define REQ_NOIDLE		(1 << __REQ_NOIDLE)
+#define REQ_FLUSH		(1 << __REQ_FLUSH)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
 	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \
-	 REQ_META| REQ_DISCARD | REQ_NOIDLE)
+	 REQ_META| REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH)
 
 #define REQ_UNPLUG		(1 << __REQ_UNPLUG)
 #define REQ_RAHEAD		(1 << __REQ_RAHEAD)
@@ -214,7 +215,6 @@ enum rq_flag_bits {
 #define REQ_ALLOCED		(1 << __REQ_ALLOCED)
 #define REQ_COPY_USER		(1 << __REQ_COPY_USER)
 #define REQ_INTEGRITY		(1 << __REQ_INTEGRITY)
-#define REQ_FLUSH		(1 << __REQ_FLUSH)
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 
Index: linux-2.6/include/linux/blkdev.h
===================================================================
--- linux-2.6.orig/include/linux/blkdev.h	2010-08-03 20:26:50.311003929 +0200
+++ linux-2.6/include/linux/blkdev.h	2010-08-03 20:32:11.956036684 +0200
@@ -589,7 +589,8 @@ static inline void blk_clear_queue_full(
  * it already be started by driver.
  */
 #define RQ_NOMERGE_FLAGS	\
-	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
+	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \
+	 REQ_FLUSH)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
 	 (((rq)->cmd_flags & REQ_DISCARD) || \
Index: linux-2.6/block/blk-core.c
===================================================================
--- linux-2.6.orig/block/blk-core.c	2010-08-03 20:26:50.275003649 +0200
+++ linux-2.6/block/blk-core.c	2010-08-03 20:32:11.960004138 +0200
@@ -1203,7 +1203,7 @@ static int __make_request(struct request
 	const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	int rw_flags;
 
-	if ((bio->bi_rw & REQ_HARDBARRIER) &&
+	if ((bio->bi_rw & (REQ_HARDBARRIER|REQ_FLUSH)) &&
 	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
@@ -1217,7 +1217,7 @@ static int __make_request(struct request
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
+	if ((bio->bi_rw & (REQ_HARDBARRIER|REQ_FLUSH)) || elv_queue_empty(q))
 		goto get_rq;
 
 	el_ret = elv_merge(q, &req, bio);
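For reference, a typical caller after this change looks like the sketch below; flush_device_cache is a made-up wrapper around the existing blkdev_issue_flush interface. On a write-through device, where next_ordered never includes QUEUE_ORDERED_DO_PREFLUSH, the call now returns 0 immediately instead of draining the queue for a pointless empty barrier:

#include <linux/blkdev.h>
#include <linux/fs.h>

/*
 * Made-up helper, e.g. for an ->fsync path that needs the device's
 * volatile write cache emptied before it can report completion.
 */
static int flush_device_cache(struct block_device *bdev)
{
	/* BLKDEV_IFL_WAIT: don't return until the flush has completed */
	return blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
}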