Hi

This is a patch for dm-bufio. Changes:

* fixed a bug in I/O submission that was introduced by an earlier code change
* simplified some constructs
* added more likely/unlikely hints
* moved dm-bufio.h from drivers/md to include/linux
* restored cond_resched() in loops (it was there originally and was removed
  later)

Two illustrative usage sketches (not part of the patch) are appended after
the diff.

Mikulas

---
 drivers/md/Kconfig       |    2 
 drivers/md/dm-bufio.c    |   95 ++++++++++++++++++++++++----------------
 drivers/md/dm-bufio.h    |  110 -----------------------------------------------
 include/linux/dm-bufio.h |  110 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 169 insertions(+), 148 deletions(-)

Index: linux-3.1-rc3-fast/drivers/md/Kconfig
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/Kconfig	2011-10-14 20:56:45.000000000 +0200
+++ linux-3.1-rc3-fast/drivers/md/Kconfig	2011-10-14 20:56:46.000000000 +0200
@@ -209,7 +209,7 @@ config DM_DEBUG
 	  If unsure, say N.
 
 config DM_BUFIO
-	tristate
+	tristate "Buffered IO"
 	depends on BLK_DEV_DM && EXPERIMENTAL
 	---help---
 	 This interface allows you to do buffered I/O on a device and acts
Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.c
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.c	2011-10-14 20:56:45.000000000 +0200
+++ linux-3.1-rc3-fast/drivers/md/dm-bufio.c	2011-10-14 20:57:10.000000000 +0200
@@ -167,6 +167,12 @@ static void dm_bufio_unlock(struct dm_bu
 	mutex_unlock(&c->lock);
 }
 
+#ifdef CONFIG_PREEMPT
+#define dm_bufio_cond_resched()	do { } while (0)
+#else
+#define dm_bufio_cond_resched()	cond_resched()
+#endif
+
 /*----------------------------------------------------------------*/
 
 /* Default cache size --- available memory divided by the ratio */
@@ -470,17 +476,18 @@ static void use_inline_bio(struct dm_buf
 	ptr = b->data;
 	len = b->c->block_size;
 
-	if (len >= PAGE_SIZE)
+	if (likely(len >= PAGE_SIZE))
 		BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
 	else
 		BUG_ON((unsigned long)ptr & (len - 1));
 
 	do {
-		if (!bio_add_page(&b->bio, virt_to_page(ptr),
-				  len < PAGE_SIZE ? len : PAGE_SIZE,
-				  virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
+		if (unlikely(!bio_add_page(&b->bio, virt_to_page(ptr),
+					   len < PAGE_SIZE ? len : PAGE_SIZE,
+					   virt_to_phys(ptr) & (PAGE_SIZE - 1)))) {
 			BUG_ON(b->c->block_size <= PAGE_SIZE);
 			use_dmio(b, rw, block, end_io);
+			return;
 		}
 
 		len -= PAGE_SIZE;
@@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
 static void submit_io(struct dm_buffer *b, int rw, sector_t block,
 		      bio_end_io_t *end_io)
 {
-	if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
-	    b->data_mode != DATA_MODE_VMALLOC)
+	if (rw == WRITE && b->c->write_callback)
+		b->c->write_callback(b);
+	if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
+	    likely(b->data_mode != DATA_MODE_VMALLOC))
 		use_inline_bio(b, rw, block, end_io);
 	else
 		use_dmio(b, rw, block, end_io);
@@ -514,7 +523,7 @@ static void write_endio(struct bio *bio,
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 	b->write_error = error;
-	if (error) {
+	if (unlikely(error)) {
 		struct dm_bufio_client *c = b->c;
 		(void)cmpxchg(&c->async_write_error, 0, error);
 	}
@@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
 	clear_bit(B_DIRTY, &b->state);
 	wait_on_bit_lock(&b->state, B_WRITING,
 			 do_io_schedule, TASK_UNINTERRUPTIBLE);
-	if (b->c->write_callback)
-		b->c->write_callback(b);
 	submit_io(b, WRITE, b->block, write_endio);
 }
@@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
 
 /*
  * Find some buffer that is not held by anybody, clean it, unlink it and
- * return it. If "wait" is zero, try less hard and don't block.
+ * return it.
 */
 static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
 {
@@ -585,6 +592,7 @@ static struct dm_buffer *__get_unclaimed
 			__unlink_buffer(b);
 			return b;
 		}
+		dm_bufio_cond_resched();
 	}
 
 	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
@@ -594,6 +602,7 @@ static struct dm_buffer *__get_unclaimed
 			__unlink_buffer(b);
 			return b;
 		}
+		dm_bufio_cond_resched();
 	}
 	return NULL;
 }
@@ -636,18 +645,21 @@ static struct dm_buffer *__alloc_buffer_
 	 * This is useful for debugging. When we set cache size to 1,
 	 * no new buffers are allocated at all.
 	 */
-	if (dm_bufio_cache_size_latch != 1) {
+	if (likely(dm_bufio_cache_size_latch != 1)) {
 		/*
-		 * dm-bufio is resistant to allocation failures (it just keeps
-		 * one buffer reserved in cases all the allocations fail).
+		 * dm-bufio is resistant to allocation failures (it
+		 * just keeps one buffer reserved in cases all the
+		 * allocations fail).
 		 * So set flags to not try too hard:
 		 * GFP_NOIO: don't recurse into the I/O layer
-		 * __GFP_NORETRY: don't retry and rather return failure
+		 * __GFP_NORETRY: don't retry and rather return
+		 *		  failure
 		 * __GFP_NOMEMALLOC: don't use emergency reserves
-		 * __GFP_NOWARN: don't print a warning in case of failure
+		 * __GFP_NOWARN: don't print a warning in case of
+		 *		 failure
 		 */
 		b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY |
 				 __GFP_NOMEMALLOC | __GFP_NOWARN);
-		if (b)
+		if (likely(b != NULL))
 			return b;
 	}
@@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
 static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
 {
 	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
-	if (b && c->alloc_callback)
+	if (c->alloc_callback)
 		c->alloc_callback(b);
 	return b;
 }
@@ -682,7 +694,7 @@ static void __free_buffer_wake(struct dm
 {
 	struct dm_bufio_client *c = b->c;
 
-	if (c->need_reserved_buffers) {
+	if (unlikely(c->need_reserved_buffers != 0)) {
 		list_add(&b->lru_list, &c->reserved_buffers);
 		c->need_reserved_buffers--;
 	} else
@@ -704,6 +716,7 @@ static void __write_dirty_buffers_async(
 		if (no_wait && test_bit(B_WRITING, &b->state))
 			return;
 		__write_dirty_buffer(b);
+		dm_bufio_cond_resched();
 	}
 }
@@ -716,7 +729,7 @@ static void __get_memory_limit(struct dm
 {
 	unsigned long buffers;
 
-	if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
+	if (unlikely(dm_bufio_cache_size != dm_bufio_cache_size_latch)) {
 		mutex_lock(&dm_bufio_clients_lock);
 		__cache_size_refresh();
 		mutex_unlock(&dm_bufio_clients_lock);
@@ -724,7 +737,7 @@ static void __get_memory_limit(struct dm
 	buffers = dm_bufio_cache_size_per_client >>
 		  (c->sectors_per_block_bits + SECTOR_SHIFT);
-	if (buffers < DM_BUFIO_MIN_BUFFERS)
+	if (unlikely(buffers < DM_BUFIO_MIN_BUFFERS))
 		buffers = DM_BUFIO_MIN_BUFFERS;
 	*limit_buffers = buffers;
 	*threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
@@ -747,6 +760,7 @@ static void __check_watermark(struct dm_
 		if (!b)
 			return;
 		__free_buffer_wake(b);
+		dm_bufio_cond_resched();
 	}
 	if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
 		__write_dirty_buffers_async(c, 1);
@@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
 	struct dm_buffer *b;
 	struct hlist_node *hn;
 	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
-		if (b->block == block)
+		if (likely(b->block == block))
 			return b;
+		dm_bufio_cond_resched();
 	}
 
 	return NULL;
}
@@ -775,12 +790,13 @@ enum new_flag {
 	NF_GET = 2
 };
 
-static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
-				     struct dm_buffer **bp, int *need_submit)
+static void read_endio(struct bio *bio, int error);
+
+static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
+				     enum new_flag nf, struct dm_buffer **bp)
 {
 	struct dm_buffer *b, *new_b = NULL;
 
-	*need_submit = 0;
 	b = __find(c, block);
 	if (b) {
 		b->hold_count++;
@@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
 	}
 
 	b->state = 1 << B_READING;
-	*need_submit = 1;
+
+	submit_io(b, READ, b->block, read_endio);
+
 	return b;
 }
@@ -849,22 +867,18 @@ static void read_endio(struct bio *bio,
 static void *new_read(struct dm_bufio_client *c, sector_t block,
 		      enum new_flag nf, struct dm_buffer **bp)
 {
-	int need_submit;
 	struct dm_buffer *b;
 
 	dm_bufio_lock(c);
-	b = __bufio_new(c, block, nf, bp, &need_submit);
+	b = __bufio_new(c, block, nf, bp);
 	dm_bufio_unlock(c);
 
-	if (!b || IS_ERR(b))
+	if (unlikely(!b) || unlikely(IS_ERR(b)))
 		return b;
 	else {
-		if (need_submit)
-			submit_io(b, READ, b->block, read_endio);
 		wait_on_bit(&b->state, B_READING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
-		if (b->read_error != 0) {
+		if (unlikely(b->read_error != 0)) {
 			int error = b->read_error;
 			dm_bufio_release(b);
 			return ERR_PTR(error);
 		}
@@ -904,7 +918,7 @@ void dm_bufio_release(struct dm_buffer *
 	BUG_ON(test_bit(B_READING, &b->state));
 	BUG_ON(!b->hold_count);
 	b->hold_count--;
-	if (!b->hold_count) {
+	if (likely(!b->hold_count)) {
 		wake_up(&c->free_buffer_wait);
 
 		/*
@@ -912,7 +926,7 @@ void dm_bufio_release(struct dm_buffer *
 		 * to be written, free the buffer. There is no point in caching
 		 * invalid buffer.
 		 */
-		if ((b->read_error || b->write_error) &&
+		if (unlikely((b->read_error | b->write_error) != 0) &&
 		    !test_bit(B_WRITING, &b->state) &&
 		    !test_bit(B_DIRTY, &b->state)) {
 			__unlink_buffer(b);
@@ -999,15 +1013,19 @@ again:
 		 * someone is doing some writes simultaneously with us --- in
 		 * this case, stop dropping the lock.
 		 */
+		dm_bufio_cond_resched();
 		if (dropped_lock)
 			goto again;
 	}
 	wake_up(&c->free_buffer_wait);
 	dm_bufio_unlock(c);
 
-	a = xchg(&c->async_write_error, 0);
+	if (likely(!c->async_write_error))
+		a = 0;
+	else
+		a = xchg(&c->async_write_error, 0);
 	f = dm_bufio_issue_flush(c);
-	if (a)
+	if (unlikely(a != 0))
 		return a;
 	return f;
 }
@@ -1071,7 +1089,7 @@ retry:
 	BUG_ON(!b->hold_count);
 	BUG_ON(test_bit(B_READING, &b->state));
 	__write_dirty_buffer(b);
-	if (b->hold_count == 1) {
+	if (likely(b->hold_count == 1)) {
 		wait_on_bit(&b->state, B_WRITING,
 			    do_io_schedule, TASK_UNINTERRUPTIBLE);
 		set_bit(B_DIRTY, &b->state);
@@ -1193,6 +1211,7 @@ static void __scan(struct dm_bufio_clien
 			if (!--nr_to_scan)
 				return;
 		}
+		dm_bufio_cond_resched();
 	}
 }
@@ -1406,9 +1425,11 @@ static void cleanup_old_buffers(void)
 					  struct dm_buffer, lru_list);
 			if (__cleanup_old_buffer(b, 0, max_age))
 				break;
+			dm_bufio_cond_resched();
 		}
 
 		dm_bufio_unlock(c);
+		dm_bufio_cond_resched();
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }
Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.h
===================================================================
--- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.h	2011-10-14 20:56:45.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,110 +0,0 @@
-/*
- * Copyright (C) 2009-2011 2011 Red Hat, Inc.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_BUFIO_H
-#define DM_BUFIO_H
-
-#include <linux/blkdev.h>
-#include <linux/types.h>
-
-/*----------------------------------------------------------------*/
-
-struct dm_bufio_client;
-struct dm_buffer;
-
-/*
- * Create a buffered IO cache on a given device
- */
-struct dm_bufio_client *
-dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
-		       unsigned reserved_buffers, unsigned aux_size,
-		       void (*alloc_callback)(struct dm_buffer *),
-		       void (*write_callback)(struct dm_buffer *));
-
-/*
- * Release a buffered IO cache.
- */
-void dm_bufio_client_destroy(struct dm_bufio_client *c);
-
-/*
- * WARNING: to avoid deadlocks, these conditions are observed:
- *
- * - At most one thread can hold at most "reserved_buffers" simultaneously.
- * - Each other threads can hold at most one buffer.
- * - Threads which call only dm_bufio_get can hold unlimited number of
- *   buffers.
- */
-
-/*
- * Read a given block from disk. Returns pointer to data. Returns a
- * pointer to dm_buffer that can be used to release the buffer or to make
- * it dirty.
- */
-void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
-		    struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but return buffer from cache, don't read
- * it. If the buffer is not in the cache, return NULL.
- */
-void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
-		   struct dm_buffer **bp);
-
-/*
- * Like dm_bufio_read, but don't read anything from the disk. It is
- * expected that the caller initializes the buffer and marks it dirty.
- */
-void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
-		   struct dm_buffer **bp);
-
-/*
- * Release a reference obtained with dm_bufio_{read,get,new}. The data
- * pointer and dm_buffer pointer is no longer valid after this call.
- */
-void dm_bufio_release(struct dm_buffer *b);
-
-/*
- * Mark a buffer dirty. It should be called after the buffer is modified.
- *
- * In case of memory pressure, the buffer may be written after
- * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers. So
- * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
- * the actual writing may occur earlier.
- */
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
-
-/*
- * Initiate writing of dirty buffers, without waiting for completion.
- */
-void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
-
-/*
- * Write all dirty buffers. Guarantees that all dirty buffers created prior
- * to this call are on disk when this call exits.
- */
-int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
-
-/*
- * Send an empty write barrier to the device to flush hardware disk cache.
- */
-int dm_bufio_issue_flush(struct dm_bufio_client *c);
-
-/*
- * Like dm_bufio_release but also move the buffer to the new
- * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
- */
-void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
-
-unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
-sector_t dm_bufio_get_block_number(struct dm_buffer *b);
-void *dm_bufio_get_block_data(struct dm_buffer *b);
-void *dm_bufio_get_aux_data(struct dm_buffer *b);
-struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
-
-/*----------------------------------------------------------------*/
-
-#endif
Index: linux-3.1-rc3-fast/include/linux/dm-bufio.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-3.1-rc3-fast/include/linux/dm-bufio.h	2011-10-14 20:56:46.000000000 +0200
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2009-2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BUFIO_H
+#define DM_BUFIO_H
+
+#include <linux/blkdev.h>
+#include <linux/types.h>
+
+/*----------------------------------------------------------------*/
+
+struct dm_bufio_client;
+struct dm_buffer;
+
+/*
+ * Create a buffered IO cache on a given device
+ */
+struct dm_bufio_client *
+dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
+		       unsigned reserved_buffers, unsigned aux_size,
+		       void (*alloc_callback)(struct dm_buffer *),
+		       void (*write_callback)(struct dm_buffer *));
+
+/*
+ * Release a buffered IO cache.
+ */
+void dm_bufio_client_destroy(struct dm_bufio_client *c);
+
+/*
+ * WARNING: to avoid deadlocks, these conditions are observed:
+ *
+ * - At most one thread can hold at most "reserved_buffers" buffers
+ *   simultaneously.
+ * - Each other thread can hold at most one buffer.
+ * - Threads which call only dm_bufio_get can hold an unlimited number of
+ *   buffers.
+ */
+
+/*
+ * Read a given block from disk. Returns pointer to data. Returns a
+ * pointer to dm_buffer that can be used to release the buffer or to make
+ * it dirty.
+ */
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+		    struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but return the buffer from the cache, don't read
+ * it. If the buffer is not in the cache, return NULL.
+ */
+void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
+		   struct dm_buffer **bp);
+
+/*
+ * Like dm_bufio_read, but don't read anything from the disk. It is
+ * expected that the caller initializes the buffer and marks it dirty.
+ */
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
+		   struct dm_buffer **bp);
+
+/*
+ * Release a reference obtained with dm_bufio_{read,get,new}. The data
+ * pointer and the dm_buffer pointer are no longer valid after this call.
+ */
+void dm_bufio_release(struct dm_buffer *b);
+
+/*
+ * Mark a buffer dirty. It should be called after the buffer is modified.
+ *
+ * In case of memory pressure, the buffer may be written after
+ * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers. So
+ * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
+ * the actual writing may occur earlier.
+ */
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
+
+/*
+ * Initiate writing of dirty buffers, without waiting for completion.
+ */
+void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
+
+/*
+ * Write all dirty buffers. Guarantees that all dirty buffers created prior
+ * to this call are on disk when this call exits.
+ */
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
+
+/*
+ * Send an empty write barrier to the device to flush the hardware disk cache.
+ */
+int dm_bufio_issue_flush(struct dm_bufio_client *c);
+
+/*
+ * Like dm_bufio_release but also move the buffer to the new
+ * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
+ */
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
+
+unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
+sector_t dm_bufio_get_block_number(struct dm_buffer *b);
+void *dm_bufio_get_block_data(struct dm_buffer *b);
+void *dm_bufio_get_aux_data(struct dm_buffer *b);
+struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
+
+/*----------------------------------------------------------------*/
+
+#endif
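
Appendix (not part of the patch): a minimal usage sketch of the interface
declared in include/linux/dm-bufio.h as modified above. The names
example_zero_block and EXAMPLE_BLOCK_SIZE are illustrative only, error
handling is abbreviated, and it assumes dm_bufio_client_create reports
failure with ERR_PTR(), which the header does not spell out. A real dm
target would create the client in its constructor and destroy it in its
destructor rather than once per operation.

#include <linux/dm-bufio.h>
#include <linux/err.h>
#include <linux/string.h>

#define EXAMPLE_BLOCK_SIZE	4096	/* hypothetical per-client block size */

static int example_zero_block(struct block_device *bdev, sector_t block)
{
	struct dm_bufio_client *c;
	struct dm_buffer *b;
	void *data;
	int r;

	/* one reserved buffer, no aux data, no alloc/write callbacks */
	c = dm_bufio_client_create(bdev, EXAMPLE_BLOCK_SIZE, 1, 0, NULL, NULL);
	if (IS_ERR(c))
		return PTR_ERR(c);

	/* read the block: returns the data and a handle for the buffer */
	data = dm_bufio_read(c, block, &b);
	if (IS_ERR(data)) {
		r = PTR_ERR(data);
		goto out;
	}

	/* modify the data, then mark the buffer dirty */
	memset(data, 0, EXAMPLE_BLOCK_SIZE);
	dm_bufio_mark_buffer_dirty(b);

	/* drop the reference; the buffer stays in the cache */
	dm_bufio_release(b);

	/* commit: dirty buffers created before this call reach the disk */
	r = dm_bufio_write_dirty_buffers(c);
out:
	dm_bufio_client_destroy(c);
	return r;
}

Per the deadlock rules in the header, a thread should hold at most one
buffer at a time, unless it is the single thread allowed to hold up to
"reserved_buffers" buffers, or it uses only dm_bufio_get.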
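
A second sketch (also not part of the patch), showing the rationale behind
the dm_bufio_cond_resched() macro added above: on a non-preemptible kernel
a long traversal must offer to reschedule explicitly, while with
CONFIG_PREEMPT the kernel can preempt the loop anyway, so the macro
compiles away. The example_item structure and example_scan function below
are hypothetical.

#include <linux/list.h>
#include <linux/sched.h>

#ifdef CONFIG_PREEMPT
#define example_cond_resched()	do { } while (0)
#else
#define example_cond_resched()	cond_resched()
#endif

struct example_item {
	struct list_head list;
	int key;
};

/* Walk a possibly very long list, yielding once per iteration if needed. */
static struct example_item *example_scan(struct list_head *head, int key)
{
	struct example_item *i;

	list_for_each_entry(i, head, list) {
		if (i->key == key)
			return i;
		example_cond_resched();
	}
	return NULL;
}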