On 23.03.22 14:49, Xiaoguang Wang wrote:
Currently tcmu_vma_fault() uses udev->cmdr_lock to avoid concurrent
find_free_blocks(), which unmaps idle pages and truncates them. This
work is really like many filesystem's truncate operations, but they
use address_space->invalidate_lock to protect race.
This patch replaces cmdr_lock with address_space->invalidate_lock in
tcmu fault procedure, which will also make page-fault have concurrency.
Page fault happens only once for each allocated data page. After the
page fault was processed, the page is re-used for many cmds without
further page faults.
Of course, if find_free_blocks released a page and tcmu after that
allocates a new one, another page fault will occur. But that is a
very rare case.
So, what is the advantage of concurrency here?
OTOH, I don't like the switch from tcmu's cmdr_lock to
filemap_invalidate_* without a real technical reason.
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@xxxxxxxxxxxxxxxxx>
---
drivers/target/target_core_user.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9196188504ec..81bfa553cc67 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1820,13 +1820,14 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma)
static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
{
+ struct address_space *mapping = udev->inode->i_mapping;
struct page *page;
- mutex_lock(&udev->cmdr_lock);
+ filemap_invalidate_lock_shared(mapping);
page = xa_load(&udev->data_pages, dpi);
if (likely(page)) {
get_page(page);
- mutex_unlock(&udev->cmdr_lock);
+ filemap_invalidate_unlock_shared(mapping);
return page;
}
@@ -1836,7 +1837,7 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
*/
pr_err("Invalid addr to data page mapping (dpi %u) on device %s\n",
dpi, udev->name);
- mutex_unlock(&udev->cmdr_lock);
+ filemap_invalidate_unlock_shared(mapping);
return NULL;
}
@@ -3116,6 +3117,7 @@ static void find_free_blocks(void)
loff_t off;
u32 pages_freed, total_pages_freed = 0;
u32 start, end, block, total_blocks_freed = 0;
+ struct address_space *mapping;
if (atomic_read(&global_page_count) <= tcmu_global_max_pages)
return;
@@ -3139,6 +3141,8 @@ static void find_free_blocks(void)
continue;
}
+ mapping = udev->inode->i_mapping;
+ filemap_invalidate_lock(mapping);
end = udev->dbi_max + 1;
block = find_last_bit(udev->data_bitmap, end);
if (block == udev->dbi_max) {
@@ -3146,6 +3150,7 @@ static void find_free_blocks(void)
* The last bit is dbi_max, so it is not possible
* reclaim any blocks.
*/
+ filemap_invalidate_unlock(mapping);
mutex_unlock(&udev->cmdr_lock);
continue;
} else if (block == end) {
@@ -3159,10 +3164,11 @@ static void find_free_blocks(void)
/* Here will truncate the data area from off */
off = udev->data_off + (loff_t)start * udev->data_blk_size;
- unmap_mapping_range(udev->inode->i_mapping, off, 0, 1);
+ unmap_mapping_range(mapping, off, 0, 1);
/* Release the block pages */
pages_freed = tcmu_blocks_release(udev, start, end - 1);
+ filemap_invalidate_unlock(mapping);
mutex_unlock(&udev->cmdr_lock);
total_pages_freed += pages_freed;