Hi,

We are running Linux 3.10 + fscache-20130702 + commit bae6b235905ab9dc6659395f7802c1d36fb63f15 from dhowells' git tree, and on several hosts we have hit the following: an fscache worker thread stuck in D state (trace below).

Attached is a patch we have been using for a few weeks now, and it seems to fix the problem. Have you seen this problem? Is there an alternative fix we could use?

Thanks,
Shantanu

Jun 26 11:26:00 kworker/u24:7 D ffff88053300700e 0 11163 2 0x00000080 5 FAIR 0 0-11
Jun 26 11:26:00 Workqueue: fscache_operation fscache_op_work_func [fscache]
Jun 26 11:26:00 ffff8800141410f8 0000000000000002 ffff880014140fd8 ffff88001fa9f0c0
Jun 26 11:26:00 0000000000012cc0 ffff880014141fd8 ffff880014140010 0000000000004000
Jun 26 11:26:00 ffff880014141fd8 0000000000012cc0 ffff8805fdda10c0 ffff88001fa9f0c0
Jun 26 11:26:00 Call Trace:
Jun 26 11:26:00 [<ffffffff81073cfc>] ? arch_vtime_task_switch+0x6c/0x90
Jun 26 11:26:00 [<ffffffff81070aa5>] ? finish_task_switch+0xa5/0xf0
Jun 26 11:26:00 [<ffffffff81428862>] ? __schedule+0x442/0xa00
Jun 26 11:26:00 [<ffffffff81428ee9>] schedule+0x29/0x70
Jun 26 11:26:00 [<ffffffffa04e13e5>] __fscache_wait_on_page_write+0x75/0xb0 [fscache]
Jun 26 11:26:00 [<ffffffff81062c60>] ? wake_up_bit+0x40/0x40
Jun 26 11:26:00 [<ffffffffa053fed5>] ? nfs_commit_clear_lock+0x25/0x30 [nfs]
Jun 26 11:26:00 [<ffffffffa04e15b5>] __fscache_maybe_release_page+0x55/0x1a0 [fscache]
Jun 26 11:26:00 [<ffffffffa05442e6>] nfs_fscache_release_page+0x76/0xd0 [nfs]
Jun 26 11:26:00 [<ffffffffa0533245>] nfs_release_page+0x55/0xa0 [nfs]
Jun 26 11:26:00 [<ffffffff810f5372>] try_to_release_page+0x32/0x60
Jun 26 11:26:00 [<ffffffff811076a3>] shrink_page_list+0x603/0x970
Jun 26 11:26:00 [<ffffffff811064b9>] ? isolate_lru_pages+0xd9/0x1c0
Jun 26 11:26:00 [<ffffffff81107fef>] shrink_inactive_list+0x18f/0x490
Jun 26 11:26:00 [<ffffffff8107308e>] ? 
try_to_wake_up+0x20e/0x2b0
Jun 26 11:26:00 [<ffffffff811088a9>] shrink_lruvec+0x269/0x470
Jun 26 11:26:00 [<ffffffff81108b5e>] shrink_zone+0xae/0x270
Jun 26 11:26:00 [<ffffffff8110a0e3>] do_try_to_free_pages+0xe3/0x560
Jun 26 11:26:00 [<ffffffff810fa08f>] ? zone_watermark_ok+0x1f/0x30
Jun 26 11:26:00 [<ffffffff8110a75e>] try_to_free_pages+0xce/0x150
Jun 26 11:26:00 [<ffffffff810fede1>] __alloc_pages_nodemask+0x591/0x910
Jun 26 11:26:00 [<ffffffff81036150>] ? flush_tlb_mm_range+0x240/0x240
Jun 26 11:26:01 [<ffffffff81139a9a>] alloc_pages_current+0xba/0x160
Jun 26 11:26:01 [<ffffffff810f6617>] __page_cache_alloc+0xa7/0xc0
Jun 26 11:26:01 [<ffffffff810f686c>] grab_cache_page_write_begin+0x7c/0xe0
Jun 26 11:26:01 [<ffffffffa0060ee9>] ext4_da_write_begin+0x149/0x2d0 [ext4]
Jun 26 11:26:01 [<ffffffff810f552e>] generic_file_buffered_write+0x10e/0x280
Jun 26 11:26:01 [<ffffffff81181c2b>] ? __mark_inode_dirty+0x19b/0x280
Jun 26 11:26:01 [<ffffffff810f766f>] __generic_file_aio_write+0x1af/0x3c0
Jun 26 11:26:01 [<ffffffff810f78e5>] generic_file_aio_write+0x65/0xd0
Jun 26 11:26:01 [<ffffffffa00560c2>] ext4_file_write+0x62/0x430 [ext4]
Jun 26 11:26:01 [<ffffffff811b6fa3>] ? dquot_initialize+0x13/0x20
Jun 26 11:26:01 [<ffffffff811b6ff3>] ? dquot_file_open+0x43/0x50
Jun 26 11:26:01 [<ffffffffa00555fe>] ? ext4_file_open+0x7e/0x250 [ext4]
Jun 26 11:26:01 [<ffffffff8107308e>] ? try_to_wake_up+0x20e/0x2b0
Jun 26 11:26:01 [<ffffffff811586ef>] do_sync_write+0x7f/0xb0
Jun 26 11:26:01 [<ffffffffa0517deb>] cachefiles_write_page+0x13b/0x320 [cachefiles]
Jun 26 11:26:01 [<ffffffffa04e06c9>] fscache_write_op+0x149/0x220 [fscache]
Jun 26 11:26:01 [<ffffffffa04dee5e>] fscache_op_work_func+0x2e/0x90 [fscache]
Jun 26 11:26:01 [<ffffffff8105b949>] process_one_work+0x169/0x4b0
Jun 26 11:26:01 [<ffffffff8105bdb1>] worker_thread+0x121/0x3f0
Jun 26 11:26:01 [<ffffffff81429bbe>] ? _raw_spin_unlock_irqrestore+0xe/0x10
Jun 26 11:26:01 [<ffffffff8105bc90>] ? 
process_one_work+0x4b0/0x4b0 Jun 26 11:26:01 [<ffffffff8106256e>] kthread+0xce/0xe0 diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 317f9ee..3ed6412 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -945,6 +945,19 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) } else { ret = -EIO; if (file->f_op->write) { + struct address_space *mapping = file->f_mapping; + gfp_t gfp_mask = mapping_gfp_mask(mapping); + gfp_t gfp_mask_orig = gfp_mask; + + /* + * Clear __GFP_FS to avoid potential deadlock + * during memory reclaim. + */ + if (gfp_mask & __GFP_FS) { + gfp_mask &= ~__GFP_FS; + mapping_set_gfp_mask(mapping, gfp_mask); + } + pos = (loff_t) page->index << PAGE_SHIFT; /* we mustn't write more data than we have, so we have @@ -972,6 +985,9 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) file_end_write(file); if (ret != len) ret = -EIO; + + if (gfp_mask != gfp_mask_orig) + mapping_set_gfp_mask(gfp_mask_orig); } fput(file); } -- Linux-cachefs mailing list Linux-cachefs@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/linux-cachefs