The patch titled revert "kswapd should only wait on IO if there is IO" has been added to the -mm tree. Its filename is revert-kswapd-should-only-wait-on-io-if-there-is-io.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed; b) Prefer to cc a suitable mailing list as well; c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's. *** Remember to use Documentation/SubmitChecklist when testing your code *** See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this. The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/ ------------------------------------------------------ Subject: revert "kswapd should only wait on IO if there is IO" From: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Revert commit f1a9ee758de7de1e040de849fdef46e6802ea117 Author: Rik van Riel <riel@xxxxxxxxxx> Date: Thu Feb 7 00:14:08 2008 -0800 kswapd should only wait on IO if there is IO The current kswapd (and try_to_free_pages) code has an oddity where the code will wait on IO, even if there is no IO in flight. This problem is notable especially when the system scans through many unfreeable pages, causing unnecessary stalls in the VM. Additionally, tasks without __GFP_FS or __GFP_IO in the direct reclaim path will sleep if a significant number of pages are encountered that should be written out. This gives kswapd a chance to write out those pages, while the direct reclaim task sleeps. Signed-off-by: Rik van Riel <riel@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> The commit is being reverted because of the large latencies and interactivity problems reported by Carlos here: http://lkml.org/lkml/2008/3/22/211 Cc: Rik van Riel <riel@xxxxxxxxxx> Cc: "Carlos R. 
Mafra" <crmafra2@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/vmscan.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff -puN mm/vmscan.c~revert-kswapd-should-only-wait-on-io-if-there-is-io mm/vmscan.c --- a/mm/vmscan.c~revert-kswapd-should-only-wait-on-io-if-there-is-io +++ a/mm/vmscan.c @@ -70,13 +70,6 @@ struct scan_control { int order; - /* - * Pages that have (or should have) IO pending. If we run into - * a lot of these, we're better off waiting a little for IO to - * finish rather than scanning more pages in the VM. - */ - int nr_io_pages; - /* Which cgroup do we reclaim from */ struct mem_cgroup *mem_cgroup; @@ -512,10 +505,8 @@ static unsigned long shrink_page_list(st */ if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs) wait_on_page_writeback(page); - else { - sc->nr_io_pages++; + else goto keep_locked; - } } referenced = page_referenced(page, 1, sc->mem_cgroup); @@ -554,10 +545,8 @@ static unsigned long shrink_page_list(st if (PageDirty(page)) { if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced) goto keep_locked; - if (!may_enter_fs) { - sc->nr_io_pages++; + if (!may_enter_fs) goto keep_locked; - } if (!sc->may_writepage) goto keep_locked; @@ -568,10 +557,8 @@ static unsigned long shrink_page_list(st case PAGE_ACTIVATE: goto activate_locked; case PAGE_SUCCESS: - if (PageWriteback(page) || PageDirty(page)) { - sc->nr_io_pages++; + if (PageWriteback(page) || PageDirty(page)) goto keep; - } /* * A synchronous write - probably a ramdisk. Go * ahead and try to reclaim the page. 
@@ -1344,7 +1331,6 @@ static unsigned long do_try_to_free_page for (priority = DEF_PRIORITY; priority >= 0; priority--) { sc->nr_scanned = 0; - sc->nr_io_pages = 0; if (!priority) disable_swap_token(); nr_reclaimed += shrink_zones(priority, zones, sc); @@ -1379,8 +1365,7 @@ static unsigned long do_try_to_free_page } /* Take a nap, wait for some writeback to complete */ - if (sc->nr_scanned && priority < DEF_PRIORITY - 2 && - sc->nr_io_pages > sc->swap_cluster_max) + if (sc->nr_scanned && priority < DEF_PRIORITY - 2) congestion_wait(WRITE, HZ/10); } /* top priority shrink_caches still had more to do? don't OOM, then */ @@ -1514,7 +1499,6 @@ loop_again: if (!priority) disable_swap_token(); - sc.nr_io_pages = 0; all_zones_ok = 1; /* @@ -1607,8 +1591,7 @@ loop_again: * OK, kswapd is getting into trouble. Take a nap, then take * another pass across the zones. */ - if (total_scanned && priority < DEF_PRIORITY - 2 && - sc.nr_io_pages > sc.swap_cluster_max) + if (total_scanned && priority < DEF_PRIORITY - 2) congestion_wait(WRITE, HZ/10); /* _ Patches currently in -mm which might be from akpm@xxxxxxxxxxxxxxxxxxxx are origin.patch mtd-memory-corruption-in-block2mtdc.patch revert-kswapd-should-only-wait-on-io-if-there-is-io.patch lib-swiotlbc-avoid-endless-loops-fix.patch git-x86.patch i386-arch-x86-math-emu-fpu_entryc-warning-fix.patch git-kgdb-light.patch git-acpi-fixup.patch git-acpi-arch-x86-kernel-apm_32c-fix-warning.patch acpi-enable-c3-power-state-on-dell-inspiron-8200.patch git-alsa-tiwai.patch cifs-suppress-warning.patch git-drm.patch git-drm-git-rejects.patch git-drm-drivers-char-drm-ati_pcigartc-fix-printk-warning.patch git-dvb.patch drivers-media-video-pvrusb2-pvrusb2-sysfsc-fix-printk-warnings.patch git-hwmon.patch git-gfs2-nmw.patch git-hrt.patch arch-ia64-kernel-use-time_-macros-checkpatch-fixes.patch git-ieee1394.patch git-infiniband.patch git-infiniband-vs-gregkh-driver-ib-convert-struct-class_device-to-struct-device.patch 
maple-add-driver-for-sega-dreamcast-controller.patch git-kvm.patch git-leds.patch devres-implement-pcim_iomap_regions_request_all-fix.patch devres-implement-pcim_iomap_regions_request_all-fix-fix.patch git-async-tx.patch drivers-atm-firestreamc-suppress-uninitialized-var-warning.patch wan-new-ppp-code-for-generic-hdlc-checkpatch-fixes.patch drivers-net-bonding-bond_sysfsc-suppress-uninitialized-var-warning.patch drivers-net-arcnet-arcnetc-use-time_-macros-checkpatch-fixes.patch drivers-net-tokenring-3c359c-use-time_-macros-checkpatch-fixes.patch update-smc91x-driver-with-arm-versatile-board-info.patch fs-nfs-callback_xdrc-suppress-uninitialiized-variable-warnings.patch git-nfsd.patch git-ocfs2.patch arch-parisc-kernel-unalignedc-use-time_-macros-checkpatch-fixes.patch drivers-pcmcia-soc_commonc-convert-soc_pcmcia_sockets_lock-into-a-mutex-and-make-it-static.patch git-selinux.patch git-sh.patch git-sh-git-rejects.patch git-scsi-misc-vs-gregkh-driver-driver-core-remove-no-longer-used-struct-class_device.patch git-scsi-misc-vs-gregkh-driver-driver-core-remove-no-longer-used-struct-class_device-fix.patch scsi-aic94xx-cleanups.patch scsi-fix-section-mismatch-in-aic94xx-fix.patch scsi-chc-fix-shadowed-variable-warnings-checkpatch-fixes.patch ipsc-fix-build-warning.patch drivers-scsi-initioc-suppress-compile-warning.patch drivers-scsi-hptiopc-fix-build-warning.patch drivers-scsi-mvsasc-fix-build-warnings.patch scsi-add-iscsi-ibft-support-fix.patch git-block-git-rejects.patch git-unionfs.patch fix-gregkh-usb-usb-ohci-port-reset-paranoia-timeout.patch git-v9fs.patch git-watchdog.patch xfs-suppress-uninitialized-var-warnings.patch git-xtensa.patch git-semaphore-git-rejects.patch remove-sparse-warning-for-mmzoneh-checkpatch-fixes.patch fix-invalidate_inode_pages2_range-to-not-clear-ret-checkpatch-fixes.patch mm-make-mem_map-allocation-continuous-checkpatch-fixes.patch mm-make-early_pfn_to_nid-a-c-function.patch 
vmalloc-show-vmalloced-areas-via-proc-vmallocinfo-checkpatch-fixes.patch vmallocinfo-add-caller-information-checkpatch-fixes.patch vmscan-give-referenced-active-and-unmapped-pages-a-second-trip-around-the-lru.patch vm-dont-run-touch_buffer-during-buffercache-lookups.patch capabilities-implement-per-process-securebits-warning-fix.patch lsm-introduce-inode_getsecid-and-ipc_getsecid-hooks-checkpatch-fixes.patch lsm-audit-introduce-generic-audit-lsm-hooks-checkpatch-fixes.patch selinux-use-new-audit-hooks-remove-redundant-exports-checkpatch-fixes.patch audit-final-renamings-and-cleanup-checkpatch-fixes.patch arch-alpha-kernel-trapsc-use-time_-macros-fix.patch alpha-teach-the-compiler-that-bug-doesnt-return.patch make-dev-kmem-a-config-option-fix.patch misc-phantom-add-compat-ioctl-checkpatch-fixes.patch sysrq-add-show-backtrace-on-all-cpus-function-checkpatch-fixes.patch sysrq-add-show-backtrace-on-all-cpus-function-checkpatch-fixes-cleanup.patch codafs-fix-build-warning.patch lib-swiotlbc-cleanups.patch r-o-bind-mounts-elevate-write-count-for-callers-of-vfs_mkdir-fix.patch r-o-bind-mounts-elevate-write-count-for-xattr_permission-callers-fix.patch r-o-bind-mounts-get-write-access-for-vfs_rename-callers-fix.patch r-o-bind-mounts-check-mnt-instead-of-superblock-directly-fix.patch r-o-bind-mounts-check-mnt-instead-of-superblock-directly-fix-2.patch r-o-bind-mounts-get-callers-of-vfs_mknod-create-fix.patch fs-inodec-use-hlist_for_each_entry-checkpatch-fixes.patch add-rcu_assign_index-if-ever-needed-fix.patch add-warn_on_secs-macro-simplification-fix.patch uart_get_baud_rate-stop-mangling-termios-fix.patch oprofile-change-cpu_buffer-from-array-to-per_cpu-variable-checkpatch-fixes.patch vt-notifier-extension-for-accessibility-checkpatch-fixes.patch kprobes-prevent-probing-of-preempt_schedule-fix.patch kprobes-prevent-probing-of-preempt_schedule-checkpatch-fixes.patch quota-various-style-cleanups-checkpatch-fixes.patch 
quota-quota-core-changes-for-quotaon-on-remount-quota-ext3-make-ext3-handle-quotaon-on-remount-checkpatch-fixes.patch quota-quota-core-changes-for-quotaon-on-remount-quota-ext4-make-ext4-handle-quotaon-on-remount-checkpatch-fixes.patch rtc-isl1208-new-style-conversion-and-minor-bug-fixes-checkpatch-fixes.patch rtc-pcf8563-new-style-conversion-checkpatch-fixes.patch rtc-pcf8563-new-style-conversion-checkpatch-fixes-fix.patch rtc-x1205-new-style-conversion-checkpatch-fixes.patch gpiochip_reserve-fix.patch fb-add-support-for-foreign-endianness-force-it-on.patch fbcon-replace-mono_col-macro-with-static-inline-fix.patch pm-gxfb-add-hook-to-pm-console-layer-that-allows-disabling-of-suspend-vt-switch-fix.patch fbdev-driver-for-freescale-8610-and-5121-diu-fix.patch pnp-use-dev_printk-for-quirk-messages-fix.patch fat_valid_media-remove-pointless-test.patch cgroup-api-files-update-cpusets-to-use-cgroup-structured-file-api-fix.patch add-a-document-describing-the-resource-counter-abstraction-v2-fix.patch memcgroup-implement-failcounter-reset-checkpatch-fixes.patch workqueues-shrink-cpu_populated_map-when-cpu-dies-fix.patch ext4-is-broken.patch ipc-use-ipc_buildid-directly-from-ipc_addid-cleanup.patch ipmi-run-to-completion-fixes-checkpatch-fixes.patch ipmi-style-fixes-in-the-system-interface-code-checkpatch-fixes.patch sxc-fix-printk-warnings-on-sparc32.patch elf-fix-shadowed-variables-in-fs-binfmt_elfc.patch sgi-altix-mmtimer-allow-larger-number-of-timers-per-node-fix.patch sgi-altix-mmtimer-allow-larger-number-of-timers-per-node-fix-2.patch epcac-static-functions-and-integer-as-null-pointer-fixes-checkpatch-fixes.patch keys-add-keyctl-function-to-get-a-security-label-fix.patch procfs-task-exe-symlink-fix.patch edd-add-default-mode-config_edd_off=n-override-with-edd=onoff-fix.patch make-module_sect_attrs-private-to-kernel-modulec-checkpatch-fixes.patch mm-bdi-export-bdi-attributes-in-sysfs-ia64-fix.patch basic-braille-screen-reader-support-ppc-fix.patch 
hfs-fix-warning-with-64k-page_size.patch hfsplus-fix-warning-with-64k-page_size.patch alloc_uid-cleanup.patch add-macros-similar-to-min-max-min_t-max_t.patch clocksource-introduce-clock_monotonic_raw-fix-checkpatch-fixes.patch idr-create-idr_layer_cache-at-boot-time-fix.patch idr-create-idr_layer_cache-at-boot-time-fix-fix.patch edac-add-e752x-parameter-for-sysbus_parity-selection-checkpatch-fixes.patch dma-mapping-ib-expand-ib_umem_get-prototype-fix.patch reiser4.patch jens-broke-reiser4patch-added-to-mm-tree.patch page-owner-tracking-leak-detector.patch nr_blockdev_pages-in_interrupt-warning.patch slab-leaks3-default-y.patch profile-likely-unlikely-macros.patch profile-likely-unlikely-macros-fix.patch put_bh-debug.patch shrink_slab-handle-bad-shrinkers.patch getblk-handle-2tb-devices.patch getblk-handle-2tb-devices-fix.patch undeprecate-pci_find_device.patch i386-arch-x86-math-emu-reg_ld_strc-fix-warning.patch w1-build-fix.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html