diff -urN oldtree/include/linux/mmzone.h newtree/include/linux/mmzone.h
--- oldtree/include/linux/mmzone.h	2006-09-26 15:17:19.000000000 -0400
+++ newtree/include/linux/mmzone.h	2006-09-26 15:27:53.000000000 -0400
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -381,6 +382,7 @@
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
+	struct timer_list watermark_timer;
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
diff -urN oldtree/mm/vmscan.c newtree/mm/vmscan.c
--- oldtree/mm/vmscan.c	2006-09-26 15:27:33.000000000 -0400
+++ newtree/mm/vmscan.c	2006-09-26 15:27:53.000000000 -0400
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -1292,6 +1293,8 @@
 	return nr_reclaimed;
 }
 
+#define WT_EXPIRY	(HZ * 5)	/* Interval between watermark_timer wakeups */
+
 /*
  * The background pageout daemon, started as a kernel thread
  * from the init process.
@@ -1341,6 +1344,8 @@
 
 		try_to_freeze();
 
+		/* kswapd has been busy so delay watermark_timer */
+		mod_timer(&pgdat->watermark_timer, jiffies + WT_EXPIRY);
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 		new_order = pgdat->kswapd_max_order;
 		pgdat->kswapd_max_order = 0;
@@ -1568,20 +1573,67 @@
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /*
+ * Timer callback for pgdat->watermark_timer; @data is the pg_data_t that
+ * kswapd_run() stored in the timer.
+ *
+ * We wake up kswapd every WT_EXPIRY till free ram is above pages_lots.
+ * Nothing is woken while nobody waits on kswapd_wait or while
+ * above_background_load() is true.  The timer is re-armed on every path.
+ */
+static void watermark_wakeup(unsigned long data)
+{
+	pg_data_t *pgdat = (pg_data_t *)data;
+	struct timer_list *wt = &pgdat->watermark_timer;
+	int i;
+
+	if (!waitqueue_active(&pgdat->kswapd_wait) || above_background_load())
+		goto out;
+	for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+		struct zone *z = pgdat->node_zones + i;
+
+		if (!populated_zone(z) || is_highmem(z)) {
+			/* We are better off leaving highmem full */
+			continue;
+		}
+		if (!zone_watermark_ok(z, 0, z->pages_lots, 0, 0)) {
+			/* One zone below pages_lots is enough */
+			wake_up_interruptible(&pgdat->kswapd_wait);
+			goto out;
+		}
+	}
+out:
+	mod_timer(wt, jiffies + WT_EXPIRY);
+}
+
+/*
  * This kswapd start function will be called by init and node-hot-add.
  * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
  */
 int kswapd_run(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
+	struct timer_list *wt;
 	int ret = 0;
 
 	if (pgdat->kswapd)
 		return 0;
 
+	/* Arm the periodic watermark check for this node */
+	wt = &pgdat->watermark_timer;
+	init_timer(wt);
+	wt->data = (unsigned long)pgdat;
+	wt->function = watermark_wakeup;
+	wt->expires = jiffies + WT_EXPIRY;
+	add_timer(wt);
+
 	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
 	if (IS_ERR(pgdat->kswapd)) {
 		/* failure at boot is fatal */
+		/*
+		 * watermark_wakeup() re-arms itself, so wait for a running
+		 * handler to finish; plain del_timer() could leave it armed.
+		 */
+		del_timer_sync(wt);
 		BUG_ON(system_state == SYSTEM_BOOTING);
 		printk("Failed to start kswapd on node %d\n",nid);
 		ret = -1;