diff -urN oldtree/include/linux/mmzone.h newtree/include/linux/mmzone.h
--- oldtree/include/linux/mmzone.h	2006-09-26 15:14:49.000000000 -0400
+++ newtree/include/linux/mmzone.h	2006-09-26 15:16:46.000000000 -0400
@@ -400,7 +400,7 @@
 			unsigned long *free);
 unsigned long nr_free_inactive_pages_node(int nid);
 void build_all_zonelists(void);
-void wakeup_kswapd(struct zone *zone, int order);
+void wakeup_kswapd(struct zone *zone, int order, struct task_struct *p);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
 
diff -urN oldtree/mm/page_alloc.c newtree/mm/page_alloc.c
--- oldtree/mm/page_alloc.c	2006-09-26 15:14:49.000000000 -0400
+++ newtree/mm/page_alloc.c	2006-09-26 15:16:46.000000000 -0400
@@ -1069,7 +1069,7 @@
 		goto got_pg;
 
 	do {
-		wakeup_kswapd(*z, order);
+		wakeup_kswapd(*z, order, p);
 	} while (*(++z));
 
 	/*
diff -urN oldtree/mm/vmscan.c newtree/mm/vmscan.c
--- oldtree/mm/vmscan.c	2006-09-26 15:14:49.000000000 -0400
+++ newtree/mm/vmscan.c	2006-09-26 15:16:46.000000000 -0400
@@ -945,6 +945,56 @@
 }
 
 /*
+ * Helper functions to adjust the nice level of kswapd, based on the priority
+ * of the task (p) that called it. If kswapd is active and already at a higher
+ * priority we do not demote its nice level, since it is still working on
+ * behalf of a higher priority task. With kernel threads we leave it at
+ * nice 0.
+ *
+ * We don't ever run kswapd real time, so if a real time task calls kswapd we
+ * set it to the highest SCHED_NORMAL priority.
+ */
+
+/*
+ * Nice level kswapd should use on behalf of p: -20 for real time tasks,
+ * task_nice(p) for other tasks that have an mm, and 0 for tasks without one
+ * (kernel threads).
+ */
+static int effective_sc_prio(struct task_struct *p)
+{
+	if (likely(p->mm)) {
+		if (rt_task(p))
+			return -20;
+		return task_nice(p);
+	}
+	return 0;
+}
+
+/*
+ * Apply the effective nice level of p to kswapd. 'active' is non-zero when
+ * kswapd is already running; the level is then only applied if it is lower
+ * than the one kswapd has now. An idle kswapd always takes the new level.
+ */
+static void set_kswapd_nice(struct task_struct *kswapd, struct task_struct *p,
+			    int active)
+{
+	long nice = effective_sc_prio(p);
+
+	if (task_nice(kswapd) > nice || !active)
+		set_user_nice(kswapd, nice);
+}
+
+/*
+ * Scale DEF_PRIORITY by the effective nice level of p: returns DEF_PRIORITY
+ * plus DEF_PRIORITY * nice / 40 (integer division). Nothing in this patch
+ * calls it yet.
+ */
+static int sc_priority(struct task_struct *p)
+{
+	return (DEF_PRIORITY + (DEF_PRIORITY * effective_sc_prio(p) / 40));
+}
+
+/*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
@@ -1315,6 +1365,7 @@
 			 */
 			order = new_order;
 		} else {
+			set_user_nice(tsk, 0);
 			schedule();
 			order = pgdat->kswapd_max_order;
 		}
@@ -1328,9 +1379,10 @@
 /*
  * A zone is low on free memory, so wake its kswapd task to service it.
  */
-void wakeup_kswapd(struct zone *zone, int order)
+void wakeup_kswapd(struct zone *zone, int order, struct task_struct *p)
 {
 	pg_data_t *pgdat;
+	int waiting;
 
 	if (!populated_zone(zone))
 		return;
@@ -1342,7 +1394,14 @@
 		pgdat->kswapd_max_order = order;
 	if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 		return;
-	if (!waitqueue_active(&pgdat->kswapd_wait))
+	/*
+	 * A waiter on kswapd_wait means kswapd is asleep and needs waking; no
+	 * waiter means it is already running. set_kswapd_nice() wants to know
+	 * whether kswapd is active, which is the inverse of that.
+	 */
+	waiting = waitqueue_active(&pgdat->kswapd_wait);
+	set_kswapd_nice(pgdat->kswapd, p, !waiting);
+	if (!waiting)
 		return;
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }