Index: linux-2.6.11-ck3/include/linux/swap.h =================================================================== --- linux-2.6.11-ck3.orig/include/linux/swap.h 2004-12-25 10:14:51.000000000 +1100 +++ linux-2.6.11-ck3/include/linux/swap.h 2005-03-26 11:22:12.000000000 +1100 @@ -174,7 +174,8 @@ extern void swap_setup(void); /* linux/mm/vmscan.c */ extern int try_to_free_pages(struct zone **, unsigned int, unsigned int); extern int shrink_all_memory(int); -extern int vm_swappiness; +extern int vm_mapped; +extern int vm_hardmaplimit; #ifdef CONFIG_MMU /* linux/mm/shmem.c */ Index: linux-2.6.11-ck3/include/linux/sysctl.h =================================================================== --- linux-2.6.11-ck3.orig/include/linux/sysctl.h 2005-03-26 11:22:11.000000000 +1100 +++ linux-2.6.11-ck3/include/linux/sysctl.h 2005-03-26 11:22:12.000000000 +1100 @@ -161,7 +161,7 @@ enum VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */ VM_PAGEBUF=17, /* struct: Control pagebuf parameters */ VM_HUGETLB_PAGES=18, /* int: Number of available Huge Pages */ - VM_SWAPPINESS=19, /* Tendency to steal mapped memory */ + VM_MAPPED=19, /* percent mapped min while evicting cache */ VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */ VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ @@ -171,7 +171,8 @@ enum VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ -}; + VM_HARDMAPLIMIT=29, /* Make mapped a hard limit */ + }; /* CTL_NET names: */ Index: linux-2.6.11-ck3/kernel/sysctl.c =================================================================== --- linux-2.6.11-ck3.orig/kernel/sysctl.c 2005-03-26 11:22:11.000000000 +1100 +++ linux-2.6.11-ck3/kernel/sysctl.c 2005-03-26 11:22:12.000000000 +1100 @@ -729,16 +729,24 @@ static 
ctl_table vm_table[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = VM_SWAPPINESS, - .procname = "swappiness", - .data = &vm_swappiness, - .maxlen = sizeof(vm_swappiness), + .ctl_name = VM_MAPPED, + .procname = "mapped", + .data = &vm_mapped, + .maxlen = sizeof(vm_mapped), .mode = 0644, .proc_handler = &proc_dointvec_minmax, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = VM_HARDMAPLIMIT, + .procname = "hardmaplimit", + .data = &vm_hardmaplimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #ifdef CONFIG_HUGETLB_PAGE { .ctl_name = VM_HUGETLB_PAGES, Index: linux-2.6.11-ck3/mm/vmscan.c =================================================================== --- linux-2.6.11-ck3.orig/mm/vmscan.c 2005-03-02 19:30:30.000000000 +1100 +++ linux-2.6.11-ck3/mm/vmscan.c 2005-03-26 11:22:12.000000000 +1100 @@ -115,10 +115,8 @@ struct shrinker { #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0) #endif -/* - * From 0 .. 100. Higher means more swappy. - */ -int vm_swappiness = 60; +int vm_mapped = 66; +int vm_hardmaplimit = 1; static long total_memory; static LIST_HEAD(shrinker_list); @@ -699,10 +697,14 @@ refill_inactive_zone(struct zone *zone, * doesn't necessarily mean that page reclaim isn't succeeding. * * The distress ratio is important - we don't want to start going oom. + * This distress value is ignored if we apply a hardmaplimit except + * in extreme distress. * - * A 100% value of vm_swappiness overrides this algorithm altogether. + * A 0% value of vm_mapped overrides this algorithm altogether. 
*/ - swap_tendency = mapped_ratio / 2 + distress + vm_swappiness; + swap_tendency = mapped_ratio * 100 / (vm_mapped + 1); + if (!vm_hardmaplimit || distress == 100) + swap_tendency += distress; /* * Now use this metric to decide whether to start moving mapped memory @@ -1007,6 +1009,7 @@ loop_again: */ for (i = pgdat->nr_zones - 1; i >= 0; i--) { struct zone *zone = pgdat->node_zones + i; + unsigned long watermark = zone->pages_high; if (zone->present_pages == 0) continue; @@ -1015,8 +1018,16 @@ loop_again: priority != DEF_PRIORITY) continue; + /* + * The watermark is twice pages_high when "priority" + * equals DEF_PRIORITY and is relaxed as "priority" + * falls, reaching pages_high at priority 0. + */ + watermark += (zone->pages_high * priority / + DEF_PRIORITY); + if (!zone_watermark_ok(zone, order, - zone->pages_high, 0, 0, 0)) { + watermark, 0, 0, 0)) { end_zone = i; goto scan; }