Set the ia64 tiger and zx1 defconfigs to CONFIG_HZ=100 (previously 250) and
change the default I/O scheduler choice from DEFAULT_AS to DEFAULT_DEADLINE.

diff -urN oldtree/arch/ia64/configs/tiger_defconfig newtree/arch/ia64/configs/tiger_defconfig
--- oldtree/arch/ia64/configs/tiger_defconfig	2006-04-01 16:23:42.000000000 -0500
+++ newtree/arch/ia64/configs/tiger_defconfig	2006-04-02 06:15:50.352364250 -0400
@@ -105,10 +105,10 @@
 # CONFIG_IA64_PAGE_SIZE_64KB is not set
 CONFIG_PGTABLE_3=y
 # CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
 CONFIG_IA64_L1_CACHE_SHIFT=7
 CONFIG_IA64_CYCLONE=y
 CONFIG_IOSAPIC=y
diff -urN oldtree/arch/ia64/configs/zx1_defconfig newtree/arch/ia64/configs/zx1_defconfig
--- oldtree/arch/ia64/configs/zx1_defconfig	2006-04-01 16:23:42.000000000 -0500
+++ newtree/arch/ia64/configs/zx1_defconfig	2006-04-02 06:15:50.356364500 -0400
@@ -103,10 +103,10 @@
 # CONFIG_IA64_PAGE_SIZE_64KB is not set
 CONFIG_PGTABLE_3=y
 # CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
 CONFIG_IA64_L1_CACHE_SHIFT=7
 # CONFIG_IA64_CYCLONE is not set
 CONFIG_IOSAPIC=y
diff -urN oldtree/block/Kconfig.iosched newtree/block/Kconfig.iosched
--- oldtree/block/Kconfig.iosched	2006-04-01 16:23:42.000000000 -0500
+++ newtree/block/Kconfig.iosched	2006-04-02 06:16:23.310424000 -0400
@@ -40,7 +40,7 @@
 
 choice
 	prompt "Default I/O scheduler"
-	default DEFAULT_AS
+	default DEFAULT_DEADLINE
 	help
 	  Select the I/O scheduler which will be used by default
 	  for all block devices.
Replace the "swappiness" sysctl entry with "mapped" and "hardmaplimit", add a
per-zone pages_lots watermark and report it next to min/low/high, make HZ_1000
the default timer frequency (HZ_250 becomes HZ_250_NODEFAULT), and have the
vmscan.c zone_watermark_ok() checks use pages_high scaled up by the current
priority instead of plain pages_high.

add_page_to_inactive_list_tail() is added to mm_inline.h exactly once: two
identical static inline definitions in the same header are a redefinition
error, so the hunk adds 7 lines (-21,6 +21,13).

diff -urN oldtree/include/linux/mm_inline.h newtree/include/linux/mm_inline.h
--- oldtree/include/linux/mm_inline.h	2006-04-01 16:23:42.000000000 -0500
+++ newtree/include/linux/mm_inline.h	2006-04-02 06:17:31.622693250 -0400
@@ -21,6 +21,13 @@
 }
 
 static inline void
+add_page_to_inactive_list_tail(struct zone *zone, struct page *page)
+{
+	list_add_tail(&page->lru, &zone->inactive_list);
+	zone->nr_inactive++;
+}
+
+static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
 	list_del(&page->lru);
diff -urN oldtree/include/linux/mmzone.h newtree/include/linux/mmzone.h
--- oldtree/include/linux/mmzone.h	2006-04-01 16:23:42.000000000 -0500
+++ newtree/include/linux/mmzone.h	2006-04-02 06:16:27.842707250 -0400
@@ -121,7 +121,7 @@
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 	unsigned long		free_pages;
-	unsigned long		pages_min, pages_low, pages_high;
+	unsigned long		pages_min, pages_low, pages_high, pages_lots;
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
diff -urN oldtree/include/linux/swap.h newtree/include/linux/swap.h
--- oldtree/include/linux/swap.h	2006-04-01 16:23:42.000000000 -0500
+++ newtree/include/linux/swap.h	2006-04-02 06:17:31.566689750 -0400
@@ -175,7 +175,8 @@
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zone **, gfp_t);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
-extern int vm_swappiness;
+extern int vm_mapped;
+extern int vm_hardmaplimit;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 
 /* possible outcome of pageout() */
diff -urN oldtree/include/linux/sysctl.h newtree/include/linux/sysctl.h
--- oldtree/include/linux/sysctl.h	2006-04-01 16:23:42.000000000 -0500
+++ newtree/include/linux/sysctl.h	2006-04-02 06:17:57.472308750 -0400
@@ -172,7 +172,7 @@
 	VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */
 	VM_PAGEBUF=17,		/* struct: Control pagebuf parameters */
 	VM_HUGETLB_PAGES=18,	/* int: Number of available Huge Pages */
-	VM_SWAPPINESS=19,	/* Tendency to steal mapped memory */
+	VM_MAPPED=19,		/* percent mapped min while evicting cache */
 	VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */
 	VM_MIN_FREE_KBYTES=21,	/* Minimum free kilobytes to maintain */
 	VM_MAX_MAP_COUNT=22,	/* int: Maximum number of mmaps/address-space */
@@ -186,7 +186,8 @@
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
-	VM_SWAP_PREFETCH=33,       /* swap prefetch */
+	VM_SWAP_PREFETCH=33,	/* swap prefetch */
+	VM_HARDMAPLIMIT=34,	/* Make mapped a hard limit */
 };
 
 
diff -urN oldtree/kernel/Kconfig.hz newtree/kernel/Kconfig.hz
--- oldtree/kernel/Kconfig.hz	2006-04-01 16:23:42.000000000 -0500
+++ newtree/kernel/Kconfig.hz	2006-04-02 06:16:00.833019250 -0400
@@ -4,7 +4,7 @@
 
 choice
 	prompt "Timer frequency"
-	default HZ_250
+	default HZ_1000
 	help
 	 Allows the configuration of the timer frequency. It is customary
 	 to have the timer interrupt run at 1000 HZ but 100 HZ may be more
@@ -21,14 +21,17 @@
 	help
 	  100 HZ is a typical choice for servers, SMP and NUMA systems
 	  with lots of processors that may show reduced performance if
-	  too many timer interrupts are occurring.
+	  too many timer interrupts are occurring. Laptops may also show
+	  improved battery life.
 
-	config HZ_250
+	config HZ_250_NODEFAULT
 		bool "250 HZ"
 	help
-	 250 HZ is a good compromise choice allowing server performance
-	 while also showing good interactive responsiveness even
-	 on SMP and NUMA systems.
+	 250 HZ is a lousy compromise choice allowing server interactivity
+	 while also showing desktop throughput and no extra power saving on
+	 laptops. Good for when you can't make up your mind.
+
+	 Recommend 100 or 1000 instead.
 
 	config HZ_1000
 		bool "1000 HZ"
@@ -41,6 +44,6 @@
 config HZ
 	int
 	default 100 if HZ_100
-	default 250 if HZ_250
+	default 250 if HZ_250_NODEFAULT
 	default 1000 if HZ_1000
 
diff -urN oldtree/kernel/sysctl.c newtree/kernel/sysctl.c
--- oldtree/kernel/sysctl.c	2006-04-01 16:23:42.000000000 -0500
+++ newtree/kernel/sysctl.c	2006-04-02 06:17:31.582690750 -0400
@@ -765,16 +765,24 @@
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= VM_SWAPPINESS,
-		.procname	= "swappiness",
-		.data		= &vm_swappiness,
-		.maxlen		= sizeof(vm_swappiness),
+		.ctl_name	= VM_MAPPED,
+		.procname	= "mapped",
+		.data		= &vm_mapped,
+		.maxlen		= sizeof(vm_mapped),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_HARDMAPLIMIT,
+		.procname	= "hardmaplimit",
+		.data		= &vm_hardmaplimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.ctl_name	= VM_HUGETLB_PAGES,
diff -urN oldtree/mm/page_alloc.c newtree/mm/page_alloc.c
--- oldtree/mm/page_alloc.c	2006-04-01 16:23:42.000000000 -0500
+++ newtree/mm/page_alloc.c	2006-04-02 06:16:27.862708500 -0400
@@ -1524,6 +1524,7 @@
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
+			" lots:%lukB"
 			" active:%lukB"
 			" inactive:%lukB"
 			" present:%lukB"
@@ -1535,6 +1536,7 @@
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
+			K(zone->pages_lots),
 			K(zone->nr_active),
 			K(zone->nr_inactive),
 			K(zone->present_pages),
@@ -2319,6 +2321,7 @@
 			   "\n        min      %lu"
 			   "\n        low      %lu"
 			   "\n        high     %lu"
+			   "\n        lots     %lu"
 			   "\n        active   %lu"
 			   "\n        inactive %lu"
 			   "\n        scanned  %lu (a: %lu i: %lu)"
@@ -2328,6 +2331,7 @@
 			   zone->pages_min,
 			   zone->pages_low,
 			   zone->pages_high,
+			   zone->pages_lots,
 			   zone->nr_active,
 			   zone->nr_inactive,
 			   zone->pages_scanned,
@@ -2630,6 +2634,7 @@
 
 		zone->pages_low   = zone->pages_min + tmp / 4;
 		zone->pages_high  = zone->pages_min + tmp / 2;
+		zone->pages_lots  = zone->pages_min + tmp;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
diff -urN oldtree/mm/vmscan.c newtree/mm/vmscan.c
--- oldtree/mm/vmscan.c	2006-04-01 16:23:42.000000000 -0500
+++ newtree/mm/vmscan.c	2006-04-02 06:17:31.618693000 -0400
@@ -95,10 +95,11 @@
 #endif
 
 /*
- * From 0 .. 100.  Higher means more swappy.
+ * From 0 .. 100.  Lower means more swappy.
  */
-int vm_swappiness = 60;
-static long total_memory;
+int vm_mapped __read_mostly = 66;
+int vm_hardmaplimit __read_mostly = 1;
+static long total_memory __read_mostly;
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
@@ -744,10 +745,14 @@
 		 * The distress ratio is important - we don't want to start
 		 * going oom.
 		 *
-		 * A 100% value of vm_swappiness overrides this algorithm
-		 * altogether.
+		 * This distress value is ignored if we apply a hardmaplimit except
+		 * in extreme distress.
+		 *
+		 * A 0% value of vm_mapped overrides this algorithm altogether.
 		 */
-		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+		swap_tendency = mapped_ratio * 100 / (vm_mapped + 1);
+		if (!vm_hardmaplimit || distress == 100)
+			swap_tendency += distress;
 
 		/*
 		 * Now use this metric to decide whether to start moving mapped
@@ -1097,6 +1102,7 @@
 			 */
 			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
 				struct zone *zone = pgdat->node_zones + i;
+				unsigned long watermark;
 
 				if (!populated_zone(zone))
 					continue;
@@ -1105,8 +1111,17 @@
 						priority != DEF_PRIORITY)
 					continue;
 
+				/*
+				 * The watermark is relaxed depending on the
+				 * level of "priority" till it drops to
+				 * pages_high.
+				 */
+				watermark = zone->pages_high +
+					(zone->pages_high * priority /
+					DEF_PRIORITY);
+
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0)) {
+						watermark, 0, 0)) {
 					end_zone = i;
 					goto scan;
 				}
@@ -1142,8 +1157,11 @@
 				continue;
 
 			if (nr_pages == 0) {	/* Not software suspend */
+				unsigned long watermark = zone->pages_high +
+					(zone->pages_high * priority /
+					DEF_PRIORITY);
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0))
+						watermark, end_zone, 0))
 					all_zones_ok = 0;
 			}
 			zone->temp_priority = priority;