diff -urN oldtree/Documentation/sysctl/kernel.txt newtree/Documentation/sysctl/kernel.txt --- oldtree/Documentation/sysctl/kernel.txt 2006-10-05 15:26:55.000000000 -0400 +++ newtree/Documentation/sysctl/kernel.txt 2006-10-06 16:15:41.000000000 -0400 @@ -18,6 +18,7 @@ show up in /proc/sys/kernel: - acpi_video_flags - acct +- compute - core_pattern - core_uses_pid - ctrl-alt-del @@ -25,6 +26,8 @@ - domainname - hostname - hotplug +- interactive +- iso_cpu - java-appletviewer [ binfmt_java, obsolete ] - java-interpreter [ binfmt_java, obsolete ] - l2cr [ PPC only ] @@ -83,6 +86,16 @@ ============================================================== +compute: (Staircase only) + +This flag controls the long timeslice, delayed preemption mode in the +cpu scheduler suitable for scientific computation applications. It +leads to large latencies, so it is unsuitable for normal usage. + +Disabled by default. + +============================================================== + core_pattern: core_pattern is used to specify a core dumpfile pattern name. @@ -160,6 +173,23 @@ ============================================================== +interactive: (Staircase only) + +This flag controls the allocation of dynamic priorities in the cpu +scheduler. It gives tasks that use little cpu a high priority for lowest +latencies. Nice value is still observed but stricter cpu proportions +are obeyed if this tunable is disabled. Enabled by default. + +============================================================== + +iso_cpu: (Staircase only) + +This sets the percentage of cpu that unprivileged SCHED_ISO tasks can +use while running effectively at realtime priority, averaged over a +rolling 3 seconds. Set to 80% by default. + +============================================================== + l2cr: (PPC only) This flag controls the L2 cache of G3 processor boards. 
If diff -urN oldtree/include/linux/init_task.h newtree/include/linux/init_task.h --- oldtree/include/linux/init_task.h 2006-10-06 13:38:03.000000000 -0400 +++ newtree/include/linux/init_task.h 2006-10-06 16:21:44.000000000 -0400 @@ -90,7 +90,62 @@ /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) + * + * Multiple functions for the multiple schedulers, same thing: */ +#ifdef CONFIG_INGOSCHED +#define INIT_TASK(tsk) \ +{ \ + .state = 0, \ + .thread_info = &init_thread_info, \ + .usage = ATOMIC_INIT(2), \ + .flags = 0, \ + .lock_depth = -1, \ + .prio = MAX_PRIO-20, \ + .static_prio = MAX_PRIO-20, \ + .normal_prio = MAX_PRIO-20, \ + .policy = SCHED_NORMAL, \ + .cpus_allowed = CPU_MASK_ALL, \ + .mm = NULL, \ + .active_mm = &init_mm, \ + .run_list = LIST_HEAD_INIT(tsk.run_list), \ + .ioprio = 0, \ + .time_slice = HZ, \ + .tasks = LIST_HEAD_INIT(tsk.tasks), \ + .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ + .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ + .real_parent = &tsk, \ + .parent = &tsk, \ + .children = LIST_HEAD_INIT(tsk.children), \ + .sibling = LIST_HEAD_INIT(tsk.sibling), \ + .group_leader = &tsk, \ + .group_info = &init_groups, \ + .cap_effective = CAP_INIT_EFF_SET, \ + .cap_inheritable = CAP_INIT_INH_SET, \ + .cap_permitted = CAP_FULL_SET, \ + .keep_capabilities = 0, \ + .user = INIT_USER, \ + .comm = "swapper", \ + .thread = INIT_THREAD, \ + .fs = &init_fs, \ + .files = &init_files, \ + .signal = &init_signals, \ + .sighand = &init_sighand, \ + .nsproxy = &init_nsproxy, \ + .pending = { \ + .list = LIST_HEAD_INIT(tsk.pending.list), \ + .signal = {{0}}}, \ + .blocked = {{0}}, \ + .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ + .journal_info = NULL, \ + .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ + .fs_excl = ATOMIC_INIT(0), \ + .pi_lock = SPIN_LOCK_UNLOCKED, \ + INIT_TRACE_IRQFLAGS \ + INIT_LOCKDEP \ +} +#endif +#ifdef CONFIG_STAIRCASE #define INIT_TASK(tsk) \ { \ 
.state = 0, \ @@ -98,9 +153,9 @@ .usage = ATOMIC_INIT(2), \ .flags = 0, \ .lock_depth = -1, \ - .prio = MAX_PRIO-20, \ - .static_prio = MAX_PRIO-20, \ - .normal_prio = MAX_PRIO-20, \ + .prio = MAX_PRIO-21, \ + .static_prio = MAX_PRIO-21, \ + .normal_prio = MAX_PRIO-21, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .mm = NULL, \ @@ -142,7 +197,7 @@ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ } - +#endif #define INIT_CPU_TIMERS(cpu_timers) \ { \ diff -urN oldtree/include/linux/sched.h newtree/include/linux/sched.h --- oldtree/include/linux/sched.h 2006-10-06 13:38:03.000000000 -0400 +++ newtree/include/linux/sched.h 2006-10-06 16:22:43.000000000 -0400 @@ -34,6 +34,10 @@ #define SCHED_FIFO 1 #define SCHED_RR 2 #define SCHED_BATCH 3 +#ifdef CONFIG_STAIRCASE +#define SCHED_ISO 4 +#define SCHED_IDLEPRIO 5 +#endif #ifdef __KERNEL__ @@ -207,6 +211,9 @@ void io_schedule(void); long io_schedule_timeout(long timeout); +#ifdef CONFIG_STAIRCASE +extern int sched_interactive, sched_compute, sched_iso_cpu; +#endif extern void cpu_init (void); extern void trap_init(void); @@ -512,10 +519,17 @@ #define MAX_USER_RT_PRIO 100 #define MAX_RT_PRIO MAX_USER_RT_PRIO +#ifdef CONFIG_STAIRCASE +#define ISO_PRIO (MAX_RT_PRIO - 1) +#endif +#ifdef CONFIG_INGOSCHED #define MAX_PRIO (MAX_RT_PRIO + 40) +#endif #ifdef CONFIG_STAIRCASE -#define MIN_USER_PRIO (MAX_PRIO - 1) +#define MAX_PRIO (MAX_RT_PRIO + 41) +#define MIN_USER_PRIO (MAX_PRIO - 2) +#define IDLEPRIO_PRIO (MAX_PRIO - 1) #endif #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) @@ -523,6 +537,10 @@ #define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) #define is_rt_policy(p) ((p) != SCHED_NORMAL && (p) != SCHED_BATCH) #define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) +#ifdef CONFIG_STAIRCASE +#define iso_task(p) (unlikely((p)->policy == SCHED_ISO)) +#define idleprio_task(p) (unlikely((p)->policy == SCHED_IDLEPRIO)) +#endif /* * Some day this will be a full-fledged user tracking system.. 
@@ -1164,6 +1182,9 @@ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ +#ifdef CONFIG_STAIRCASE +#define PF_ISOREF 0x04000000 /* SCHED_ISO task has used up quota */ +#endif #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #ifdef CONFIG_STAIRCASE diff -urN oldtree/include/linux/sysctl.h newtree/include/linux/sysctl.h --- oldtree/include/linux/sysctl.h 2006-10-06 14:50:06.000000000 -0400 +++ newtree/include/linux/sysctl.h 2006-10-06 16:16:01.000000000 -0400 @@ -154,6 +154,9 @@ KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ KERN_KDUMP_ON_INIT=77, /* int: ia64 kdump with INIT */ + KERN_INTERACTIVE=78, /* interactive tasks can have cpu bursts */ + KERN_COMPUTE=79, /* adjust timeslices for a compute server */ + KERN_ISO_CPU=80, /* percent cpu SCHED_ISO tasks run SCHED_RR */ }; diff -urN oldtree/kernel/sysctl.c newtree/kernel/sysctl.c --- oldtree/kernel/sysctl.c 2006-10-06 14:50:06.000000000 -0400 +++ newtree/kernel/sysctl.c 2006-10-06 16:15:41.000000000 -0400 @@ -234,6 +234,11 @@ { .ctl_name = 0 } }; +/* Constants for minimum and maximum testing. + We use these as one-element integer vectors. 
*/ +static int zero; +static int one_hundred = 100; + static ctl_table kern_table[] = { #ifndef CONFIG_UTS_NS { @@ -685,6 +690,35 @@ .mode = 0444, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_STAIRCASE + { + .ctl_name = KERN_INTERACTIVE, + .procname = "interactive", + .data = &sched_interactive, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_COMPUTE, + .procname = "compute", + .data = &sched_compute, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_ISO_CPU, + .procname = "iso_cpu", + .data = &sched_iso_cpu, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, +#endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) { .ctl_name = KERN_UNKNOWN_NMI_PANIC, @@ -785,12 +819,6 @@ { .ctl_name = 0 } }; -/* Constants for minimum and maximum testing in vm_table. - We use these as one-element integer vectors. */ -static int zero; -static int one_hundred = 100; - - static ctl_table vm_table[] = { { .ctl_name = VM_OVERCOMMIT_MEMORY,