diff -urN oldtree/fs/proc/array.c newtree/fs/proc/array.c
--- oldtree/fs/proc/array.c	2006-06-07 15:35:02.301155500 +0000
+++ newtree/fs/proc/array.c	2006-06-08 16:26:18.309095750 +0000
@@ -166,7 +166,7 @@
 	read_lock(&tasklist_lock);
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
-		"SleepAVG:\t%lu%%\n"
+		"Bonus:\t%d\n"
 		"Tgid:\t%d\n"
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
@@ -174,7 +174,7 @@
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
-		(p->sleep_avg/1024)*100/(1020000000/1024),
+		p->bonus,
 	       	p->tgid,
 		p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
 		pid_alive(p) && p->ptrace ? p->parent->pid : 0,
diff -urN oldtree/include/linux/dwarf2-defs.h newtree/include/linux/dwarf2-defs.h
--- oldtree/include/linux/dwarf2-defs.h	1970-01-01 00:00:00.000000000 +0000
+++ newtree/include/linux/dwarf2-defs.h	2006-06-08 16:26:37.214277250 +0000
@@ -0,0 +1,515 @@
+#ifndef _ELF_DWARF2_H
+/* Machine generated from dwarf2.h by scripts/dwarfh.awk */
+#define _ELF_DWARF2_H
+#define DW_TAG_padding	 0x00
+#define DW_TAG_array_type	 0x01
+#define DW_TAG_class_type	 0x02
+#define DW_TAG_entry_point	 0x03
+#define DW_TAG_enumeration_type	 0x04
+#define DW_TAG_formal_parameter	 0x05
+#define DW_TAG_imported_declaration	 0x08
+#define DW_TAG_label	 0x0a
+#define DW_TAG_lexical_block	 0x0b
+#define DW_TAG_member	 0x0d
+#define DW_TAG_pointer_type	 0x0f
+#define DW_TAG_reference_type	 0x10
+#define DW_TAG_compile_unit	 0x11
+#define DW_TAG_string_type	 0x12
+#define DW_TAG_structure_type	 0x13
+#define DW_TAG_subroutine_type	 0x15
+#define DW_TAG_typedef	 0x16
+#define DW_TAG_union_type	 0x17
+#define DW_TAG_unspecified_parameters	 0x18
+#define DW_TAG_variant	 0x19
+#define DW_TAG_common_block	 0x1a
+#define DW_TAG_common_inclusion	 0x1b
+#define DW_TAG_inheritance	 0x1c
+#define DW_TAG_inlined_subroutine	 0x1d
+#define DW_TAG_module	 0x1e
+#define DW_TAG_ptr_to_member_type	 0x1f
+#define DW_TAG_set_type	 0x20
+#define DW_TAG_subrange_type	 0x21
+#define DW_TAG_with_stmt	 0x22
+#define DW_TAG_access_declaration	 0x23
+#define DW_TAG_base_type	 0x24
+#define DW_TAG_catch_block	 0x25
+#define DW_TAG_const_type	 0x26
+#define DW_TAG_constant	 0x27
+#define DW_TAG_enumerator	 0x28
+#define DW_TAG_file_type	 0x29
+#define DW_TAG_friend	 0x2a
+#define DW_TAG_namelist	 0x2b
+#define DW_TAG_namelist_item	 0x2c
+#define DW_TAG_packed_type	 0x2d
+#define DW_TAG_subprogram	 0x2e
+#define DW_TAG_template_type_param	 0x2f
+#define DW_TAG_template_value_param	 0x30
+#define DW_TAG_thrown_type	 0x31
+#define DW_TAG_try_block	 0x32
+#define DW_TAG_variant_part	 0x33
+#define DW_TAG_variable	 0x34
+#define DW_TAG_volatile_type	 0x35
+#define DW_TAG_dwarf_procedure	 0x36
+#define DW_TAG_restrict_type	 0x37
+#define DW_TAG_interface_type	 0x38
+#define DW_TAG_namespace	 0x39
+#define DW_TAG_imported_module	 0x3a
+#define DW_TAG_unspecified_type	 0x3b
+#define DW_TAG_partial_unit	 0x3c
+#define DW_TAG_imported_unit	 0x3d
+#define DW_TAG_MIPS_loop	 0x4081
+#define DW_TAG_HP_array_descriptor	 0x4090
+#define DW_TAG_format_label	 0x4101
+#define DW_TAG_function_template	 0x4102
+#define DW_TAG_class_template	 0x4103
+#define DW_TAG_GNU_BINCL	 0x4104
+#define DW_TAG_GNU_EINCL	 0x4105
+#define DW_TAG_upc_shared_type	 0x8765
+#define DW_TAG_upc_strict_type	 0x8766
+#define DW_TAG_upc_relaxed_type	 0x8767
+#define DW_TAG_PGI_kanji_type	 0xA000
+#define DW_TAG_PGI_interface_block	 0xA020
+#define DW_TAG_lo_user	0x4080
+#define DW_TAG_hi_user	0xffff
+#define DW_children_no   0
+#define	DW_children_yes  1
+#define DW_FORM_addr	 0x01
+#define DW_FORM_block2	 0x03
+#define DW_FORM_block4	 0x04
+#define DW_FORM_data2	 0x05
+#define DW_FORM_data4	 0x06
+#define DW_FORM_data8	 0x07
+#define DW_FORM_string	 0x08
+#define DW_FORM_block	 0x09
+#define DW_FORM_block1	 0x0a
+#define DW_FORM_data1	 0x0b
+#define DW_FORM_flag	 0x0c
+#define DW_FORM_sdata	 0x0d
+#define DW_FORM_strp	 0x0e
+#define DW_FORM_udata	 0x0f
+#define DW_FORM_ref_addr	 0x10
+#define DW_FORM_ref1	 0x11
+#define DW_FORM_ref2	 0x12
+#define DW_FORM_ref4	 0x13
+#define DW_FORM_ref8	 0x14
+#define DW_FORM_ref_udata	 0x15
+#define DW_FORM_indirect	 0x16
+#define DW_AT_sibling	 0x01
+#define DW_AT_location	 0x02
+#define DW_AT_name	 0x03
+#define DW_AT_ordering	 0x09
+#define DW_AT_subscr_data	 0x0a
+#define DW_AT_byte_size	 0x0b
+#define DW_AT_bit_offset	 0x0c
+#define DW_AT_bit_size	 0x0d
+#define DW_AT_element_list	 0x0f
+#define DW_AT_stmt_list	 0x10
+#define DW_AT_low_pc	 0x11
+#define DW_AT_high_pc	 0x12
+#define DW_AT_language	 0x13
+#define DW_AT_member	 0x14
+#define DW_AT_discr	 0x15
+#define DW_AT_discr_value	 0x16
+#define DW_AT_visibility	 0x17
+#define DW_AT_import	 0x18
+#define DW_AT_string_length	 0x19
+#define DW_AT_common_reference	 0x1a
+#define DW_AT_comp_dir	 0x1b
+#define DW_AT_const_value	 0x1c
+#define DW_AT_containing_type	 0x1d
+#define DW_AT_default_value	 0x1e
+#define DW_AT_inline	 0x20
+#define DW_AT_is_optional	 0x21
+#define DW_AT_lower_bound	 0x22
+#define DW_AT_producer	 0x25
+#define DW_AT_prototyped	 0x27
+#define DW_AT_return_addr	 0x2a
+#define DW_AT_start_scope	 0x2c
+#define DW_AT_stride_size	 0x2e
+#define DW_AT_upper_bound	 0x2f
+#define DW_AT_abstract_origin	 0x31
+#define DW_AT_accessibility	 0x32
+#define DW_AT_address_class	 0x33
+#define DW_AT_artificial	 0x34
+#define DW_AT_base_types	 0x35
+#define DW_AT_calling_convention	 0x36
+#define DW_AT_count	 0x37
+#define DW_AT_data_member_location	 0x38
+#define DW_AT_decl_column	 0x39
+#define DW_AT_decl_file	 0x3a
+#define DW_AT_decl_line	 0x3b
+#define DW_AT_declaration	 0x3c
+#define DW_AT_discr_list	 0x3d
+#define DW_AT_encoding	 0x3e
+#define DW_AT_external	 0x3f
+#define DW_AT_frame_base	 0x40
+#define DW_AT_friend	 0x41
+#define DW_AT_identifier_case	 0x42
+#define DW_AT_macro_info	 0x43
+#define DW_AT_namelist_items	 0x44
+#define DW_AT_priority	 0x45
+#define DW_AT_segment	 0x46
+#define DW_AT_specification	 0x47
+#define DW_AT_static_link	 0x48
+#define DW_AT_type	 0x49
+#define DW_AT_use_location	 0x4a
+#define DW_AT_variable_parameter	 0x4b
+#define DW_AT_virtuality	 0x4c
+#define DW_AT_vtable_elem_location	 0x4d
+#define DW_AT_allocated	 0x4e
+#define DW_AT_associated	 0x4f
+#define DW_AT_data_location	 0x50
+#define DW_AT_stride	 0x51
+#define DW_AT_entry_pc	 0x52
+#define DW_AT_use_UTF8	 0x53
+#define DW_AT_extension	 0x54
+#define DW_AT_ranges	 0x55
+#define DW_AT_trampoline	 0x56
+#define DW_AT_call_column	 0x57
+#define DW_AT_call_file	 0x58
+#define DW_AT_call_line	 0x59
+#define DW_AT_MIPS_fde	 0x2001
+#define DW_AT_MIPS_loop_begin	 0x2002
+#define DW_AT_MIPS_tail_loop_begin	 0x2003
+#define DW_AT_MIPS_epilog_begin	 0x2004
+#define DW_AT_MIPS_loop_unroll_factor	 0x2005
+#define DW_AT_MIPS_software_pipeline_depth	 0x2006
+#define DW_AT_MIPS_linkage_name	 0x2007
+#define DW_AT_MIPS_stride	 0x2008
+#define DW_AT_MIPS_abstract_name	 0x2009
+#define DW_AT_MIPS_clone_origin	 0x200a
+#define DW_AT_MIPS_has_inlines	 0x200b
+#define DW_AT_HP_block_index	 0x2000
+#define DW_AT_HP_unmodifiable	 0x2001
+#define DW_AT_HP_actuals_stmt_list	 0x2010
+#define DW_AT_HP_proc_per_section	 0x2011
+#define DW_AT_HP_raw_data_ptr	 0x2012
+#define DW_AT_HP_pass_by_reference	 0x2013
+#define DW_AT_HP_opt_level	 0x2014
+#define DW_AT_HP_prof_version_id	 0x2015
+#define DW_AT_HP_opt_flags	 0x2016
+#define DW_AT_HP_cold_region_low_pc	 0x2017
+#define DW_AT_HP_cold_region_high_pc	 0x2018
+#define DW_AT_HP_all_variables_modifiable	 0x2019
+#define DW_AT_HP_linkage_name	 0x201a
+#define DW_AT_HP_prof_flags	 0x201b
+#define DW_AT_sf_names	 0x2101
+#define DW_AT_src_info	 0x2102
+#define DW_AT_mac_info	 0x2103
+#define DW_AT_src_coords	 0x2104
+#define DW_AT_body_begin	 0x2105
+#define DW_AT_body_end	 0x2106
+#define DW_AT_GNU_vector	 0x2107
+#define DW_AT_VMS_rtnbeg_pd_address	 0x2201
+#define DW_AT_upc_threads_scaled	 0x3210
+#define DW_AT_PGI_lbase	 0x3a00
+#define DW_AT_PGI_soffset	 0x3a01
+#define DW_AT_PGI_lstride	 0x3a02
+#define DW_AT_lo_user	0x2000	/* Implementation-defined range start.  */
+#define DW_AT_hi_user	0x3ff0	/* Implementation-defined range end.  */
+#define DW_OP_addr	 0x03
+#define DW_OP_deref	 0x06
+#define DW_OP_const1u	 0x08
+#define DW_OP_const1s	 0x09
+#define DW_OP_const2u	 0x0a
+#define DW_OP_const2s	 0x0b
+#define DW_OP_const4u	 0x0c
+#define DW_OP_const4s	 0x0d
+#define DW_OP_const8u	 0x0e
+#define DW_OP_const8s	 0x0f
+#define DW_OP_constu	 0x10
+#define DW_OP_consts	 0x11
+#define DW_OP_dup	 0x12
+#define DW_OP_drop	 0x13
+#define DW_OP_over	 0x14
+#define DW_OP_pick	 0x15
+#define DW_OP_swap	 0x16
+#define DW_OP_rot	 0x17
+#define DW_OP_xderef	 0x18
+#define DW_OP_abs	 0x19
+#define DW_OP_and	 0x1a
+#define DW_OP_div	 0x1b
+#define DW_OP_minus	 0x1c
+#define DW_OP_mod	 0x1d
+#define DW_OP_mul	 0x1e
+#define DW_OP_neg	 0x1f
+#define DW_OP_not	 0x20
+#define DW_OP_or	 0x21
+#define DW_OP_plus	 0x22
+#define DW_OP_plus_uconst	 0x23
+#define DW_OP_shl	 0x24
+#define DW_OP_shr	 0x25
+#define DW_OP_shra	 0x26
+#define DW_OP_xor	 0x27
+#define DW_OP_bra	 0x28
+#define DW_OP_eq	 0x29
+#define DW_OP_ge	 0x2a
+#define DW_OP_gt	 0x2b
+#define DW_OP_le	 0x2c
+#define DW_OP_lt	 0x2d
+#define DW_OP_ne	 0x2e
+#define DW_OP_skip	 0x2f
+#define DW_OP_lit0	 0x30
+#define DW_OP_lit1	 0x31
+#define DW_OP_lit2	 0x32
+#define DW_OP_lit3	 0x33
+#define DW_OP_lit4	 0x34
+#define DW_OP_lit5	 0x35
+#define DW_OP_lit6	 0x36
+#define DW_OP_lit7	 0x37
+#define DW_OP_lit8	 0x38
+#define DW_OP_lit9	 0x39
+#define DW_OP_lit10	 0x3a
+#define DW_OP_lit11	 0x3b
+#define DW_OP_lit12	 0x3c
+#define DW_OP_lit13	 0x3d
+#define DW_OP_lit14	 0x3e
+#define DW_OP_lit15	 0x3f
+#define DW_OP_lit16	 0x40
+#define DW_OP_lit17	 0x41
+#define DW_OP_lit18	 0x42
+#define DW_OP_lit19	 0x43
+#define DW_OP_lit20	 0x44
+#define DW_OP_lit21	 0x45
+#define DW_OP_lit22	 0x46
+#define DW_OP_lit23	 0x47
+#define DW_OP_lit24	 0x48
+#define DW_OP_lit25	 0x49
+#define DW_OP_lit26	 0x4a
+#define DW_OP_lit27	 0x4b
+#define DW_OP_lit28	 0x4c
+#define DW_OP_lit29	 0x4d
+#define DW_OP_lit30	 0x4e
+#define DW_OP_lit31	 0x4f
+#define DW_OP_reg0	 0x50
+#define DW_OP_reg1	 0x51
+#define DW_OP_reg2	 0x52
+#define DW_OP_reg3	 0x53
+#define DW_OP_reg4	 0x54
+#define DW_OP_reg5	 0x55
+#define DW_OP_reg6	 0x56
+#define DW_OP_reg7	 0x57
+#define DW_OP_reg8	 0x58
+#define DW_OP_reg9	 0x59
+#define DW_OP_reg10	 0x5a
+#define DW_OP_reg11	 0x5b
+#define DW_OP_reg12	 0x5c
+#define DW_OP_reg13	 0x5d
+#define DW_OP_reg14	 0x5e
+#define DW_OP_reg15	 0x5f
+#define DW_OP_reg16	 0x60
+#define DW_OP_reg17	 0x61
+#define DW_OP_reg18	 0x62
+#define DW_OP_reg19	 0x63
+#define DW_OP_reg20	 0x64
+#define DW_OP_reg21	 0x65
+#define DW_OP_reg22	 0x66
+#define DW_OP_reg23	 0x67
+#define DW_OP_reg24	 0x68
+#define DW_OP_reg25	 0x69
+#define DW_OP_reg26	 0x6a
+#define DW_OP_reg27	 0x6b
+#define DW_OP_reg28	 0x6c
+#define DW_OP_reg29	 0x6d
+#define DW_OP_reg30	 0x6e
+#define DW_OP_reg31	 0x6f
+#define DW_OP_breg0	 0x70
+#define DW_OP_breg1	 0x71
+#define DW_OP_breg2	 0x72
+#define DW_OP_breg3	 0x73
+#define DW_OP_breg4	 0x74
+#define DW_OP_breg5	 0x75
+#define DW_OP_breg6	 0x76
+#define DW_OP_breg7	 0x77
+#define DW_OP_breg8	 0x78
+#define DW_OP_breg9	 0x79
+#define DW_OP_breg10	 0x7a
+#define DW_OP_breg11	 0x7b
+#define DW_OP_breg12	 0x7c
+#define DW_OP_breg13	 0x7d
+#define DW_OP_breg14	 0x7e
+#define DW_OP_breg15	 0x7f
+#define DW_OP_breg16	 0x80
+#define DW_OP_breg17	 0x81
+#define DW_OP_breg18	 0x82
+#define DW_OP_breg19	 0x83
+#define DW_OP_breg20	 0x84
+#define DW_OP_breg21	 0x85
+#define DW_OP_breg22	 0x86
+#define DW_OP_breg23	 0x87
+#define DW_OP_breg24	 0x88
+#define DW_OP_breg25	 0x89
+#define DW_OP_breg26	 0x8a
+#define DW_OP_breg27	 0x8b
+#define DW_OP_breg28	 0x8c
+#define DW_OP_breg29	 0x8d
+#define DW_OP_breg30	 0x8e
+#define DW_OP_breg31	 0x8f
+#define DW_OP_regx	 0x90
+#define DW_OP_fbreg	 0x91
+#define DW_OP_bregx	 0x92
+#define DW_OP_piece	 0x93
+#define DW_OP_deref_size	 0x94
+#define DW_OP_xderef_size	 0x95
+#define DW_OP_nop	 0x96
+#define DW_OP_push_object_address	 0x97
+#define DW_OP_call2	 0x98
+#define DW_OP_call4	 0x99
+#define DW_OP_call_ref	 0x9a
+#define DW_OP_GNU_push_tls_address	 0xe0
+#define DW_OP_HP_unknown	 0xe0
+#define DW_OP_HP_is_value	 0xe1
+#define DW_OP_HP_fltconst4	 0xe2
+#define DW_OP_HP_fltconst8	 0xe3
+#define DW_OP_HP_mod_range	 0xe4
+#define DW_OP_HP_unmod_range	 0xe5
+#define DW_OP_HP_tls	 0xe6
+#define DW_OP_lo_user	0xe0	/* Implementation-defined range start.  */
+#define DW_OP_hi_user	0xff	/* Implementation-defined range end.  */
+#define DW_ATE_void	 0x0
+#define DW_ATE_address	 0x1
+#define DW_ATE_boolean	 0x2
+#define DW_ATE_complex_float	 0x3
+#define DW_ATE_float	 0x4
+#define DW_ATE_signed	 0x5
+#define DW_ATE_signed_char	 0x6
+#define DW_ATE_unsigned	 0x7
+#define DW_ATE_unsigned_char	 0x8
+#define DW_ATE_imaginary_float	 0x9
+#define DW_ATE_HP_float80	 0x80
+#define DW_ATE_HP_complex_float80	 0x81
+#define DW_ATE_HP_float128	 0x82
+#define DW_ATE_HP_complex_float128	 0x83
+#define DW_ATE_HP_floathpintel	 0x84
+#define DW_ATE_HP_imaginary_float80	 0x85
+#define DW_ATE_HP_imaginary_float128	 0x86
+#define	DW_ATE_lo_user 0x80
+#define	DW_ATE_hi_user 0xff
+#define DW_ORD_row_major	 0
+#define DW_ORD_col_major	 1
+#define DW_ACCESS_public	 1
+#define DW_ACCESS_protected	 2
+#define DW_ACCESS_private	 3
+#define DW_VIS_local	 1
+#define DW_VIS_exported	 2
+#define DW_VIS_qualified	 3
+#define DW_VIRTUALITY_none	 0
+#define DW_VIRTUALITY_virtual	 1
+#define DW_VIRTUALITY_pure_virtual	 2
+#define DW_ID_case_sensitive	 0
+#define DW_ID_up_case	 1
+#define DW_ID_down_case	 2
+#define DW_ID_case_insensitive	 3
+#define DW_CC_normal	 0x1
+#define DW_CC_program	 0x2
+#define DW_CC_nocall	 0x3
+#define DW_CC_lo_user 0x40
+#define DW_CC_hi_user 0xff
+#define DW_INL_not_inlined	 0
+#define DW_INL_inlined	 1
+#define DW_INL_declared_not_inlined	 2
+#define DW_INL_declared_inlined	 3
+#define DW_DSC_label	 0
+#define DW_DSC_range	 1
+#define DW_LNS_extended_op	 0
+#define DW_LNS_copy	 1
+#define DW_LNS_advance_pc	 2
+#define DW_LNS_advance_line	 3
+#define DW_LNS_set_file	 4
+#define DW_LNS_set_column	 5
+#define DW_LNS_negate_stmt	 6
+#define DW_LNS_set_basic_block	 7
+#define DW_LNS_const_add_pc	 8
+#define DW_LNS_fixed_advance_pc	 9
+#define DW_LNS_set_prologue_end	 10
+#define DW_LNS_set_epilogue_begin	 11
+#define DW_LNS_set_isa	 12
+#define DW_LNE_end_sequence	 1
+#define DW_LNE_set_address	 2
+#define DW_LNE_define_file	 3
+#define DW_LNE_HP_negate_is_UV_update	 0x11
+#define DW_LNE_HP_push_context	 0x12
+#define DW_LNE_HP_pop_context	 0x13
+#define DW_LNE_HP_set_file_line_column	 0x14
+#define DW_LNE_HP_set_routine_name	 0x15
+#define DW_LNE_HP_set_sequence	 0x16
+#define DW_LNE_HP_negate_post_semantics	 0x17
+#define DW_LNE_HP_negate_function_exit	 0x18
+#define DW_LNE_HP_negate_front_end_logical	 0x19
+#define DW_LNE_HP_define_proc	 0x20
+#define DW_CFA_advance_loc	 0x40
+#define DW_CFA_offset	 0x80
+#define DW_CFA_restore	 0xc0
+#define DW_CFA_nop	 0x00
+#define DW_CFA_set_loc	 0x01
+#define DW_CFA_advance_loc1	 0x02
+#define DW_CFA_advance_loc2	 0x03
+#define DW_CFA_advance_loc4	 0x04
+#define DW_CFA_offset_extended	 0x05
+#define DW_CFA_restore_extended	 0x06
+#define DW_CFA_undefined	 0x07
+#define DW_CFA_same_value	 0x08
+#define DW_CFA_register	 0x09
+#define DW_CFA_remember_state	 0x0a
+#define DW_CFA_restore_state	 0x0b
+#define DW_CFA_def_cfa	 0x0c
+#define DW_CFA_def_cfa_register	 0x0d
+#define DW_CFA_def_cfa_offset	 0x0e
+#define DW_CFA_def_cfa_expression	 0x0f
+#define DW_CFA_expression	 0x10
+#define DW_CFA_offset_extended_sf	 0x11
+#define DW_CFA_def_cfa_sf	 0x12
+#define DW_CFA_def_cfa_offset_sf	 0x13
+#define DW_CFA_MIPS_advance_loc8	 0x1d
+#define DW_CFA_GNU_window_save	 0x2d
+#define DW_CFA_GNU_args_size	 0x2e
+#define DW_CFA_GNU_negative_offset_extended	 0x2f
+#define DW_CIE_ID	  0xffffffff
+#define DW_CIE_VERSION	  1
+#define DW_CFA_extended   0
+#define DW_CFA_lo_user    0x1c
+#define DW_CFA_hi_user    0x3f
+#define DW_CHILDREN_no		     0x00
+#define DW_CHILDREN_yes		     0x01
+#define DW_ADDR_none		0
+#define DW_LANG_C89	 0x0001
+#define DW_LANG_C	 0x0002
+#define DW_LANG_Ada83	 0x0003
+#define DW_LANG_C_plus_plus	 0x0004
+#define DW_LANG_Cobol74	 0x0005
+#define DW_LANG_Cobol85	 0x0006
+#define DW_LANG_Fortran77	 0x0007
+#define DW_LANG_Fortran90	 0x0008
+#define DW_LANG_Pascal83	 0x0009
+#define DW_LANG_Modula2	 0x000a
+#define DW_LANG_Java	 0x000b
+#define DW_LANG_C99	 0x000c
+#define DW_LANG_Ada95	 0x000d
+#define DW_LANG_Fortran95	 0x000e
+#define DW_LANG_Mips_Assembler	 0x8001
+#define DW_LANG_Upc	 0x8765
+#define DW_LANG_lo_user 0x8000	/* Implementation-defined range start.  */
+#define DW_LANG_hi_user 0xffff	/* Implementation-defined range end.  */
+#define DW_MACINFO_define	 1
+#define DW_MACINFO_undef	 2
+#define DW_MACINFO_start_file	 3
+#define DW_MACINFO_end_file	 4
+#define DW_MACINFO_vendor_ext	 255
+#define DW_EH_PE_absptr		0x00
+#define DW_EH_PE_omit		0xff
+#define DW_EH_PE_uleb128	0x01
+#define DW_EH_PE_udata2		0x02
+#define DW_EH_PE_udata4		0x03
+#define DW_EH_PE_udata8		0x04
+#define DW_EH_PE_sleb128	0x09
+#define DW_EH_PE_sdata2		0x0A
+#define DW_EH_PE_sdata4		0x0B
+#define DW_EH_PE_sdata8		0x0C
+#define DW_EH_PE_signed		0x08
+#define DW_EH_PE_pcrel		0x10
+#define DW_EH_PE_textrel	0x20
+#define DW_EH_PE_datarel	0x30
+#define DW_EH_PE_funcrel	0x40
+#define DW_EH_PE_aligned	0x50
+#define DW_EH_PE_indirect	0x80
+#endif
diff -urN oldtree/include/linux/sched.h newtree/include/linux/sched.h
--- oldtree/include/linux/sched.h	2006-06-07 15:35:02.957196500 +0000
+++ newtree/include/linux/sched.h	2006-06-08 16:26:18.349098250 +0000
@@ -487,6 +487,7 @@
 #define MAX_RT_PRIO		MAX_USER_RT_PRIO
 
 #define MAX_PRIO		(MAX_RT_PRIO + 40)
+#define MIN_USER_PRIO		(MAX_PRIO - 1)
 
 #define rt_prio(prio)		unlikely((prio) < MAX_RT_PRIO)
 #define rt_task(p)		rt_prio((p)->prio)
@@ -525,7 +526,6 @@
 extern struct user_struct root_user;
 #define INIT_USER (&root_user)
 
-typedef struct prio_array prio_array_t;
 struct backing_dev_info;
 struct reclaim_state;
 
@@ -760,13 +760,6 @@
 struct pipe_inode_info;
 struct uts_namespace;
 
-enum sleep_type {
-	SLEEP_NORMAL,
-	SLEEP_NONINTERACTIVE,
-	SLEEP_INTERACTIVE,
-	SLEEP_INTERRUPTED,
-};
-
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	struct thread_info *thread_info;
@@ -784,19 +777,18 @@
 	int load_weight;	/* for niceness load balancing purposes */
 	int prio, static_prio, normal_prio;
 	struct list_head run_list;
-	prio_array_t *array;
 
 	unsigned short ioprio;
 	unsigned int btrace_seq;
 
-	unsigned long sleep_avg;
-	unsigned long long timestamp, last_ran;
+	unsigned long long timestamp;
+	unsigned long runtime, totalrun, ns_debit, systime;
+	unsigned int bonus;
+	unsigned int slice, time_slice;
 	unsigned long long sched_time; /* sched_clock time spent running */
-	enum sleep_type sleep_type;
 
 	unsigned long policy;
 	cpumask_t cpus_allowed;
-	unsigned int time_slice, first_time_slice;
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
@@ -1064,6 +1056,8 @@
 #define PF_SPREAD_SLAB	0x08000000	/* Spread some slab caches over cpuset */
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x02000000	/* Thread belongs to the rt mutex tester */
+#define PF_NONSLEEP	0x20000000	/* Waiting on in kernel activity */
+#define PF_FORKED	0x40000000	/* Task just forked another process */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
@@ -1196,7 +1190,6 @@
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
 extern void FASTCALL(sched_fork(task_t * p, int clone_flags));
-extern void FASTCALL(sched_exit(task_t * p));
 
 extern int in_group_p(gid_t);
 extern int in_egroup_p(gid_t);
diff -urN oldtree/kernel/exit.c newtree/kernel/exit.c
--- oldtree/kernel/exit.c	2006-06-07 15:35:03.061203000 +0000
+++ newtree/kernel/exit.c	2006-06-08 16:26:18.365099250 +0000
@@ -168,7 +168,6 @@
 		zap_leader = (leader->exit_signal == -1);
 	}
 
-	sched_exit(p);
 	write_unlock_irq(&tasklist_lock);
 	proc_flush_task(p);
 	release_thread(p);
diff -urN oldtree/kernel/sched.c newtree/kernel/sched.c
--- oldtree/kernel/sched.c	2006-06-07 15:35:03.101205500 +0000
+++ newtree/kernel/sched.c	2006-06-08 16:52:08.769993500 +0000
@@ -1,21 +1,7 @@
 /*
- *  kernel/sched.c
- *
- *  Kernel scheduler and related syscalls
- *
- *  Copyright (C) 1991-2002  Linus Torvalds
- *
- *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
- *		make semaphores SMP safe
- *  1998-11-19	Implemented schedule_timeout() and related stuff
- *		by Andrea Arcangeli
- *  2002-01-04	New ultra-scalable O(1) scheduler by Ingo Molnar:
- *		hybrid priority-list and round-robin design with
- *		an array-switch method of distributing timeslices
- *		and per-CPU runqueues.  Cleanups and useful suggestions
- *		by Davide Libenzi, preemptible kernel bits by Robert Love.
- *  2003-09-03	Interactivity tuning by Con Kolivas.
- *  2004-04-02	Scheduler domains code by Nick Piggin
+ *  <kernel/sched_staircase.c>
+ * Ver: v15.6 
+ * Ported by cheater-conrad of the no-sources team. Send all regressions to him, NO ONE ELSE!
  */
 
 #include <linux/mm.h>
@@ -79,129 +65,26 @@
 /*
  * Some helpers for converting nanosecond timing to jiffy resolution
  */
-#define NS_TO_JIFFIES(TIME)	((TIME) / (1000000000 / HZ))
-#define JIFFIES_TO_NS(TIME)	((TIME) * (1000000000 / HZ))
-
-/*
- * These are the 'tuning knobs' of the scheduler:
- *
- * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger),
- * default timeslice is 100 msecs, maximum timeslice is 800 msecs.
- * Timeslices get refilled after they expire.
- */
-#define MIN_TIMESLICE		max(5 * HZ / 1000, 1)
-#define DEF_TIMESLICE		(100 * HZ / 1000)
-#define ON_RUNQUEUE_WEIGHT	 30
-#define CHILD_PENALTY		 95
-#define PARENT_PENALTY		100
-#define EXIT_WEIGHT		  3
-#define PRIO_BONUS_RATIO	 25
-#define MAX_BONUS		(MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
-#define INTERACTIVE_DELTA	  2
-#define MAX_SLEEP_AVG		(DEF_TIMESLICE * MAX_BONUS)
-#define STARVATION_LIMIT	(MAX_SLEEP_AVG)
-#define NS_MAX_SLEEP_AVG	(JIFFIES_TO_NS(MAX_SLEEP_AVG))
-
-/*
- * If a task is 'interactive' then we reinsert it in the active
- * array after it has expired its current timeslice. (it will not
- * continue to run immediately, it will still roundrobin with
- * other interactive tasks.)
- *
- * This part scales the interactivity limit depending on niceness.
- *
- * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
- * Here are a few examples of different nice levels:
- *
- *  TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
- *  TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
- *  TASK_INTERACTIVE(  0): [1,1,1,1,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
- *  TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
- *
- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
- *  priority range a task can explore, a value of '1' means the
- *  task is rated interactive.)
- *
- * Ie. nice +19 tasks can never get 'interactive' enough to be
- * reinserted into the active array. And only heavily CPU-hog nice -20
- * tasks will be expired. Default nice 0 tasks are somewhere between,
- * it takes some effort for them to get interactive, but it's not
- * too hard.
- */
-
-#define CURRENT_BONUS(p) \
-	(NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
-		MAX_SLEEP_AVG)
-
-#define GRANULARITY	(10 * HZ / 1000 ? : 1)
-
-#ifdef CONFIG_SMP
-#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
-		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
-			num_online_cpus())
-#else
-#define TIMESLICE_GRANULARITY(p)	(GRANULARITY * \
-		(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
-#endif
-
-#define SCALE(v1,v1_max,v2_max) \
-	(v1) * (v2_max) / (v1_max)
-
-#define DELTA(p) \
-	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
-		INTERACTIVE_DELTA)
-
-#define TASK_INTERACTIVE(p) \
-	((p)->prio <= (p)->static_prio - DELTA(p))
-
-#define INTERACTIVE_SLEEP(p) \
-	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
-		(MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
-
+#define NSJIFFY                       (1000000000 / HZ)       /* One jiffy in ns */
+#define NS_TO_JIFFIES(TIME)   ((TIME) / NSJIFFY)
+#define JIFFIES_TO_NS(TIME)   ((TIME) * NSJIFFY)
 #define TASK_PREEMPTS_CURR(p, rq) \
 	((p)->prio < (rq)->curr->prio)
 
 /*
- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
- * to time slice values: [800ms ... 100ms ... 5ms]
- *
- * The higher a thread's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority thread gets MIN_TIMESLICE worth of execution time.
+ * This is the time all tasks within the same priority round robin.
+ * Set to a minimum of 6ms.
  */
+#define RR_INTERVAL           ((6 * HZ / 1001) + 1)
+#define DEF_TIMESLICE         (RR_INTERVAL * 19)
 
-#define SCALE_PRIO(x, prio) \
-	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
-
-static unsigned int static_prio_timeslice(int static_prio)
-{
-	if (static_prio < NICE_TO_PRIO(0))
-		return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
-	else
-		return SCALE_PRIO(DEF_TIMESLICE, static_prio);
-}
-
-static inline unsigned int task_timeslice(task_t *p)
-{
-	return static_prio_timeslice(p->static_prio);
-}
-
-#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
+#define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp)    \
 				< (long long) (sd)->cache_hot_time)
-
 /*
  * These are the runqueue data structures:
  */
-
 typedef struct runqueue runqueue_t;
 
-struct prio_array {
-	unsigned int nr_active;
-	DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
-	struct list_head queue[MAX_PRIO];
-};
-
 /*
  * This is the main, per-CPU runqueue data structure.
  *
@@ -231,12 +114,11 @@
 	 */
 	unsigned long nr_uninterruptible;
 
-	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
 	task_t *curr, *idle;
 	struct mm_struct *prev_mm;
-	prio_array_t *active, *expired, arrays[2];
-	int best_expired_prio;
+	unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)];
+	struct list_head queue[MAX_PRIO];
 	atomic_t nr_iowait;
 
 #ifdef CONFIG_SMP
@@ -248,6 +130,7 @@
 
 	task_t *migration_thread;
 	struct list_head migration_queue;
+	int cpu;
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
@@ -311,13 +194,6 @@
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
 #endif
-	/*
-	 * If we are tracking spinlock dependencies then we have to
-	 * fix up the runqueue lock - which gets 'carried over' from
-	 * prev into current:
-	 */
-	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
 	spin_unlock_irq(&rq->lock);
 }
 
@@ -559,13 +435,7 @@
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 /*
- * Called when a process is dequeued from the active array and given
- * the cpu.  We should note that with the exception of interactive
- * tasks, the expired queue will become the active queue after the active
- * queue is empty, without explicitly dequeuing and requeuing tasks in the
- * expired queue.  (Interactive tasks may be requeued directly to the
- * active queue, thus delaying tasks in the expired queue from running;
- * see scheduler_tick()).
+ * Called when a process is dequeued and given the cpu.
  *
  * This function is only called from sched_info_arrive(), rather than
  * dequeue_task(). Even though a task may be queued and dequeued multiple
@@ -598,13 +468,11 @@
 }
 
 /*
- * Called when a process is queued into either the active or expired
- * array.  The time is noted and later used to determine how long we
- * had to wait for us to reach the cpu.  Since the expired queue will
- * become the active queue after active queue is empty, without dequeuing
- * and requeuing any tasks, we are interested in queuing to either. It
- * is unusual but not impossible for tasks to be dequeued and immediately
- * requeued in the same or another array: this can happen in sched_yield(),
+ * Called when a process is queued.
+ * The time is noted and later used to determine how long we had to wait for
+ * us to reach the cpu.
+ * It is unusual but not impossible for tasks to be dequeued and immediately
+ * requeued: this can happen in sched_yield(),
  * set_user_nice(), and even load_balance() as it moves tasks from runqueue
  * to runqueue.
  *
@@ -662,68 +530,128 @@
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
 /*
- * Adding/removing a task to/from a priority array:
+ * Get nanosecond clock difference without overflowing unsigned long.
+ */
+static unsigned long ns_diff(unsigned long long v1, unsigned long long v2)
+{
+	unsigned long long vdiff;
+	if (likely(v1 >= v2)) {
+		vdiff = v1 - v2;
+#if BITS_PER_LONG < 64	/* clamp so the result fits a narrower unsigned long */
+		if (vdiff > (1ULL << 31))
+			vdiff = 1ULL << 31;
+#endif
+	} else {
+		/*
+		 * Rarely the clock appears to go backwards. There should
+		 * always be a positive difference so return 1.
+		 */
+		vdiff = 1;
+	}
+	return (unsigned long)vdiff;
+}
+
+static inline int task_queued(const struct task_struct *task)
+{
+	return !list_empty(&task->run_list);
+}
+
+/*
+ * Adding/removing a task to/from a runqueue:
  */
-static void dequeue_task(struct task_struct *p, prio_array_t *array)
+static void dequeue_task(struct task_struct *p, runqueue_t *rq)
 {
-	array->nr_active--;
-	list_del(&p->run_list);
-	if (list_empty(array->queue + p->prio))
-		__clear_bit(p->prio, array->bitmap);
+	list_del_init(&p->run_list);
+	if (list_empty(rq->queue + p->prio))
+		__clear_bit(p->prio, rq->bitmap);
+	p->ns_debit = 0;
 }
 
-static void enqueue_task(struct task_struct *p, prio_array_t *array)
+static void enqueue_task(struct task_struct *p, runqueue_t *rq)
 {
 	sched_info_queued(p);
-	list_add_tail(&p->run_list, array->queue + p->prio);
-	__set_bit(p->prio, array->bitmap);
-	array->nr_active++;
-	p->array = array;
+	list_add_tail(&p->run_list, rq->queue + p->prio);
+	__set_bit(p->prio, rq->bitmap);
 }
 
 /*
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static void requeue_task(struct task_struct *p, prio_array_t *array)
+static void requeue_task(struct task_struct *p, runqueue_t *rq, int prio)
+{
+	list_move_tail(&p->run_list, rq->queue + prio);
+	if (p->prio != prio) {
+		if (list_empty(rq->queue + p->prio))
+			__clear_bit(p->prio, rq->bitmap);
+		p->prio = prio;
+		__set_bit(prio, rq->bitmap);
+	}
+	p->ns_debit = 0;
+}
+
+static inline void enqueue_task_head(struct task_struct *p, runqueue_t *rq)
 {
-	list_move_tail(&p->run_list, array->queue + p->prio);
+	list_add(&p->run_list, rq->queue + p->prio);
+	__set_bit(p->prio, rq->bitmap);
 }
 
-static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
+static unsigned int rr_interval(const struct task_struct *p)
 {
-	list_add(&p->run_list, array->queue + p->prio);
-	__set_bit(p->prio, array->bitmap);
-	array->nr_active++;
-	p->array = array;
+	int nice = TASK_NICE(p);
+
+	if (nice < 0 && !rt_task(p))
+		return RR_INTERVAL * (20 - nice) / 20;
+	return RR_INTERVAL;
+}
+
+/*
+ * slice - the duration a task runs before getting requeued at its best
+ * priority and has its bonus decremented.
+ */
+static unsigned int slice(const struct task_struct *p)
+{
+	const unsigned int rr = rr_interval(p);
+
+	/* Real-time tasks get exactly one round-robin interval. */
+	if (unlikely(rt_task(p)))
+		return rr;
+	return rr + (39 - TASK_USER_PRIO(p)) * rr;
+}
+
+/*
+ * Bonus - How much higher than its base priority an interactive task can run.
+ */
+static inline unsigned int bonus(const struct task_struct *p)
+{
+	return TASK_USER_PRIO(p);
 }
 
 /*
  * __normal_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.
- *
- * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
- * into the -5 ... 0 ... +5 bonus/penalty range.
- *
- * We use 25% of the full 0...39 priority range so that:
- *
- * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
- * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
- *
- * Both properties are important to certain workloads.
+ * The priority normally decreases by one each rr_interval().
+ * As the bonus increases the initial priority starts at a higher "stair" or
+ * priority.
  */
-
 static inline int __normal_prio(task_t *p)
 {
-	int bonus, prio;
-
-	bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
+	int prio;
+	unsigned int full_slice, used_slice = 0;
+	unsigned int best_bonus, rr;
 
-	prio = p->static_prio - bonus;
-	if (prio < MAX_RT_PRIO)
-		prio = MAX_RT_PRIO;
-	if (prio > MAX_PRIO-1)
-		prio = MAX_PRIO-1;
+	full_slice = slice(p);
+	if (full_slice > p->slice)
+		used_slice = full_slice - p->slice;
+	best_bonus = bonus(p);
+	prio = MAX_RT_PRIO + best_bonus;
+	/* SCHED_BATCH tasks have their bonus ignored */
+	if (!batch_task(p))
+		prio -= p->bonus;
+	rr = rr_interval(p);
+	prio += used_slice / rr;
+	if (prio > MIN_USER_PRIO)
+		prio = MIN_USER_PRIO;
 	return prio;
 }
 
@@ -744,14 +672,13 @@
 #define TIME_SLICE_NICE_ZERO DEF_TIMESLICE
 #define LOAD_WEIGHT(lp) \
 	(((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
-#define PRIO_TO_LOAD_WEIGHT(prio) \
-	LOAD_WEIGHT(static_prio_timeslice(prio))
-#define RTPRIO_TO_LOAD_WEIGHT(rp) \
-	(PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
+#define TASK_LOAD_WEIGHT(p)	LOAD_WEIGHT(slice(p))
+#define RTPRIO_TO_LOAD_WEIGHT(rp)	\
+	(LOAD_WEIGHT((RR_INTERVAL + 20 + (rp))))
 
 static void set_load_weight(task_t *p)
 {
-	if (has_rt_policy(p)) {
+	if (rt_task(p)) {
 #ifdef CONFIG_SMP
 		if (p == task_rq(p)->migration_thread)
 			/*
@@ -764,7 +691,7 @@
 #endif
 			p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
 	} else
-		p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
+		p->load_weight = TASK_LOAD_WEIGHT(p);
 }
 
 static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p)
@@ -800,7 +727,7 @@
 {
 	int prio;
 
-	if (has_rt_policy(p))
+	if (p->policy != SCHED_NORMAL && p->policy != SCHED_BATCH)
 		prio = MAX_RT_PRIO-1 - p->rt_priority;
 	else
 		prio = __normal_prio(p);
@@ -811,7 +738,7 @@
  * Calculate the current priority, i.e. the priority
  * taken into account by the scheduler. This value might
  * be boosted by RT tasks, or might be boosted by
- * interactivity modifiers. Will be RT if the task got
+ * bonus modifiers. Will be RT if the task got
  * RT-boosted. If not then it returns p->normal_prio.
  */
 static int effective_prio(task_t *p)
@@ -832,11 +759,7 @@
  */
 static void __activate_task(task_t *p, runqueue_t *rq)
 {
-	prio_array_t *target = rq->active;
-
-	if (batch_task(p))
-		target = rq->expired;
-	enqueue_task(p, target);
+	enqueue_task(p, rq);
 	inc_nr_running(p, rq);
 }
 
@@ -845,91 +768,103 @@
  */
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
-	enqueue_task_head(p, rq->active);
+	enqueue_task_head(p, rq);
 	inc_nr_running(p, rq);
 }
 
 /*
- * Recalculate p->normal_prio and p->prio after having slept,
- * updating the sleep-average too:
+ * We increase our bonus by sleeping more than the time we ran.
+ * The ratio of sleep to run gives us the cpu% that we last ran and determines
+ * the maximum bonus we can acquire.
  */
-static int recalc_task_prio(task_t *p, unsigned long long now)
+static void inc_bonus(task_t *p, unsigned long totalrun, unsigned long sleep)
 {
-	/* Caller must always ensure 'now >= p->timestamp' */
-	unsigned long sleep_time = now - p->timestamp;
+	unsigned int best_bonus = sleep / (totalrun + 1);
 
-	if (batch_task(p))
-		sleep_time = 0;
+	if (p->bonus >= best_bonus)
+		return;
+	best_bonus = bonus(p);
+	if (p->bonus < best_bonus)
+		p->bonus++;
+}
 
-	if (likely(sleep_time > 0)) {
-		/*
-		 * This ceiling is set to the lowest priority that would allow
-		 * a task to be reinserted into the active array on timeslice
-		 * completion.
-		 */
-		unsigned long ceiling = INTERACTIVE_SLEEP(p);
+static inline void dec_bonus(task_t *p)
+{
+	p->totalrun = 0;
+	if (p->bonus)
+		p->bonus--;
+}
 
-		if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) {
-			/*
-			 * Prevents user tasks from achieving best priority
-			 * with one single large enough sleep.
-			 */
-			p->sleep_avg = ceiling;
-			/*
-			 * Using INTERACTIVE_SLEEP() as a ceiling places a
-			 * nice(0) task 1ms sleep away from promotion, and
-			 * gives it 700ms to round-robin with no chance of
-			 * being demoted.  This is more than generous, so
-			 * mark this sleep as non-interactive to prevent the
-			 * on-runqueue bonus logic from intervening should
-			 * this task not receive cpu immediately.
-			 */
-			p->sleep_type = SLEEP_NONINTERACTIVE;
-		} else {
-			/*
-			 * Tasks waking from uninterruptible sleep are
-			 * limited in their sleep_avg rise as they
-			 * are likely to be waiting on I/O
-			 */
-			if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
-				if (p->sleep_avg >= ceiling)
-					sleep_time = 0;
-				else if (p->sleep_avg + sleep_time >=
-					 ceiling) {
-						p->sleep_avg = ceiling;
-						sleep_time = 0;
-				}
-			}
+static inline void continue_slice(task_t *p)
+{
+	unsigned long total_run = NS_TO_JIFFIES(p->totalrun);
 
-			/*
-			 * This code gives a bonus to interactive tasks.
-			 *
-			 * The boost works by updating the 'average sleep time'
-			 * value here, based on ->timestamp. The more time a
-			 * task spends sleeping, the higher the average gets -
-			 * and the higher the priority boost gets as well.
-			 */
-			p->sleep_avg += sleep_time;
+	if (total_run >= p->slice || p->prio == MIN_USER_PRIO)
+		dec_bonus(p);
+	else {
+		unsigned long remainder;
 
-		}
-		if (p->sleep_avg > NS_MAX_SLEEP_AVG)
-			p->sleep_avg = NS_MAX_SLEEP_AVG;
+		p->slice -= total_run;
+		if (p->slice <= p->time_slice)
+			dec_bonus(p);
+		remainder = p->slice % rr_interval(p);
+		if (remainder)
+			p->time_slice = remainder;
 	}
+}
+
+/*
+ * recalc_task_prio - this checks for tasks that run ultra short timeslices
+ * or have just forked a thread/process and makes them continue their old
+ * slice instead of starting a new one at high priority.
+ */
+static inline void recalc_task_prio(task_t *p, const unsigned long long now)
+{
+	/* Double the systime to account for missed sub-jiffy time */
+	unsigned long ns_systime = JIFFIES_TO_NS(p->systime) * 2;
+	unsigned long sleep_time = ns_diff(now, p->timestamp);
+
+	/*
+	 * Add the total for this last scheduled run (p->runtime) and system
+	 * time (p->systime) done on behalf of p to the running total so far
+	 * used (p->totalrun).
+	 */
+	p->totalrun += p->runtime + ns_systime;
 
-	return effective_prio(p);
+	/* systime is unintentionally seen as sleep, subtract it */
+	if (likely(ns_systime < sleep_time))
+		sleep_time -= ns_systime;
+	else
+		sleep_time = 0;
+
+	if (unlikely(p->flags & PF_FORKED))
+		sleep_time = 0;
+
+	/*
+	 * If we sleep longer than our running total and have not set the
+	 * PF_NONSLEEP flag we gain a bonus.
+	 */
+	if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP)) {
+		inc_bonus(p, p->totalrun, sleep_time);
+		p->totalrun = 0;
+		return;
+	}
+
+	/* We elevate priority by the amount of time we slept. */
+	p->totalrun -= sleep_time;
+	continue_slice(p);
 }
 
 /*
  * activate_task - move a task to the runqueue and do priority recalculation
  *
- * Update all the scheduling statistics stuff. (sleep average
- * calculation, priority modifiers, etc.)
+ * Update all the scheduling statistics stuff. (priority modifiers, etc.)
  */
 static void activate_task(task_t *p, runqueue_t *rq, int local)
 {
-	unsigned long long now;
+	unsigned long long now = sched_clock();
+	unsigned long rr = rr_interval(p);
 
-	now = sched_clock();
 #ifdef CONFIG_SMP
 	if (!local) {
 		/* Compensate for drifting sched_clock */
@@ -938,31 +873,13 @@
 			+ rq->timestamp_last_tick;
 	}
 #endif
-
-	if (!rt_task(p))
-		p->prio = recalc_task_prio(p, now);
-
-	/*
-	 * This checks to make sure it's not an uninterruptible task
-	 * that is now waking up.
-	 */
-	if (p->sleep_type == SLEEP_NORMAL) {
-		/*
-		 * Tasks which were woken up by interrupts (ie. hw events)
-		 * are most likely of interactive nature. So we give them
-		 * the credit of extending their sleep time to the period
-		 * of time they spend on the runqueue, waiting for execution
-		 * on a CPU, first time around:
-		 */
-		if (in_interrupt())
-			p->sleep_type = SLEEP_INTERRUPTED;
-		else {
-			/*
-			 * Normal first-time wakeups get a credit too for
-			 * on-runqueue time, but it will be weighted down:
-			 */
-			p->sleep_type = SLEEP_INTERACTIVE;
-		}
+	p->slice = slice(p);
+	p->time_slice = p->slice % rr ? : rr;
+	if (!rt_task(p)) {
+		recalc_task_prio(p, now);
+		p->flags &= ~(PF_NONSLEEP | PF_FORKED);
+		p->systime = 0;
+		p->prio = effective_prio(p);
 	}
 	p->timestamp = now;
 
@@ -975,8 +892,7 @@
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
 	dec_nr_running(p, rq);
-	dequeue_task(p, p->array);
-	p->array = NULL;
+	dequeue_task(p, rq);
 }
 
 /*
@@ -1052,7 +968,7 @@
 	 * If the task is not on a runqueue (and not running), then
 	 * it is sufficient to simply update the task's cpu field.
 	 */
-	if (!p->array && !task_running(rq, p)) {
+	if (!task_queued(p) && !task_running(rq, p)) {
 		set_task_cpu(p, dest_cpu);
 		return 0;
 	}
@@ -1082,7 +998,7 @@
 repeat:
 	rq = task_rq_lock(p, &flags);
 	/* Must be off runqueue entirely, not preempted. */
-	if (unlikely(p->array || task_running(rq, p))) {
+	if (unlikely(task_queued(p) || task_running(rq, p))) {
 		/* If it's preempted, we yield.  It could be a while. */
 		preempted = !task_running(rq, p);
 		task_rq_unlock(rq, &flags);
@@ -1258,14 +1174,9 @@
 	struct sched_domain *tmp, *sd = NULL;
 
 	for_each_domain(cpu, tmp) {
- 		/*
- 	 	 * If power savings logic is enabled for a domain, stop there.
- 	 	 */
-		if (tmp->flags & SD_POWERSAVINGS_BALANCE)
-			break;
 		if (tmp->flags & flag)
 			sd = tmp;
 	}
 
 	while (sd) {
 		cpumask_t span;
@@ -1339,6 +1250,13 @@
 }
 #endif
 
+/* Check to see if p preempts rq->curr and resched if it does. */
+static inline void preempt(const task_t *p, runqueue_t *rq)
+{
+	if (TASK_PREEMPTS_CURR(p, rq))
+		resched_task(rq->curr);
+}
+
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -1370,7 +1288,7 @@
 	if (!(old_state & state))
 		goto out;
 
-	if (p->array)
+	if (task_queued(p))
 		goto out_running;
 
 	cpu = task_cpu(p);
@@ -1461,7 +1379,7 @@
 		old_state = p->state;
 		if (!(old_state & state))
 			goto out;
-		if (p->array)
+		if (task_queued(p))
 			goto out_running;
 
 		this_cpu = smp_processor_id();
@@ -1470,23 +1388,8 @@
 
 out_activate:
 #endif /* CONFIG_SMP */
-	if (old_state == TASK_UNINTERRUPTIBLE) {
+	if (old_state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible--;
-		/*
-		 * Tasks on involuntary sleep don't earn
-		 * sleep_avg beyond just interactive state.
-		 */
-		p->sleep_type = SLEEP_NONINTERACTIVE;
-	} else
-
-	/*
-	 * Tasks that have marked their sleep as noninteractive get
-	 * woken up with their sleep average not weighted in an
-	 * interactive way.
-	 */
-		if (old_state & TASK_NONINTERACTIVE)
-			p->sleep_type = SLEEP_NONINTERACTIVE;
-
 
 	activate_task(p, rq, cpu == this_cpu);
 	/*
@@ -1497,10 +1400,8 @@
 	 * the waker guarantees that the freshly woken up task is going
 	 * to be considered on this CPU.)
 	 */
-	if (!sync || cpu != this_cpu) {
-		if (TASK_PREEMPTS_CURR(p, rq))
-			resched_task(rq->curr);
-	}
+	if (!sync || cpu != this_cpu)
+		preempt(p, rq);
 	success = 1;
 
 out_running:
@@ -1551,7 +1452,6 @@
 	p->prio = current->normal_prio;
 
 	INIT_LIST_HEAD(&p->run_list);
-	p->array = NULL;
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (unlikely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -1563,30 +1463,6 @@
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
-	/*
-	 * Share the timeslice between parent and child, thus the
-	 * total amount of pending timeslices in the system doesn't change,
-	 * resulting in more scheduling fairness.
-	 */
-	local_irq_disable();
-	p->time_slice = (current->time_slice + 1) >> 1;
-	/*
-	 * The remainder of the first timeslice might be recovered by
-	 * the parent if the child exits early enough.
-	 */
-	p->first_time_slice = 1;
-	current->time_slice >>= 1;
-	p->timestamp = sched_clock();
-	if (unlikely(!current->time_slice)) {
-		/*
-		 * This case is rare, it happens when the parent has only
-		 * a single jiffy left from its timeslice. Taking the
-		 * runqueue lock is not a problem.
-		 */
-		current->time_slice = 1;
-		scheduler_tick();
-	}
-	local_irq_enable();
 	put_cpu();
 }
 
@@ -1608,38 +1484,20 @@
 	this_cpu = smp_processor_id();
 	cpu = task_cpu(p);
 
-	/*
-	 * We decrease the sleep average of forking parents
-	 * and children as well, to keep max-interactive tasks
-	 * from forking tasks that are max-interactive. The parent
-	 * (current) is done further down, under its lock.
-	 */
-	p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
-		CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
-
-	p->prio = effective_prio(p);
+	/* Forked process gets no bonus to prevent fork bombs. */
+	p->bonus = 0;
+	current->flags |= PF_FORKED;
 
 	if (likely(cpu == this_cpu)) {
+		activate_task(p, rq, 1);
 		if (!(clone_flags & CLONE_VM)) {
 			/*
 			 * The VM isn't cloned, so we're in a good position to
 			 * do child-runs-first in anticipation of an exec. This
 			 * usually avoids a lot of COW overhead.
 			 */
-			if (unlikely(!current->array))
-				__activate_task(p, rq);
-			else {
-				p->prio = current->prio;
-				p->normal_prio = current->normal_prio;
-				list_add_tail(&p->run_list, &current->run_list);
-				p->array = current->array;
-				p->array->nr_active++;
-				inc_nr_running(p, rq);
-			}
 			set_need_resched();
-		} else
-			/* Run child last */
-			__activate_task(p, rq);
+		}
 		/*
 		 * We skip the following code due to cpu == this_cpu
 	 	 *
@@ -1656,53 +1514,19 @@
 		 */
 		p->timestamp = (p->timestamp - this_rq->timestamp_last_tick)
 					+ rq->timestamp_last_tick;
-		__activate_task(p, rq);
-		if (TASK_PREEMPTS_CURR(p, rq))
-			resched_task(rq->curr);
+		activate_task(p, rq, 0);
+		preempt(p, rq);
 
 		/*
 		 * Parent and child are on different CPUs, now get the
-		 * parent runqueue to update the parent's ->sleep_avg:
+		 * parent runqueue lock, which is dropped on the way out:
 		 */
 		task_rq_unlock(rq, &flags);
 		this_rq = task_rq_lock(current, &flags);
 	}
-	current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) *
-		PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
 	task_rq_unlock(this_rq, &flags);
 }
 
-/*
- * Potentially available exiting-child timeslices are
- * retrieved here - this way the parent does not get
- * penalized for creating too many threads.
- *
- * (this cannot be used to 'generate' timeslices
- * artificially, because any timeslice recovered here
- * was given away by the parent in the first place.)
- */
-void fastcall sched_exit(task_t *p)
-{
-	unsigned long flags;
-	runqueue_t *rq;
-
-	/*
-	 * If the child was a (relative-) CPU hog then decrease
-	 * the sleep_avg of the parent as well.
-	 */
-	rq = task_rq_lock(p->parent, &flags);
-	if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) {
-		p->parent->time_slice += p->time_slice;
-		if (unlikely(p->parent->time_slice > task_timeslice(p)))
-			p->parent->time_slice = task_timeslice(p);
-	}
-	if (p->sleep_avg < p->parent->sleep_avg)
-		p->parent->sleep_avg = p->parent->sleep_avg /
-		(EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg /
-		(EXIT_WEIGHT + 1);
-	task_rq_unlock(rq, &flags);
-}
-
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1809,7 +1633,6 @@
 		WARN_ON(rq->prev_mm);
 		rq->prev_mm = oldmm;
 	}
-	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 
 	/* Here we just switch the register state and the stack. */
 	switch_to(prev, next, prev);
@@ -1892,6 +1715,9 @@
 /*
  * double_rq_lock - safely lock two runqueues
  *
+ * We must take them in cpu order to match code in
+ * dependent_sleeper and wake_dependent_sleeper.
+ *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1903,7 +1729,7 @@
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1 < rq2) {
+		if (rq1 < rq2) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1923,7 +1749,7 @@
 	__releases(rq1->lock)
 	__releases(rq2->lock)
 {
-	spin_unlock_non_nested(&rq1->lock);
+	spin_unlock(&rq1->lock);
 	if (rq1 != rq2)
 		spin_unlock(&rq2->lock);
 	else
@@ -1939,7 +1765,7 @@
 	__acquires(this_rq->lock)
 {
 	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
+		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
 			spin_lock(&busiest->lock);
 			spin_lock(&this_rq->lock);
@@ -1997,23 +1823,21 @@
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
-static
-void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
-	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+static void pull_task(runqueue_t *src_rq, task_t *p, runqueue_t *this_rq,
+		      int this_cpu)
 {
-	dequeue_task(p, src_array);
+	dequeue_task(p, src_rq);
 	dec_nr_running(p, src_rq);
 	set_task_cpu(p, this_cpu);
 	inc_nr_running(p, this_rq);
-	enqueue_task(p, this_array);
+	enqueue_task(p, this_rq);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
 	/*
 	 * Note that idle threads have a prio of MAX_PRIO, for this test
 	 * to be always true for them.
 	 */
-	if (TASK_PREEMPTS_CURR(p, this_rq))
-		resched_task(this_rq->curr);
+	preempt(p, this_rq);
 }
 
 /*
@@ -2060,121 +1884,85 @@
  * Called with both runqueues locked.
  */
 static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
-		      struct sched_domain *sd, enum idle_type idle,
-		      int *all_pinned)
-{
-	prio_array_t *array, *dst_array;
-	struct list_head *head, *curr;
-	int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio;
-	int busiest_best_prio_seen;
-	int skip_for_load; /* skip the task based on weighted load issues */
-	long rem_load_move;
-	task_t *tmp;
-
-	if (max_nr_move == 0 || max_load_move == 0)
-		goto out;
+                      unsigned long max_nr_move, unsigned long max_load_move,
+                      struct sched_domain *sd, enum idle_type idle,
+                      int *all_pinned)
+{
+        struct list_head *head, *curr;
+        int idx, pulled = 0, pinned = 0, this_min_prio;
+        long rem_load_move;
+        task_t *tmp;
+
+        if (max_nr_move == 0 || max_load_move == 0)
+                goto out;
+
+        rem_load_move = max_load_move;
+        pinned = 1;
+        this_min_prio = this_rq->curr->prio;
 
-	rem_load_move = max_load_move;
-	pinned = 1;
-	this_best_prio = rq_best_prio(this_rq);
-	busiest_best_prio = rq_best_prio(busiest);
-	/*
-	 * Enable handling of the case where there is more than one task
-	 * with the best priority.   If the current running task is one
-	 * of those with prio==busiest_best_prio we know it won't be moved
-	 * and therefore it's safe to override the skip (based on load) of
-	 * any task we find with that prio.
-	 */
-	busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio;
-
-	/*
-	 * We first consider expired tasks. Those will likely not be
-	 * executed in the near future, and they are most likely to
-	 * be cache-cold, thus switching CPUs has the least effect
-	 * on them.
-	 */
-	if (busiest->expired->nr_active) {
-		array = busiest->expired;
-		dst_array = this_rq->expired;
-	} else {
-		array = busiest->active;
-		dst_array = this_rq->active;
-	}
-
-new_array:
-	/* Start searching at priority 0: */
-	idx = 0;
+        /* Start searching at priority 0: */
+        idx = 0;
 skip_bitmap:
-	if (!idx)
-		idx = sched_find_first_bit(array->bitmap);
-	else
-		idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
-	if (idx >= MAX_PRIO) {
-		if (array == busiest->expired && busiest->active->nr_active) {
-			array = busiest->active;
-			dst_array = this_rq->active;
-			goto new_array;
-		}
-		goto out;
-	}
+        if (!idx)
+                idx = sched_find_first_bit(busiest->bitmap);
+        else
+                idx = find_next_bit(busiest->bitmap, MAX_PRIO, idx);
+        if (idx >= MAX_PRIO)
+                goto out;
 
-	head = array->queue + idx;
-	curr = head->prev;
+        head = busiest->queue + idx;
+        curr = head->prev;
 skip_queue:
-	tmp = list_entry(curr, task_t, run_list);
+        tmp = list_entry(curr, task_t, run_list);
 
-	curr = curr->prev;
+        curr = curr->prev;
 
-	/*
-	 * To help distribute high priority tasks accross CPUs we don't
-	 * skip a task if it will be the highest priority task (i.e. smallest
-	 * prio value) on its new queue regardless of its load weight
-	 */
-	skip_for_load = tmp->load_weight > rem_load_move;
-	if (skip_for_load && idx < this_best_prio)
-		skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio;
-	if (skip_for_load ||
-	    !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
-		busiest_best_prio_seen |= idx == busiest_best_prio;
-		if (curr != head)
-			goto skip_queue;
-		idx++;
-		goto skip_bitmap;
-	}
+        /*
+         * To help distribute high priority tasks across CPUs we don't
+         * skip a task if it will be the highest priority task (i.e. smallest
+         * prio value) on its new queue regardless of its load weight
+         */
+        if ((idx >= this_min_prio && tmp->load_weight > rem_load_move) ||
+            !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
+                if (curr != head)
+                        goto skip_queue;
+                idx++;
+                goto skip_bitmap;
+        }
 
 #ifdef CONFIG_SCHEDSTATS
-	if (task_hot(tmp, busiest->timestamp_last_tick, sd))
-		schedstat_inc(sd, lb_hot_gained[idle]);
+        if (task_hot(tmp, busiest->timestamp_last_tick, sd))
+                schedstat_inc(sd, lb_hot_gained[idle]);
 #endif
 
-	pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
-	pulled++;
-	rem_load_move -= tmp->load_weight;
+        pull_task(busiest, tmp, this_rq, this_cpu);
+        pulled++;
+        rem_load_move -= tmp->load_weight;
+
+        /*
+         * We only want to steal up to the prescribed number of tasks
+         * and the prescribed amount of weighted load.
+         */
+        if (pulled < max_nr_move && rem_load_move > 0) {
+                if (idx < this_min_prio)
+                        this_min_prio = idx;
+                if (curr != head)
+                        goto skip_queue;
+                idx++;
+                goto skip_bitmap;
+        }
 
-	/*
-	 * We only want to steal up to the prescribed number of tasks
-	 * and the prescribed amount of weighted load.
-	 */
-	if (pulled < max_nr_move && rem_load_move > 0) {
-		if (idx < this_best_prio)
-			this_best_prio = idx;
-		if (curr != head)
-			goto skip_queue;
-		idx++;
-		goto skip_bitmap;
-	}
 out:
-	/*
-	 * Right now, this is the only place pull_task() is called,
-	 * so we can safely collect pull_task() stats here rather than
-	 * inside pull_task().
-	 */
-	schedstat_add(sd, lb_gained[idle], pulled);
-
-	if (all_pinned)
-		*all_pinned = pinned;
-	return pulled;
+        /*
+         * Right now, this is the only place pull_task() is called,
+         * so we can safely collect pull_task() stats here rather than
+         * inside pull_task().
+         */
+        schedstat_add(sd, lb_gained[idle], pulled);
+
+        if (all_pinned)
+                *all_pinned = pinned;
+        return pulled;
 }
 
 /*
@@ -2192,12 +2001,6 @@
 	unsigned long busiest_load_per_task, busiest_nr_running;
 	unsigned long this_load_per_task, this_nr_running;
 	int load_idx;
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	int power_savings_balance = 1;
-	unsigned long leader_nr_running = 0, min_load_per_task = 0;
-	unsigned long min_nr_running = ULONG_MAX;
-	struct sched_group *group_min = NULL, *group_leader = NULL;
-#endif
 
 	max_load = this_load = total_load = total_pwr = 0;
 	busiest_load_per_task = busiest_nr_running = 0;
@@ -2210,7 +2013,7 @@
 		load_idx = sd->idle_idx;
 
 	do {
-		unsigned long load, group_capacity;
+		unsigned long load;
 		int local_group;
 		int i;
 		unsigned long sum_nr_running, sum_weighted_load;
@@ -2243,76 +2046,18 @@
 		/* Adjust by relative CPU power of the group */
 		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
 
-		group_capacity = group->cpu_power / SCHED_LOAD_SCALE;
-
 		if (local_group) {
 			this_load = avg_load;
 			this = group;
 			this_nr_running = sum_nr_running;
 			this_load_per_task = sum_weighted_load;
 		} else if (avg_load > max_load &&
-			   sum_nr_running > group_capacity) {
+			   sum_nr_running > group->cpu_power / SCHED_LOAD_SCALE) {
 			max_load = avg_load;
 			busiest = group;
 			busiest_nr_running = sum_nr_running;
 			busiest_load_per_task = sum_weighted_load;
 		}
-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-		/*
-		 * Busy processors will not participate in power savings
-		 * balance.
-		 */
- 		if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
- 			goto group_next;
-
-		/*
-		 * If the local group is idle or completely loaded
-		 * no need to do power savings balance at this domain
-		 */
-		if (local_group && (this_nr_running >= group_capacity ||
-				    !this_nr_running))
-			power_savings_balance = 0;
-
- 		/*
-		 * If a group is already running at full capacity or idle,
-		 * don't include that group in power savings calculations
- 		 */
- 		if (!power_savings_balance || sum_nr_running >= group_capacity
-		    || !sum_nr_running)
- 			goto group_next;
-
- 		/*
-		 * Calculate the group which has the least non-idle load.
- 		 * This is the group from where we need to pick up the load
- 		 * for saving power
- 		 */
- 		if ((sum_nr_running < min_nr_running) ||
- 		    (sum_nr_running == min_nr_running &&
-		     first_cpu(group->cpumask) <
-		     first_cpu(group_min->cpumask))) {
- 			group_min = group;
- 			min_nr_running = sum_nr_running;
-			min_load_per_task = sum_weighted_load /
-						sum_nr_running;
- 		}
-
- 		/*
-		 * Calculate the group which is almost near its
- 		 * capacity but still has some space to pick up some load
- 		 * from other group and save more power
- 		 */
- 		if (sum_nr_running <= group_capacity - 1)
- 			if (sum_nr_running > leader_nr_running ||
- 			    (sum_nr_running == leader_nr_running &&
- 			     first_cpu(group->cpumask) >
- 			      first_cpu(group_leader->cpumask))) {
- 				group_leader = group;
- 				leader_nr_running = sum_nr_running;
- 			}
-
-group_next:
-#endif
 		group = group->next;
 	} while (group != sd->groups);
 
@@ -2421,16 +2166,7 @@
 	return busiest;
 
 out_balanced:
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
-		goto ret;
-
-	if (this == group_leader && group_leader != group_min) {
-		*imbalance = min_load_per_task;
-		return group_min;
-	}
-ret:
-#endif
+
 	*imbalance = 0;
 	return NULL;
 }
@@ -2483,8 +2219,7 @@
 	int active_balance = 0;
 	int sd_idle = 0;
 
-	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[idle]);
@@ -2573,8 +2308,7 @@
 			sd->balance_interval *= 2;
 	}
 
-	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !sched_smt_power_savings)
+	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
 		return -1;
 	return nr_moved;
 
@@ -2589,7 +2323,7 @@
 			(sd->balance_interval < sd->max_interval))
 		sd->balance_interval *= 2;
 
-	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
 		return -1;
 	return 0;
 }
@@ -2610,7 +2344,7 @@
 	int nr_moved = 0;
 	int sd_idle = 0;
 
-	if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+	if (sd->flags & SD_SHARE_CPUPOWER)
 		sd_idle = 1;
 
 	schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
@@ -2637,7 +2371,7 @@
 		nr_moved = move_tasks(this_rq, this_cpu, busiest,
 					minus_1_or_zero(busiest->nr_running),
 					imbalance, sd, NEWLY_IDLE, NULL);
-		spin_unlock_non_nested(&busiest->lock);
+		spin_unlock(&busiest->lock);
 	}
 
 	if (!nr_moved) {
@@ -2651,7 +2385,7 @@
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
-	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
 		return -1;
 	sd->nr_balance_failed = 0;
 	return 0;
@@ -2710,7 +2444,7 @@
 		if ((sd->flags & SD_LOAD_BALANCE) &&
 			cpu_isset(busiest_cpu, sd->span))
 				break;
-	}
+	}
 
 	if (unlikely(sd == NULL))
 		goto out;
@@ -2723,7 +2457,7 @@
 	else
 		schedstat_inc(sd, alb_failed);
 out:
-	spin_unlock_non_nested(&target_rq->lock);
+	spin_unlock(&target_rq->lock);
 }
 
 /*
@@ -2851,22 +2585,6 @@
 }
 
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired:
- */
-#define EXPIRED_STARVING(rq) \
-	((STARVATION_LIMIT && ((rq)->expired_timestamp && \
-		(jiffies - (rq)->expired_timestamp >= \
-			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
-			((rq)->curr->static_prio > (rq)->best_expired_prio))
-
-/*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2915,6 +2633,7 @@
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
 	/* Account for system time used */
+	p->systime++;
 	acct_update_integrals(p);
 }
 
@@ -2939,18 +2658,23 @@
 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
 }
 
+static void time_slice_expired(task_t *p, runqueue_t *rq)
+{
+	set_tsk_need_resched(p);
+	p->time_slice = rr_interval(p);
+	requeue_task(p, rq, effective_prio(p));
+}
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
- *
- * It also gets called by the fork code, when changing the parent's
- * timeslices.
  */
 void scheduler_tick(void)
 {
 	int cpu = smp_processor_id();
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
+	unsigned long debit;
 	unsigned long long now = sched_clock();
 
 	update_cpu_clock(p, rq, now);
@@ -2965,73 +2689,37 @@
 	}
 
 	/* Task might have expired already, but not scheduled off yet */
-	if (p->array != rq->active) {
+	if (unlikely(!task_queued(p))) {
 		set_tsk_need_resched(p);
 		goto out;
 	}
-	spin_lock(&rq->lock);
 	/*
-	 * The task was running during this tick - update the
-	 * time slice counter. Note: we do not update a thread's
-	 * priority until it either goes to sleep or uses up its
-	 * timeslice. This makes it possible for interactive tasks
-	 * to use up their timeslices at their highest priority levels.
+	 * SCHED_FIFO tasks never run out of timeslice.
 	 */
-	if (rt_task(p)) {
-		/*
-		 * RR tasks need a special form of timeslice management.
-		 * FIFO tasks have no timeslices.
-		 */
-		if ((p->policy == SCHED_RR) && !--p->time_slice) {
-			p->time_slice = task_timeslice(p);
-			p->first_time_slice = 0;
-			set_tsk_need_resched(p);
+	if (unlikely(p->policy == SCHED_FIFO))
+		goto out;
 
-			/* put it at the end of the queue: */
-			requeue_task(p, rq->active);
-		}
+	spin_lock(&rq->lock);
+	debit = ns_diff(rq->timestamp_last_tick, p->timestamp);
+	p->ns_debit += debit;
+	if (p->ns_debit < NSJIFFY)
+		goto out_unlock;
+	p->ns_debit %= NSJIFFY;
+
+	/* Tasks lose bonus each time they use up a full slice(). */
+	if (!--p->slice) {
+		dec_bonus(p);
+		p->slice = slice(p);
+		time_slice_expired(p, rq);
 		goto out_unlock;
 	}
+	/*
+	 * Tasks that run out of time_slice but still have slice left get
+	 * requeued with a lower priority && RR_INTERVAL time_slice.
+	 */
 	if (!--p->time_slice) {
-		dequeue_task(p, rq->active);
-		set_tsk_need_resched(p);
-		p->prio = effective_prio(p);
-		p->time_slice = task_timeslice(p);
-		p->first_time_slice = 0;
-
-		if (!rq->expired_timestamp)
-			rq->expired_timestamp = jiffies;
-		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
-			enqueue_task(p, rq->expired);
-			if (p->static_prio < rq->best_expired_prio)
-				rq->best_expired_prio = p->static_prio;
-		} else
-			enqueue_task(p, rq->active);
-	} else {
-		/*
-		 * Prevent a too long timeslice allowing a task to monopolize
-		 * the CPU. We do this by splitting up the timeslice into
-		 * smaller pieces.
-		 *
-		 * Note: this does not mean the task's timeslices expire or
-		 * get lost in any way, they just might be preempted by
-		 * another task of equal priority. (one with higher
-		 * priority would have preempted this task already.) We
-		 * requeue this task to the end of the list on this priority
-		 * level, which is in essence a round-robin of tasks with
-		 * equal priority.
-		 *
-		 * This only applies to tasks in the interactive
-		 * delta range with at least TIMESLICE_GRANULARITY to requeue.
-		 */
-		if (TASK_INTERACTIVE(p) && !((task_timeslice(p) -
-			p->time_slice) % TIMESLICE_GRANULARITY(p)) &&
-			(p->time_slice >= TIMESLICE_GRANULARITY(p)) &&
-			(p->array == rq->active)) {
-
-			requeue_task(p, rq->active);
-			set_tsk_need_resched(p);
-		}
+		time_slice_expired(p, rq);
+		goto out_unlock;
 	}
 out_unlock:
 	spin_unlock(&rq->lock);
@@ -3055,37 +2743,37 @@
 	struct sched_domain *tmp, *sd = NULL;
 	int i;
 
-	for_each_domain(this_cpu, tmp) {
-		if (tmp->flags & SD_SHARE_CPUPOWER) {
+        for_each_domain(this_cpu, tmp) {
+                if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
-			break;
-		}
-	}
+                        break;
+                }
+        }
 
 	if (!sd)
 		return;
 
-	for_each_cpu_mask(i, sd->span) {
+        for_each_cpu_mask(i, sd->span) {
 		runqueue_t *smt_rq = cpu_rq(i);
 
-		if (i == this_cpu)
-			continue;
-		if (unlikely(!spin_trylock(&smt_rq->lock)))
-			continue;
+                if (i == this_cpu)
+                        continue;
+                if (unlikely(!spin_trylock(&smt_rq->lock)))
+                        continue;
 
 		wakeup_busy_runqueue(smt_rq);
-		spin_unlock(&smt_rq->lock);
+                spin_unlock(&smt_rq->lock);
 	}
 }
 
 /*
  * number of 'lost' timeslices this task wont be able to fully
- * utilize, if another task runs on a sibling. This models the
+ * utilise, if another task runs on a sibling. This models the
  * slowdown effect of other tasks running on siblings:
  */
 static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
 {
-	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
+	return p->slice * (100 - sd->per_cpu_gain) / 100;
 }
 
 /*
@@ -3099,35 +2787,35 @@
 	struct sched_domain *tmp, *sd = NULL;
 	int ret = 0, i;
 
-	/* kernel/rt threads do not participate in dependent sleeping */
-	if (!p->mm || rt_task(p))
-		return 0;
-
-	for_each_domain(this_cpu, tmp) {
-		if (tmp->flags & SD_SHARE_CPUPOWER) {
+	/* kernel/rt threads do not participate in dependent sleeping */
+	if (!p->mm || rt_task(p))
+		return 0;
+
+	for_each_domain(this_cpu, tmp) {
+		if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
-			break;
-		}
-	}
+			break;
+		}
+	}
 
 	if (!sd)
 		return 0;
 
-	for_each_cpu_mask(i, sd->span) {
-		runqueue_t *smt_rq;
-		task_t *smt_curr;
+	for_each_cpu_mask(i, sd->span) {
+		runqueue_t *smt_rq;
+		task_t *smt_curr;
 
-		if (i == this_cpu)
-			continue;
+		if (i == this_cpu)
+			continue;
 
-		smt_rq = cpu_rq(i);
-		if (unlikely(!spin_trylock(&smt_rq->lock)))
-			continue;
+		smt_rq = cpu_rq(i);
+		if (unlikely(!spin_trylock(&smt_rq->lock)))
+			continue;
 
-		smt_curr = smt_rq->curr;
+		smt_curr = smt_rq->curr;
 
-		if (!smt_curr->mm)
-			goto unlock;
+		if (!smt_curr->mm)
+			goto unlock;
 
 		/*
 		 * If a user task with lower static priority than the
@@ -3145,14 +2833,14 @@
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
-		} else {
+		} else
 			if (smt_curr->static_prio < p->static_prio &&
 				!TASK_PREEMPTS_CURR(p, smt_rq) &&
-				smt_slice(smt_curr, sd) > task_timeslice(p))
+				smt_slice(smt_curr, sd) > slice(p))
 					ret = 1;
-		}
+
 unlock:
-		spin_unlock(&smt_rq->lock);
+                spin_unlock(&smt_rq->lock);
 	}
 	return ret;
 }
@@ -3161,8 +2849,7 @@
 {
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq,
-					task_t *p)
+static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
 {
 	return 0;
 }
@@ -3175,13 +2862,12 @@
 	/*
 	 * Underflow?
 	 */
-	if (DEBUG_WARN_ON((preempt_count() < 0)))
-		return;
+	BUG_ON((preempt_count() < 0));
 	preempt_count() += val;
 	/*
 	 * Spinlock count overflowing soon?
 	 */
-	DEBUG_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+	BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
 }
 EXPORT_SYMBOL(add_preempt_count);
 
@@ -3190,27 +2876,17 @@
 	/*
 	 * Underflow?
 	 */
-	if (DEBUG_WARN_ON(val > preempt_count()))
-		return;
+	BUG_ON(val > preempt_count());
 	/*
 	 * Is the spinlock portion underflowing?
 	 */
-	if (DEBUG_WARN_ON((val < PREEMPT_MASK) &&
-			!(preempt_count() & PREEMPT_MASK)))
-		return;
-
+	BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK));
 	preempt_count() -= val;
 }
 EXPORT_SYMBOL(sub_preempt_count);
 
 #endif
 
-static inline int interactive_sleep(enum sleep_type sleep_type)
-{
-	return (sleep_type == SLEEP_INTERACTIVE ||
-		sleep_type == SLEEP_INTERRUPTED);
-}
-
 /*
  * schedule() is the main scheduler function.
  */
@@ -3219,11 +2895,10 @@
 	long *switch_count;
 	task_t *prev, *next;
 	runqueue_t *rq;
-	prio_array_t *array;
 	struct list_head *queue;
 	unsigned long long now;
-	unsigned long run_time;
-	int cpu, idx, new_prio;
+	unsigned long debit;
+	int cpu, idx;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -3256,20 +2931,11 @@
 
 	schedstat_inc(rq, sched_cnt);
 	now = sched_clock();
-	if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
-		run_time = now - prev->timestamp;
-		if (unlikely((long long)(now - prev->timestamp) < 0))
-			run_time = 0;
-	} else
-		run_time = NS_MAX_SLEEP_AVG;
-
-	/*
-	 * Tasks charged proportionately less run_time at high sleep_avg to
-	 * delay them losing their interactive status
-	 */
-	run_time /= (CURRENT_BONUS(prev) ? : 1);
 
 	spin_lock_irq(&rq->lock);
+	prev->runtime = ns_diff(now, prev->timestamp);
+	debit = ns_diff(now, rq->timestamp_last_tick) % NSJIFFY;
+	prev->ns_debit += debit;
 
 	if (unlikely(prev->flags & PF_DEAD))
 		prev->state = EXIT_DEAD;
@@ -3281,8 +2947,10 @@
 				unlikely(signal_pending(prev))))
 			prev->state = TASK_RUNNING;
 		else {
-			if (prev->state == TASK_UNINTERRUPTIBLE)
+			if (prev->state == TASK_UNINTERRUPTIBLE) {
+				prev->flags |= PF_NONSLEEP;
 				rq->nr_uninterruptible++;
+			}
 			deactivate_task(prev, rq);
 		}
 	}
@@ -3292,64 +2960,30 @@
 		idle_balance(cpu, rq);
 		if (!rq->nr_running) {
 			next = rq->idle;
-			rq->expired_timestamp = 0;
 			wake_sleeping_dependent(cpu);
 			goto switch_tasks;
 		}
 	}
 
-	array = rq->active;
-	if (unlikely(!array->nr_active)) {
-		/*
-		 * Switch the active and expired arrays.
-		 */
-		schedstat_inc(rq, sched_switch);
-		rq->active = rq->expired;
-		rq->expired = array;
-		array = rq->active;
-		rq->expired_timestamp = 0;
-		rq->best_expired_prio = MAX_PRIO;
-	}
-
-	idx = sched_find_first_bit(array->bitmap);
-	queue = array->queue + idx;
+	idx = sched_find_first_bit(rq->bitmap);
+	queue = rq->queue + idx;
 	next = list_entry(queue->next, task_t, run_list);
 
-	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
-		unsigned long long delta = now - next->timestamp;
-		if (unlikely((long long)(now - next->timestamp) < 0))
-			delta = 0;
-
-		if (next->sleep_type == SLEEP_INTERACTIVE)
-			delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
-
-		array = next->array;
-		new_prio = recalc_task_prio(next, next->timestamp + delta);
-
-		if (unlikely(next->prio != new_prio)) {
-			dequeue_task(next, array);
-			next->prio = new_prio;
-			enqueue_task(next, array);
-		}
-	}
-	next->sleep_type = SLEEP_NORMAL;
-	if (dependent_sleeper(cpu, rq, next))
-		next = rq->idle;
+        if (dependent_sleeper(cpu, rq, next))
+                next = rq->idle;
 switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);
-	prefetch(next);
-	prefetch_stack(next);
+	else {
+		prefetch(next);
+		prefetch_stack(next);
+	}
+	prev->timestamp = now;
 	clear_tsk_need_resched(prev);
 	rcu_qsctr_inc(task_cpu(prev));
 
 	update_cpu_clock(prev, rq, now);
 
-	prev->sleep_avg -= run_time;
-	if ((long)prev->sleep_avg <= 0)
-		prev->sleep_avg = 0;
-	prev->timestamp = prev->last_ran = now;
-
 	sched_info_switch(prev, next);
 	if (likely(prev != next)) {
 		next->timestamp = now;
@@ -3562,8 +3196,8 @@
 
 void init_completion(struct completion *x)
 {
-	x->done = 0;
-	__init_waitqueue_head(&x->wait);
+	x->done = 0;
+	__init_waitqueue_head(&x->wait);
 }
 
 EXPORT_SYMBOL(init_completion);
@@ -3801,28 +3435,20 @@
 void rt_mutex_setprio(task_t *p, int prio)
 {
 	unsigned long flags;
-	prio_array_t *array;
 	runqueue_t *rq;
-	int oldprio;
+	int oldprio, queued;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
 	rq = task_rq_lock(p, &flags);
 
 	oldprio = p->prio;
-	array = p->array;
-	if (array)
-		dequeue_task(p, array);
+	if ((queued = task_queued(p)))
+		dequeue_task(p, rq);
 	p->prio = prio;
 
-	if (array) {
-		/*
-		 * If changing to an RT priority then queue it
-		 * in the active array!
-		 */
-		if (rt_task(p))
-			array = rq->active;
-		enqueue_task(p, array);
+	if (queued) {
+		enqueue_task(p, rq);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
 		 * our priority decreased, or if we are not currently running on
@@ -3831,8 +3457,8 @@
 		if (task_running(rq, p)) {
 			if (p->prio > oldprio)
 				resched_task(rq->curr);
-		} else if (TASK_PREEMPTS_CURR(p, rq))
-			resched_task(rq->curr);
+		} else
+			preempt(p, rq);
 	}
 	task_rq_unlock(rq, &flags);
 }
@@ -3842,9 +3468,8 @@
 void set_user_nice(task_t *p, long nice)
 {
 	unsigned long flags;
-	prio_array_t *array;
 	runqueue_t *rq;
-	int old_prio, delta;
+	int queued, old_prio, new_prio, delta;
 
 	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
 		return;
@@ -3859,24 +3484,26 @@
 	 * it wont have any effect on scheduling until the task is
 	 * not SCHED_NORMAL/SCHED_BATCH:
 	 */
-	if (has_rt_policy(p)) {
+	if (rt_task(p)) {
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
-	array = p->array;
-	if (array) {
-		dequeue_task(p, array);
+	if ((queued = task_queued(p))) {
+		dequeue_task(p, rq);
 		dec_raw_weighted_load(rq, p);
 	}
 
+	old_prio = p->prio;
+	new_prio = NICE_TO_PRIO(nice);
+	delta = new_prio - old_prio;
 	p->static_prio = NICE_TO_PRIO(nice);
 	set_load_weight(p);
-	old_prio = p->prio;
-	p->prio = effective_prio(p);
-	delta = p->prio - old_prio;
+	p->prio += delta;
+	if (p->bonus > bonus(p))
+		p->bonus = bonus(p);
 
-	if (array) {
-		enqueue_task(p, array);
+	if (queued) {
+		enqueue_task(p, rq);
 		inc_raw_weighted_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
@@ -3888,6 +3515,7 @@
 out_unlock:
 	task_rq_unlock(rq, &flags);
 }
+
 EXPORT_SYMBOL(set_user_nice);
 
 /*
@@ -3999,17 +3627,12 @@
 /* Actually do priority change: must hold rq lock. */
 static void __setscheduler(struct task_struct *p, int policy, int prio)
 {
-	BUG_ON(p->array);
+	BUG_ON(task_queued(p));
 	p->policy = policy;
 	p->rt_priority = prio;
 	p->normal_prio = normal_prio(p);
 	/* we are holding p->pi_lock already */
 	p->prio = rt_mutex_getprio(p);
-	/*
-	 * SCHED_BATCH tasks are treated as perpetual CPU hogs:
-	 */
-	if (policy == SCHED_BATCH)
-		p->sleep_avg = 0;
 	set_load_weight(p);
 }
 
@@ -4024,8 +3647,7 @@
 		       struct sched_param *param)
 {
 	int retval;
-	int oldprio, oldpolicy = -1;
-	prio_array_t *array;
+	int queued, oldprio, oldpolicy = -1;
 	unsigned long flags;
 	runqueue_t *rq;
 
@@ -4093,12 +3715,11 @@
 		spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
-	array = p->array;
-	if (array)
+	if ((queued = task_queued(p)))
 		deactivate_task(p, rq);
 	oldprio = p->prio;
 	__setscheduler(p, policy, param->sched_priority);
-	if (array) {
+	if (queued) {
 		__activate_task(p, rq);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
@@ -4108,8 +3729,8 @@
 		if (task_running(rq, p)) {
 			if (p->prio > oldprio)
 				resched_task(rq->curr);
-		} else if (TASK_PREEMPTS_CURR(p, rq))
-			resched_task(rq->curr);
+		} else
+			preempt(p, rq);
 	}
 	__task_rq_unlock(rq);
 	spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -4368,50 +3989,28 @@
 
 /**
  * sys_sched_yield - yield the current processor to other threads.
- *
- * this function yields the current CPU by moving the calling thread
- * to the expired array. If there are no other threads running on this
- * CPU then this function will return.
+ * This function yields the current CPU by dropping the priority of current
+ * to the lowest priority.
  */
 asmlinkage long sys_sched_yield(void)
 {
+	int newprio;
 	runqueue_t *rq = this_rq_lock();
-	prio_array_t *array = current->array;
-	prio_array_t *target = rq->expired;
 
+	newprio = current->prio;
 	schedstat_inc(rq, yld_cnt);
-	/*
-	 * We implement yielding by moving the task into the expired
-	 * queue.
-	 *
-	 * (special rule: RT tasks will just roundrobin in the active
-	 *  array.)
-	 */
-	if (rt_task(current))
-		target = rq->active;
+	current->slice = slice(current);
+	current->time_slice = rr_interval(current);
+	if (likely(!rt_task(current)))
+		newprio = MIN_USER_PRIO;
 
-	if (array->nr_active == 1) {
-		schedstat_inc(rq, yld_act_empty);
-		if (!rq->expired->nr_active)
-			schedstat_inc(rq, yld_both_empty);
-	} else if (!rq->expired->nr_active)
-		schedstat_inc(rq, yld_exp_empty);
-
-	if (array != target) {
-		dequeue_task(current, array);
-		enqueue_task(current, target);
-	} else
-		/*
-		 * requeue_task is cheaper so perform that if possible.
-		 */
-		requeue_task(current, array);
+	requeue_task(current, rq, newprio);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
 	 * no need to preempt or enable interrupts:
 	 */
 	__release(rq->lock);
-	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 	_raw_spin_unlock(&rq->lock);
 	preempt_enable_no_resched();
 
@@ -4471,7 +4070,6 @@
 		spin_lock(lock);
 	}
 	if (need_resched()) {
-		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
 		__cond_resched();
@@ -4488,9 +4086,7 @@
 	BUG_ON(!in_softirq());
 
 	if (need_resched()) {
-		raw_local_irq_disable();
-		_local_bh_enable();
-		raw_local_irq_enable();
+		__local_bh_enable();
 		__cond_resched();
 		local_bh_disable();
 		return 1;
@@ -4624,7 +4220,7 @@
 		goto out_unlock;
 
 	jiffies_to_timespec(p->policy & SCHED_FIFO ?
-				0 : task_timeslice(p), &t);
+				0 : slice(p), &t);
 	read_unlock(&tasklist_lock);
 	retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
 out_nounlock:
@@ -4747,8 +4343,6 @@
 	unsigned long flags;
 
 	idle->timestamp = sched_clock();
-	idle->sleep_avg = 0;
-	idle->array = NULL;
 	idle->prio = idle->normal_prio = MAX_PRIO;
 	idle->state = TASK_RUNNING;
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
@@ -4865,7 +4459,7 @@
 		goto out;
 
 	set_task_cpu(p, dest_cpu);
-	if (p->array) {
+	if (task_queued(p)) {
 		/*
 		 * Sync timestamp with rq_dest's before activating.
 		 * The same thing could be achieved by doing this step
@@ -4876,8 +4470,7 @@
 				+ rq_dest->timestamp_last_tick;
 		deactivate_task(p, rq_src);
 		activate_task(p, rq_dest, 0);
-		if (TASK_PREEMPTS_CURR(p, rq_dest))
-			resched_task(rq_dest->curr);
+		preempt(p, rq_dest);
 	}
 
 out:
@@ -5091,7 +4684,7 @@
 
 	for (arr = 0; arr < 2; arr++) {
 		for (i = 0; i < MAX_PRIO; i++) {
-			struct list_head *list = &rq->arrays[arr].queue[i];
+			struct list_head *list = &rq->queue[i];
 			while (!list_empty(list))
 				migrate_dead(dead_cpu,
 					     list_entry(list->next, task_t,
@@ -6089,7 +5682,6 @@
 }
 #endif
 
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 /*
  * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
  * can switch it on easily if needed.
@@ -6471,72 +6063,37 @@
 #endif
 
 	/* Calculate CPU power for physical packages and nodes */
-#ifdef CONFIG_SCHED_SMT
 	for_each_cpu_mask(i, *cpu_map) {
+		int power;
 		struct sched_domain *sd;
+#ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
-		sd->groups->cpu_power = SCHED_LOAD_SCALE;
-	}
+		power = SCHED_LOAD_SCALE;
+		sd->groups->cpu_power = power;
 #endif
 #ifdef CONFIG_SCHED_MC
-	for_each_cpu_mask(i, *cpu_map) {
-		int power;
-		struct sched_domain *sd;
 		sd = &per_cpu(core_domains, i);
-		if (sched_smt_power_savings)
-			power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-		else
-			power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+		power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
 					    * SCHED_LOAD_SCALE / 10;
 		sd->groups->cpu_power = power;
-	}
-#endif
 
-	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_MC
 		sd = &per_cpu(phys_domains, i);
-		if (i != first_cpu(sd->groups->cpumask))
-			continue;
 
-		sd->groups->cpu_power = 0;
-		if (sched_mc_power_savings || sched_smt_power_savings) {
-			int j;
-
- 			for_each_cpu_mask(j, sd->groups->cpumask) {
-				struct sched_domain *sd1;
- 				sd1 = &per_cpu(core_domains, j);
- 				/*
- 			 	 * for each core we will add once
- 				 * to the group in physical domain
- 			 	 */
-  	 			if (j != first_cpu(sd1->groups->cpumask))
- 					continue;
-
- 				if (sched_smt_power_savings)
-   					sd->groups->cpu_power += sd1->groups->cpu_power;
- 				else
-   					sd->groups->cpu_power += SCHED_LOAD_SCALE;
-   			}
- 		} else
- 			/*
- 			 * This has to be < 2 * SCHED_LOAD_SCALE
- 			 * Lets keep it SCHED_LOAD_SCALE, so that
- 			 * while calculating NUMA group's cpu_power
- 			 * we can simply do
- 			 *  numa_group->cpu_power += phys_group->cpu_power;
- 			 *
- 			 * See "only add power once for each physical pkg"
- 			 * comment below
- 			 */
- 			sd->groups->cpu_power = SCHED_LOAD_SCALE;
+		/*
+		 * This has to be < 2 * SCHED_LOAD_SCALE
+		 * Let's keep it SCHED_LOAD_SCALE, so that
+		 * while calculating NUMA group's cpu_power
+		 * we can simply do
+		 *  numa_group->cpu_power += phys_group->cpu_power;
+		 *
+		 * See "only add power once for each physical pkg"
+		 * comment below
+		 */
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
 #else
-		int power;
 		sd = &per_cpu(phys_domains, i);
-		if (sched_smt_power_savings)
-			power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
-		else
-			power = SCHED_LOAD_SCALE;
+		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
+				(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
 #endif
 	}
@@ -6637,80 +6194,6 @@
 	return err;
 }
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
-{
-	int err;
-
-	lock_cpu_hotplug();
-	detach_destroy_domains(&cpu_online_map);
-	err = arch_init_sched_domains(&cpu_online_map);
-	unlock_cpu_hotplug();
-
-	return err;
-}
-
-static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
-{
-	int ret;
-
-	if (buf[0] != '0' && buf[0] != '1')
-		return -EINVAL;
-
-	if (smt)
-		sched_smt_power_savings = (buf[0] == '1');
-	else
-		sched_mc_power_savings = (buf[0] == '1');
-
-	ret = arch_reinit_sched_domains();
-
-	return ret ? ret : count;
-}
-
-int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
-{
-	int err = 0;
-#ifdef CONFIG_SCHED_SMT
-	if (smt_capable())
-		err = sysfs_create_file(&cls->kset.kobj,
-					&attr_sched_smt_power_savings.attr);
-#endif
-#ifdef CONFIG_SCHED_MC
-	if (!err && mc_capable())
-		err = sysfs_create_file(&cls->kset.kobj,
-					&attr_sched_mc_power_savings.attr);
-#endif
-	return err;
-}
-#endif
-
-#ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
-{
-	return sprintf(page, "%u\n", sched_mc_power_savings);
-}
-static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
-{
-	return sched_power_savings_store(buf, count, 0);
-}
-SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
-	    sched_mc_power_savings_store);
-#endif
-
-#ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
-{
-	return sprintf(page, "%u\n", sched_smt_power_savings);
-}
-static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
-{
-	return sched_power_savings_store(buf, count, 1);
-}
-SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
-	    sched_smt_power_savings_store);
-#endif
-
-
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Force a reinitialization of the sched domains hierarchy.  The domains
@@ -6773,17 +6256,13 @@
 void __init sched_init(void)
 {
 	runqueue_t *rq;
-	int i, j, k;
+	int i, j;
 
 	for_each_possible_cpu(i) {
-		prio_array_t *array;
 
 		rq = cpu_rq(i);
-		spin_lock_init_static(&rq->lock);
+		spin_lock_init(&rq->lock);
 		rq->nr_running = 0;
-		rq->active = rq->arrays;
-		rq->expired = rq->arrays + 1;
-		rq->best_expired_prio = MAX_PRIO;
 
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
@@ -6795,16 +6274,11 @@
 		INIT_LIST_HEAD(&rq->migration_queue);
 #endif
 		atomic_set(&rq->nr_iowait, 0);
-
-		for (j = 0; j < 2; j++) {
-			array = rq->arrays + j;
-			for (k = 0; k < MAX_PRIO; k++) {
-				INIT_LIST_HEAD(array->queue + k);
-				__clear_bit(k, array->bitmap);
-			}
-			// delimiter for bitsearch
-			__set_bit(MAX_PRIO, array->bitmap);
-		}
+		for (j = 0; j < MAX_PRIO; j++)
+			INIT_LIST_HEAD(&rq->queue[j]);
+		memset(rq->bitmap, 0, BITS_TO_LONGS(MAX_PRIO)*sizeof(long));
+		/* delimiter for bitsearch */
+		__set_bit(MAX_PRIO, rq->bitmap);
 	}
 
 	set_load_weight(&init_task);
@@ -6852,9 +6326,9 @@
 void normalize_rt_tasks(void)
 {
 	struct task_struct *p;
-	prio_array_t *array;
 	unsigned long flags;
 	runqueue_t *rq;
+	int queued;
 
 	read_lock_irq(&tasklist_lock);
 	for_each_process(p) {
@@ -6864,11 +6338,10 @@
 		spin_lock_irqsave(&p->pi_lock, flags);
 		rq = __task_rq_lock(p);
 
-		array = p->array;
-		if (array)
+		if ((queued = task_queued(p)))
 			deactivate_task(p, task_rq(p));
 		__setscheduler(p, SCHED_NORMAL, 0);
-		if (array) {
+		if (queued) {
 			__activate_task(p, task_rq(p));
 			resched_task(rq->curr);
 		}