diff -urN oldtree/fs/proc/array.c newtree/fs/proc/array.c --- oldtree/fs/proc/array.c 2006-06-07 15:35:02.301155500 +0000 +++ newtree/fs/proc/array.c 2006-06-08 16:26:18.309095750 +0000 @@ -166,7 +166,7 @@ read_lock(&tasklist_lock); buffer += sprintf(buffer, "State:\t%s\n" - "SleepAVG:\t%lu%%\n" + "Bonus:\t%u\n" "Tgid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" @@ -174,7 +174,7 @@ "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), - (p->sleep_avg/1024)*100/(1020000000/1024), + p->bonus, p->tgid, p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, pid_alive(p) && p->ptrace ? p->parent->pid : 0, diff -urN oldtree/include/linux/dwarf2-defs.h newtree/include/linux/dwarf2-defs.h --- oldtree/include/linux/dwarf2-defs.h 1970-01-01 00:00:00.000000000 +0000 +++ newtree/include/linux/dwarf2-defs.h 2006-06-08 16:26:37.214277250 +0000 @@ -0,0 +1,515 @@ +#ifndef _ELF_DWARF2_H +/* Machine generated from dwarf2.h by scripts/dwarfh.awk */ +#define _ELF_DWARF2_H +#define DW_TAG_padding 0x00 +#define DW_TAG_array_type 0x01 +#define DW_TAG_class_type 0x02 +#define DW_TAG_entry_point 0x03 +#define DW_TAG_enumeration_type 0x04 +#define DW_TAG_formal_parameter 0x05 +#define DW_TAG_imported_declaration 0x08 +#define DW_TAG_label 0x0a +#define DW_TAG_lexical_block 0x0b +#define DW_TAG_member 0x0d +#define DW_TAG_pointer_type 0x0f +#define DW_TAG_reference_type 0x10 +#define DW_TAG_compile_unit 0x11 +#define DW_TAG_string_type 0x12 +#define DW_TAG_structure_type 0x13 +#define DW_TAG_subroutine_type 0x15 +#define DW_TAG_typedef 0x16 +#define DW_TAG_union_type 0x17 +#define DW_TAG_unspecified_parameters 0x18 +#define DW_TAG_variant 0x19 +#define DW_TAG_common_block 0x1a +#define DW_TAG_common_inclusion 0x1b +#define DW_TAG_inheritance 0x1c +#define DW_TAG_inlined_subroutine 0x1d +#define DW_TAG_module 0x1e +#define DW_TAG_ptr_to_member_type 0x1f +#define DW_TAG_set_type 0x20 +#define DW_TAG_subrange_type 0x21 +#define DW_TAG_with_stmt 0x22 +#define 
DW_TAG_access_declaration 0x23 +#define DW_TAG_base_type 0x24 +#define DW_TAG_catch_block 0x25 +#define DW_TAG_const_type 0x26 +#define DW_TAG_constant 0x27 +#define DW_TAG_enumerator 0x28 +#define DW_TAG_file_type 0x29 +#define DW_TAG_friend 0x2a +#define DW_TAG_namelist 0x2b +#define DW_TAG_namelist_item 0x2c +#define DW_TAG_packed_type 0x2d +#define DW_TAG_subprogram 0x2e +#define DW_TAG_template_type_param 0x2f +#define DW_TAG_template_value_param 0x30 +#define DW_TAG_thrown_type 0x31 +#define DW_TAG_try_block 0x32 +#define DW_TAG_variant_part 0x33 +#define DW_TAG_variable 0x34 +#define DW_TAG_volatile_type 0x35 +#define DW_TAG_dwarf_procedure 0x36 +#define DW_TAG_restrict_type 0x37 +#define DW_TAG_interface_type 0x38 +#define DW_TAG_namespace 0x39 +#define DW_TAG_imported_module 0x3a +#define DW_TAG_unspecified_type 0x3b +#define DW_TAG_partial_unit 0x3c +#define DW_TAG_imported_unit 0x3d +#define DW_TAG_MIPS_loop 0x4081 +#define DW_TAG_HP_array_descriptor 0x4090 +#define DW_TAG_format_label 0x4101 +#define DW_TAG_function_template 0x4102 +#define DW_TAG_class_template 0x4103 +#define DW_TAG_GNU_BINCL 0x4104 +#define DW_TAG_GNU_EINCL 0x4105 +#define DW_TAG_upc_shared_type 0x8765 +#define DW_TAG_upc_strict_type 0x8766 +#define DW_TAG_upc_relaxed_type 0x8767 +#define DW_TAG_PGI_kanji_type 0xA000 +#define DW_TAG_PGI_interface_block 0xA020 +#define DW_TAG_lo_user 0x4080 +#define DW_TAG_hi_user 0xffff +#define DW_children_no 0 +#define DW_children_yes 1 +#define DW_FORM_addr 0x01 +#define DW_FORM_block2 0x03 +#define DW_FORM_block4 0x04 +#define DW_FORM_data2 0x05 +#define DW_FORM_data4 0x06 +#define DW_FORM_data8 0x07 +#define DW_FORM_string 0x08 +#define DW_FORM_block 0x09 +#define DW_FORM_block1 0x0a +#define DW_FORM_data1 0x0b +#define DW_FORM_flag 0x0c +#define DW_FORM_sdata 0x0d +#define DW_FORM_strp 0x0e +#define DW_FORM_udata 0x0f +#define DW_FORM_ref_addr 0x10 +#define DW_FORM_ref1 0x11 +#define DW_FORM_ref2 0x12 +#define DW_FORM_ref4 0x13 +#define 
DW_FORM_ref8 0x14 +#define DW_FORM_ref_udata 0x15 +#define DW_FORM_indirect 0x16 +#define DW_AT_sibling 0x01 +#define DW_AT_location 0x02 +#define DW_AT_name 0x03 +#define DW_AT_ordering 0x09 +#define DW_AT_subscr_data 0x0a +#define DW_AT_byte_size 0x0b +#define DW_AT_bit_offset 0x0c +#define DW_AT_bit_size 0x0d +#define DW_AT_element_list 0x0f +#define DW_AT_stmt_list 0x10 +#define DW_AT_low_pc 0x11 +#define DW_AT_high_pc 0x12 +#define DW_AT_language 0x13 +#define DW_AT_member 0x14 +#define DW_AT_discr 0x15 +#define DW_AT_discr_value 0x16 +#define DW_AT_visibility 0x17 +#define DW_AT_import 0x18 +#define DW_AT_string_length 0x19 +#define DW_AT_common_reference 0x1a +#define DW_AT_comp_dir 0x1b +#define DW_AT_const_value 0x1c +#define DW_AT_containing_type 0x1d +#define DW_AT_default_value 0x1e +#define DW_AT_inline 0x20 +#define DW_AT_is_optional 0x21 +#define DW_AT_lower_bound 0x22 +#define DW_AT_producer 0x25 +#define DW_AT_prototyped 0x27 +#define DW_AT_return_addr 0x2a +#define DW_AT_start_scope 0x2c +#define DW_AT_stride_size 0x2e +#define DW_AT_upper_bound 0x2f +#define DW_AT_abstract_origin 0x31 +#define DW_AT_accessibility 0x32 +#define DW_AT_address_class 0x33 +#define DW_AT_artificial 0x34 +#define DW_AT_base_types 0x35 +#define DW_AT_calling_convention 0x36 +#define DW_AT_count 0x37 +#define DW_AT_data_member_location 0x38 +#define DW_AT_decl_column 0x39 +#define DW_AT_decl_file 0x3a +#define DW_AT_decl_line 0x3b +#define DW_AT_declaration 0x3c +#define DW_AT_discr_list 0x3d +#define DW_AT_encoding 0x3e +#define DW_AT_external 0x3f +#define DW_AT_frame_base 0x40 +#define DW_AT_friend 0x41 +#define DW_AT_identifier_case 0x42 +#define DW_AT_macro_info 0x43 +#define DW_AT_namelist_items 0x44 +#define DW_AT_priority 0x45 +#define DW_AT_segment 0x46 +#define DW_AT_specification 0x47 +#define DW_AT_static_link 0x48 +#define DW_AT_type 0x49 +#define DW_AT_use_location 0x4a +#define DW_AT_variable_parameter 0x4b +#define DW_AT_virtuality 0x4c +#define 
DW_AT_vtable_elem_location 0x4d +#define DW_AT_allocated 0x4e +#define DW_AT_associated 0x4f +#define DW_AT_data_location 0x50 +#define DW_AT_stride 0x51 +#define DW_AT_entry_pc 0x52 +#define DW_AT_use_UTF8 0x53 +#define DW_AT_extension 0x54 +#define DW_AT_ranges 0x55 +#define DW_AT_trampoline 0x56 +#define DW_AT_call_column 0x57 +#define DW_AT_call_file 0x58 +#define DW_AT_call_line 0x59 +#define DW_AT_MIPS_fde 0x2001 +#define DW_AT_MIPS_loop_begin 0x2002 +#define DW_AT_MIPS_tail_loop_begin 0x2003 +#define DW_AT_MIPS_epilog_begin 0x2004 +#define DW_AT_MIPS_loop_unroll_factor 0x2005 +#define DW_AT_MIPS_software_pipeline_depth 0x2006 +#define DW_AT_MIPS_linkage_name 0x2007 +#define DW_AT_MIPS_stride 0x2008 +#define DW_AT_MIPS_abstract_name 0x2009 +#define DW_AT_MIPS_clone_origin 0x200a +#define DW_AT_MIPS_has_inlines 0x200b +#define DW_AT_HP_block_index 0x2000 +#define DW_AT_HP_unmodifiable 0x2001 +#define DW_AT_HP_actuals_stmt_list 0x2010 +#define DW_AT_HP_proc_per_section 0x2011 +#define DW_AT_HP_raw_data_ptr 0x2012 +#define DW_AT_HP_pass_by_reference 0x2013 +#define DW_AT_HP_opt_level 0x2014 +#define DW_AT_HP_prof_version_id 0x2015 +#define DW_AT_HP_opt_flags 0x2016 +#define DW_AT_HP_cold_region_low_pc 0x2017 +#define DW_AT_HP_cold_region_high_pc 0x2018 +#define DW_AT_HP_all_variables_modifiable 0x2019 +#define DW_AT_HP_linkage_name 0x201a +#define DW_AT_HP_prof_flags 0x201b +#define DW_AT_sf_names 0x2101 +#define DW_AT_src_info 0x2102 +#define DW_AT_mac_info 0x2103 +#define DW_AT_src_coords 0x2104 +#define DW_AT_body_begin 0x2105 +#define DW_AT_body_end 0x2106 +#define DW_AT_GNU_vector 0x2107 +#define DW_AT_VMS_rtnbeg_pd_address 0x2201 +#define DW_AT_upc_threads_scaled 0x3210 +#define DW_AT_PGI_lbase 0x3a00 +#define DW_AT_PGI_soffset 0x3a01 +#define DW_AT_PGI_lstride 0x3a02 +#define DW_AT_lo_user 0x2000 /* Implementation-defined range start. */ +#define DW_AT_hi_user 0x3ff0 /* Implementation-defined range end. 
*/ +#define DW_OP_addr 0x03 +#define DW_OP_deref 0x06 +#define DW_OP_const1u 0x08 +#define DW_OP_const1s 0x09 +#define DW_OP_const2u 0x0a +#define DW_OP_const2s 0x0b +#define DW_OP_const4u 0x0c +#define DW_OP_const4s 0x0d +#define DW_OP_const8u 0x0e +#define DW_OP_const8s 0x0f +#define DW_OP_constu 0x10 +#define DW_OP_consts 0x11 +#define DW_OP_dup 0x12 +#define DW_OP_drop 0x13 +#define DW_OP_over 0x14 +#define DW_OP_pick 0x15 +#define DW_OP_swap 0x16 +#define DW_OP_rot 0x17 +#define DW_OP_xderef 0x18 +#define DW_OP_abs 0x19 +#define DW_OP_and 0x1a +#define DW_OP_div 0x1b +#define DW_OP_minus 0x1c +#define DW_OP_mod 0x1d +#define DW_OP_mul 0x1e +#define DW_OP_neg 0x1f +#define DW_OP_not 0x20 +#define DW_OP_or 0x21 +#define DW_OP_plus 0x22 +#define DW_OP_plus_uconst 0x23 +#define DW_OP_shl 0x24 +#define DW_OP_shr 0x25 +#define DW_OP_shra 0x26 +#define DW_OP_xor 0x27 +#define DW_OP_bra 0x28 +#define DW_OP_eq 0x29 +#define DW_OP_ge 0x2a +#define DW_OP_gt 0x2b +#define DW_OP_le 0x2c +#define DW_OP_lt 0x2d +#define DW_OP_ne 0x2e +#define DW_OP_skip 0x2f +#define DW_OP_lit0 0x30 +#define DW_OP_lit1 0x31 +#define DW_OP_lit2 0x32 +#define DW_OP_lit3 0x33 +#define DW_OP_lit4 0x34 +#define DW_OP_lit5 0x35 +#define DW_OP_lit6 0x36 +#define DW_OP_lit7 0x37 +#define DW_OP_lit8 0x38 +#define DW_OP_lit9 0x39 +#define DW_OP_lit10 0x3a +#define DW_OP_lit11 0x3b +#define DW_OP_lit12 0x3c +#define DW_OP_lit13 0x3d +#define DW_OP_lit14 0x3e +#define DW_OP_lit15 0x3f +#define DW_OP_lit16 0x40 +#define DW_OP_lit17 0x41 +#define DW_OP_lit18 0x42 +#define DW_OP_lit19 0x43 +#define DW_OP_lit20 0x44 +#define DW_OP_lit21 0x45 +#define DW_OP_lit22 0x46 +#define DW_OP_lit23 0x47 +#define DW_OP_lit24 0x48 +#define DW_OP_lit25 0x49 +#define DW_OP_lit26 0x4a +#define DW_OP_lit27 0x4b +#define DW_OP_lit28 0x4c +#define DW_OP_lit29 0x4d +#define DW_OP_lit30 0x4e +#define DW_OP_lit31 0x4f +#define DW_OP_reg0 0x50 +#define DW_OP_reg1 0x51 +#define DW_OP_reg2 0x52 +#define DW_OP_reg3 0x53 +#define 
DW_OP_reg4 0x54 +#define DW_OP_reg5 0x55 +#define DW_OP_reg6 0x56 +#define DW_OP_reg7 0x57 +#define DW_OP_reg8 0x58 +#define DW_OP_reg9 0x59 +#define DW_OP_reg10 0x5a +#define DW_OP_reg11 0x5b +#define DW_OP_reg12 0x5c +#define DW_OP_reg13 0x5d +#define DW_OP_reg14 0x5e +#define DW_OP_reg15 0x5f +#define DW_OP_reg16 0x60 +#define DW_OP_reg17 0x61 +#define DW_OP_reg18 0x62 +#define DW_OP_reg19 0x63 +#define DW_OP_reg20 0x64 +#define DW_OP_reg21 0x65 +#define DW_OP_reg22 0x66 +#define DW_OP_reg23 0x67 +#define DW_OP_reg24 0x68 +#define DW_OP_reg25 0x69 +#define DW_OP_reg26 0x6a +#define DW_OP_reg27 0x6b +#define DW_OP_reg28 0x6c +#define DW_OP_reg29 0x6d +#define DW_OP_reg30 0x6e +#define DW_OP_reg31 0x6f +#define DW_OP_breg0 0x70 +#define DW_OP_breg1 0x71 +#define DW_OP_breg2 0x72 +#define DW_OP_breg3 0x73 +#define DW_OP_breg4 0x74 +#define DW_OP_breg5 0x75 +#define DW_OP_breg6 0x76 +#define DW_OP_breg7 0x77 +#define DW_OP_breg8 0x78 +#define DW_OP_breg9 0x79 +#define DW_OP_breg10 0x7a +#define DW_OP_breg11 0x7b +#define DW_OP_breg12 0x7c +#define DW_OP_breg13 0x7d +#define DW_OP_breg14 0x7e +#define DW_OP_breg15 0x7f +#define DW_OP_breg16 0x80 +#define DW_OP_breg17 0x81 +#define DW_OP_breg18 0x82 +#define DW_OP_breg19 0x83 +#define DW_OP_breg20 0x84 +#define DW_OP_breg21 0x85 +#define DW_OP_breg22 0x86 +#define DW_OP_breg23 0x87 +#define DW_OP_breg24 0x88 +#define DW_OP_breg25 0x89 +#define DW_OP_breg26 0x8a +#define DW_OP_breg27 0x8b +#define DW_OP_breg28 0x8c +#define DW_OP_breg29 0x8d +#define DW_OP_breg30 0x8e +#define DW_OP_breg31 0x8f +#define DW_OP_regx 0x90 +#define DW_OP_fbreg 0x91 +#define DW_OP_bregx 0x92 +#define DW_OP_piece 0x93 +#define DW_OP_deref_size 0x94 +#define DW_OP_xderef_size 0x95 +#define DW_OP_nop 0x96 +#define DW_OP_push_object_address 0x97 +#define DW_OP_call2 0x98 +#define DW_OP_call4 0x99 +#define DW_OP_call_ref 0x9a +#define DW_OP_GNU_push_tls_address 0xe0 +#define DW_OP_HP_unknown 0xe0 +#define DW_OP_HP_is_value 0xe1 +#define 
DW_OP_HP_fltconst4 0xe2 +#define DW_OP_HP_fltconst8 0xe3 +#define DW_OP_HP_mod_range 0xe4 +#define DW_OP_HP_unmod_range 0xe5 +#define DW_OP_HP_tls 0xe6 +#define DW_OP_lo_user 0xe0 /* Implementation-defined range start. */ +#define DW_OP_hi_user 0xff /* Implementation-defined range end. */ +#define DW_ATE_void 0x0 +#define DW_ATE_address 0x1 +#define DW_ATE_boolean 0x2 +#define DW_ATE_complex_float 0x3 +#define DW_ATE_float 0x4 +#define DW_ATE_signed 0x5 +#define DW_ATE_signed_char 0x6 +#define DW_ATE_unsigned 0x7 +#define DW_ATE_unsigned_char 0x8 +#define DW_ATE_imaginary_float 0x9 +#define DW_ATE_HP_float80 0x80 +#define DW_ATE_HP_complex_float80 0x81 +#define DW_ATE_HP_float128 0x82 +#define DW_ATE_HP_complex_float128 0x83 +#define DW_ATE_HP_floathpintel 0x84 +#define DW_ATE_HP_imaginary_float80 0x85 +#define DW_ATE_HP_imaginary_float128 0x86 +#define DW_ATE_lo_user 0x80 +#define DW_ATE_hi_user 0xff +#define DW_ORD_row_major 0 +#define DW_ORD_col_major 1 +#define DW_ACCESS_public 1 +#define DW_ACCESS_protected 2 +#define DW_ACCESS_private 3 +#define DW_VIS_local 1 +#define DW_VIS_exported 2 +#define DW_VIS_qualified 3 +#define DW_VIRTUALITY_none 0 +#define DW_VIRTUALITY_virtual 1 +#define DW_VIRTUALITY_pure_virtual 2 +#define DW_ID_case_sensitive 0 +#define DW_ID_up_case 1 +#define DW_ID_down_case 2 +#define DW_ID_case_insensitive 3 +#define DW_CC_normal 0x1 +#define DW_CC_program 0x2 +#define DW_CC_nocall 0x3 +#define DW_CC_lo_user 0x40 +#define DW_CC_hi_user 0xff +#define DW_INL_not_inlined 0 +#define DW_INL_inlined 1 +#define DW_INL_declared_not_inlined 2 +#define DW_INL_declared_inlined 3 +#define DW_DSC_label 0 +#define DW_DSC_range 1 +#define DW_LNS_extended_op 0 +#define DW_LNS_copy 1 +#define DW_LNS_advance_pc 2 +#define DW_LNS_advance_line 3 +#define DW_LNS_set_file 4 +#define DW_LNS_set_column 5 +#define DW_LNS_negate_stmt 6 +#define DW_LNS_set_basic_block 7 +#define DW_LNS_const_add_pc 8 +#define DW_LNS_fixed_advance_pc 9 +#define 
DW_LNS_set_prologue_end 10 +#define DW_LNS_set_epilogue_begin 11 +#define DW_LNS_set_isa 12 +#define DW_LNE_end_sequence 1 +#define DW_LNE_set_address 2 +#define DW_LNE_define_file 3 +#define DW_LNE_HP_negate_is_UV_update 0x11 +#define DW_LNE_HP_push_context 0x12 +#define DW_LNE_HP_pop_context 0x13 +#define DW_LNE_HP_set_file_line_column 0x14 +#define DW_LNE_HP_set_routine_name 0x15 +#define DW_LNE_HP_set_sequence 0x16 +#define DW_LNE_HP_negate_post_semantics 0x17 +#define DW_LNE_HP_negate_function_exit 0x18 +#define DW_LNE_HP_negate_front_end_logical 0x19 +#define DW_LNE_HP_define_proc 0x20 +#define DW_CFA_advance_loc 0x40 +#define DW_CFA_offset 0x80 +#define DW_CFA_restore 0xc0 +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_MIPS_advance_loc8 0x1d +#define DW_CFA_GNU_window_save 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CIE_ID 0xffffffff +#define DW_CIE_VERSION 1 +#define DW_CFA_extended 0 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_hi_user 0x3f +#define DW_CHILDREN_no 0x00 +#define DW_CHILDREN_yes 0x01 +#define DW_ADDR_none 0 +#define DW_LANG_C89 0x0001 +#define DW_LANG_C 0x0002 +#define DW_LANG_Ada83 0x0003 +#define DW_LANG_C_plus_plus 0x0004 +#define DW_LANG_Cobol74 0x0005 +#define DW_LANG_Cobol85 0x0006 +#define DW_LANG_Fortran77 0x0007 +#define DW_LANG_Fortran90 
0x0008 +#define DW_LANG_Pascal83 0x0009 +#define DW_LANG_Modula2 0x000a +#define DW_LANG_Java 0x000b +#define DW_LANG_C99 0x000c +#define DW_LANG_Ada95 0x000d +#define DW_LANG_Fortran95 0x000e +#define DW_LANG_Mips_Assembler 0x8001 +#define DW_LANG_Upc 0x8765 +#define DW_LANG_lo_user 0x8000 /* Implementation-defined range start. */ +#define DW_LANG_hi_user 0xffff /* Implementation-defined range end. */ +#define DW_MACINFO_define 1 +#define DW_MACINFO_undef 2 +#define DW_MACINFO_start_file 3 +#define DW_MACINFO_end_file 4 +#define DW_MACINFO_vendor_ext 255 +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_omit 0xff +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C +#define DW_EH_PE_signed 0x08 +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 +#endif diff -urN oldtree/include/linux/sched.h newtree/include/linux/sched.h --- oldtree/include/linux/sched.h 2006-06-07 15:35:02.957196500 +0000 +++ newtree/include/linux/sched.h 2006-06-08 16:26:18.349098250 +0000 @@ -487,6 +487,7 @@ #define MAX_RT_PRIO MAX_USER_RT_PRIO #define MAX_PRIO (MAX_RT_PRIO + 40) +#define MIN_USER_PRIO (MAX_PRIO - 1) #define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) #define rt_task(p) rt_prio((p)->prio) @@ -525,7 +526,6 @@ extern struct user_struct root_user; #define INIT_USER (&root_user) -typedef struct prio_array prio_array_t; struct backing_dev_info; struct reclaim_state; @@ -760,13 +760,6 @@ struct pipe_inode_info; struct uts_namespace; -enum sleep_type { - SLEEP_NORMAL, - SLEEP_NONINTERACTIVE, - SLEEP_INTERACTIVE, - SLEEP_INTERRUPTED, -}; - struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ struct thread_info *thread_info; @@ -784,19 
+777,18 @@ int load_weight; /* for niceness load balancing purposes */ int prio, static_prio, normal_prio; struct list_head run_list; - prio_array_t *array; unsigned short ioprio; unsigned int btrace_seq; - unsigned long sleep_avg; - unsigned long long timestamp, last_ran; + unsigned long long timestamp; + unsigned long runtime, totalrun, ns_debit, systime; + unsigned int bonus; + unsigned int slice, time_slice; unsigned long long sched_time; /* sched_clock time spent running */ - enum sleep_type sleep_type; unsigned long policy; cpumask_t cpus_allowed; - unsigned int time_slice, first_time_slice; #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -1064,6 +1056,8 @@ #define PF_SPREAD_SLAB 0x08000000 /* Spread some slab caches over cpuset */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x02000000 /* Thread belongs to the rt mutex tester */ +#define PF_NONSLEEP 0x20000000 /* Waiting on in kernel activity */ +#define PF_FORKED 0x40000000 /* Task just forked another process */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -1196,7 +1190,6 @@ static inline void kick_process(struct task_struct *tsk) { } #endif extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); -extern void FASTCALL(sched_exit(task_t * p)); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); diff -urN oldtree/kernel/exit.c newtree/kernel/exit.c --- oldtree/kernel/exit.c 2006-06-07 15:35:03.061203000 +0000 +++ newtree/kernel/exit.c 2006-06-08 16:26:18.365099250 +0000 @@ -168,7 +168,6 @@ zap_leader = (leader->exit_signal == -1); } - sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); release_thread(p); diff -urN oldtree/kernel/sched.c newtree/kernel/sched.c --- oldtree/kernel/sched.c 2006-06-07 15:35:03.101205500 +0000 +++ newtree/kernel/sched.c 2006-06-08 16:52:08.769993500 +0000 @@ -1,21 +1,7 @@ /* - * kernel/sched.c - * - * Kernel scheduler and related 
syscalls - * - * Copyright (C) 1991-2002 Linus Torvalds - * - * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and - * make semaphores SMP safe - * 1998-11-19 Implemented schedule_timeout() and related stuff - * by Andrea Arcangeli - * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: - * hybrid priority-list and round-robin design with - * an array-switch method of distributing timeslices - * and per-CPU runqueues. Cleanups and useful suggestions - * by Davide Libenzi, preemptible kernel bits by Robert Love. - * 2003-09-03 Interactivity tuning by Con Kolivas. - * 2004-04-02 Scheduler domains code by Nick Piggin + * + * Ver: v15.6 + * Ported by cheater-conrad of the no-sources team. Send all regressions to him, NO ONE ELSE! */ #include @@ -79,129 +65,26 @@ /* * Some helpers for converting nanosecond timing to jiffy resolution */ -#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) -#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) - -/* - * These are the 'tuning knobs' of the scheduler: - * - * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger), - * default timeslice is 100 msecs, maximum timeslice is 800 msecs. - * Timeslices get refilled after they expire. - */ -#define MIN_TIMESLICE max(5 * HZ / 1000, 1) -#define DEF_TIMESLICE (100 * HZ / 1000) -#define ON_RUNQUEUE_WEIGHT 30 -#define CHILD_PENALTY 95 -#define PARENT_PENALTY 100 -#define EXIT_WEIGHT 3 -#define PRIO_BONUS_RATIO 25 -#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) -#define INTERACTIVE_DELTA 2 -#define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS) -#define STARVATION_LIMIT (MAX_SLEEP_AVG) -#define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG)) - -/* - * If a task is 'interactive' then we reinsert it in the active - * array after it has expired its current timeslice. (it will not - * continue to run immediately, it will still roundrobin with - * other interactive tasks.) - * - * This part scales the interactivity limit depending on niceness. 
- * - * We scale it linearly, offset by the INTERACTIVE_DELTA delta. - * Here are a few examples of different nice levels: - * - * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0] - * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0] - * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0] - * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0] - * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0] - * - * (the X axis represents the possible -5 ... 0 ... +5 dynamic - * priority range a task can explore, a value of '1' means the - * task is rated interactive.) - * - * Ie. nice +19 tasks can never get 'interactive' enough to be - * reinserted into the active array. And only heavily CPU-hog nice -20 - * tasks will be expired. Default nice 0 tasks are somewhere between, - * it takes some effort for them to get interactive, but it's not - * too hard. - */ - -#define CURRENT_BONUS(p) \ - (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \ - MAX_SLEEP_AVG) - -#define GRANULARITY (10 * HZ / 1000 ? : 1) - -#ifdef CONFIG_SMP -#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ - (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \ - num_online_cpus()) -#else -#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ - (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1))) -#endif - -#define SCALE(v1,v1_max,v2_max) \ - (v1) * (v2_max) / (v1_max) - -#define DELTA(p) \ - (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \ - INTERACTIVE_DELTA) - -#define TASK_INTERACTIVE(p) \ - ((p)->prio <= (p)->static_prio - DELTA(p)) - -#define INTERACTIVE_SLEEP(p) \ - (JIFFIES_TO_NS(MAX_SLEEP_AVG * \ - (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1)) - +#define NSJIFFY (1000000000 / HZ) /* One jiffy in ns */ +#define NS_TO_JIFFIES(TIME) ((TIME) / NSJIFFY) +#define JIFFIES_TO_NS(TIME) ((TIME) * NSJIFFY) #define TASK_PREEMPTS_CURR(p, rq) \ ((p)->prio < (rq)->curr->prio) /* - * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] - * to time slice values: [800ms ... 100ms ... 
5ms] - * - * The higher a thread's priority, the bigger timeslices - * it gets during one round of execution. But even the lowest - * priority thread gets MIN_TIMESLICE worth of execution time. + * This is the time all tasks within the same priority round robin. + * Set to a minimum of 6ms. */ +#define RR_INTERVAL ((6 * HZ / 1001) + 1) +#define DEF_TIMESLICE (RR_INTERVAL * 19) -#define SCALE_PRIO(x, prio) \ - max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) - -static unsigned int static_prio_timeslice(int static_prio) -{ - if (static_prio < NICE_TO_PRIO(0)) - return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio); - else - return SCALE_PRIO(DEF_TIMESLICE, static_prio); -} - -static inline unsigned int task_timeslice(task_t *p) -{ - return static_prio_timeslice(p->static_prio); -} - -#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ +#define task_hot(p, now, sd) ((long long) ((now) - (p)->timestamp) \ < (long long) (sd)->cache_hot_time) - /* * These are the runqueue data structures: */ - typedef struct runqueue runqueue_t; -struct prio_array { - unsigned int nr_active; - DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ - struct list_head queue[MAX_PRIO]; -}; - /* * This is the main, per-CPU runqueue data structure. 
* @@ -231,12 +114,11 @@ */ unsigned long nr_uninterruptible; - unsigned long expired_timestamp; unsigned long long timestamp_last_tick; task_t *curr, *idle; struct mm_struct *prev_mm; - prio_array_t *active, *expired, arrays[2]; - int best_expired_prio; + unsigned long bitmap[BITS_TO_LONGS(MAX_PRIO + 1)]; + struct list_head queue[MAX_PRIO]; atomic_t nr_iowait; #ifdef CONFIG_SMP @@ -248,6 +130,7 @@ task_t *migration_thread; struct list_head migration_queue; + int cpu; #endif #ifdef CONFIG_SCHEDSTATS @@ -311,13 +194,6 @@ /* this is a valid case when another task releases the spinlock */ rq->lock.owner = current; #endif - /* - * If we are tracking spinlock dependencies then we have to - * fix up the runqueue lock - which gets 'carried over' from - * prev into current: - */ - spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); - spin_unlock_irq(&rq->lock); } @@ -559,13 +435,7 @@ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) /* - * Called when a process is dequeued from the active array and given - * the cpu. We should note that with the exception of interactive - * tasks, the expired queue will become the active queue after the active - * queue is empty, without explicitly dequeuing and requeuing tasks in the - * expired queue. (Interactive tasks may be requeued directly to the - * active queue, thus delaying tasks in the expired queue from running; - * see scheduler_tick()). + * Called when a process is dequeued and given the cpu. * * This function is only called from sched_info_arrive(), rather than * dequeue_task(). Even though a task may be queued and dequeued multiple @@ -598,13 +468,11 @@ } /* - * Called when a process is queued into either the active or expired - * array. The time is noted and later used to determine how long we - * had to wait for us to reach the cpu. Since the expired queue will - * become the active queue after active queue is empty, without dequeuing - * and requeuing any tasks, we are interested in queuing to either. 
It - * is unusual but not impossible for tasks to be dequeued and immediately - * requeued in the same or another array: this can happen in sched_yield(), + * Called when a process is queued. + * The time is noted and later used to determine how long we had to wait for + * us to reach the cpu. + * It is unusual but not impossible for tasks to be dequeued and immediately + * requeued: this can happen in sched_yield(), * set_user_nice(), and even load_balance() as it moves tasks from runqueue * to runqueue. * @@ -662,68 +530,128 @@ #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ /* - * Adding/removing a task to/from a priority array: + * Get nanosecond clock difference without overflowing unsigned long (clamped to 2^31 ns when long is 32 bits). + */ +static unsigned long ns_diff(unsigned long long v1, unsigned long long v2) +{ + unsigned long long vdiff; + if (likely(v1 >= v2)) { + vdiff = v1 - v2; +#if BITS_PER_LONG < 64 + if (vdiff > (1UL << 31)) + vdiff = 1UL << 31; +#endif + } else { + /* + * Rarely the clock appears to go backwards. There should + * always be a positive difference so return 1. 
+ */ + vdiff = 1; + } + return (unsigned long)vdiff; +} + +static inline int task_queued(const struct task_struct *task) +{ + return !list_empty(&task->run_list); +} + +/* + * Adding/removing a task to/from a runqueue: */ -static void dequeue_task(struct task_struct *p, prio_array_t *array) +static void dequeue_task(struct task_struct *p, runqueue_t *rq) { - array->nr_active--; - list_del(&p->run_list); - if (list_empty(array->queue + p->prio)) - __clear_bit(p->prio, array->bitmap); + list_del_init(&p->run_list); + if (list_empty(rq->queue + p->prio)) + __clear_bit(p->prio, rq->bitmap); + p->ns_debit = 0; } -static void enqueue_task(struct task_struct *p, prio_array_t *array) +static void enqueue_task(struct task_struct *p, runqueue_t *rq) { sched_info_queued(p); - list_add_tail(&p->run_list, array->queue + p->prio); - __set_bit(p->prio, array->bitmap); - array->nr_active++; - p->array = array; + list_add_tail(&p->run_list, rq->queue + p->prio); + __set_bit(p->prio, rq->bitmap); } /* * Put task to the end of the run list without the overhead of dequeue * followed by enqueue. 
*/ -static void requeue_task(struct task_struct *p, prio_array_t *array) +static void requeue_task(struct task_struct *p, runqueue_t *rq, int prio) +{ + list_move_tail(&p->run_list, rq->queue + prio); + if (p->prio != prio) { + if (list_empty(rq->queue + p->prio)) + __clear_bit(p->prio, rq->bitmap); + p->prio = prio; + __set_bit(prio, rq->bitmap); + } + p->ns_debit = 0; +} + +static inline void enqueue_task_head(struct task_struct *p, runqueue_t *rq) { - list_move_tail(&p->run_list, array->queue + p->prio); + list_add(&p->run_list, rq->queue + p->prio); + __set_bit(p->prio, rq->bitmap); } -static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) +static unsigned int rr_interval(const struct task_struct *p) { - list_add(&p->run_list, array->queue + p->prio); - __set_bit(p->prio, array->bitmap); - array->nr_active++; - p->array = array; + int nice = TASK_NICE(p); + + if (nice < 0 && !rt_task(p)) + return RR_INTERVAL * (20 - nice) / 20; + return RR_INTERVAL; +} + +/* + * slice - the duration a task runs before getting requeued at its best + * priority and has its bonus decremented. + */ +static unsigned int slice(const struct task_struct *p) +{ + unsigned int slice, rr; + + slice = rr = rr_interval(p); + if (likely(!rt_task(p))) + slice += (39 - TASK_USER_PRIO(p)) * rr; + return slice; +} + +/* + * Bonus - How much higher than its base priority an interactive task can run. + */ +static inline unsigned int bonus(const struct task_struct *p) +{ + return TASK_USER_PRIO(p); } /* * __normal_prio - return the priority that is based on the static * priority but is modified by bonuses/penalties. - * - * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] - * into the -5 ... 0 ... +5 bonus/penalty range. - * - * We use 25% of the full 0...39 priority range so that: - * - * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. - * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. 
- * - * Both properties are important to certain workloads. + * The priority normally decreases by one each rr_interval(). + * As the bonus increases the initial priority starts at a higher "stair" or + * priority. */ - static inline int __normal_prio(task_t *p) { - int bonus, prio; - - bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; + int prio; + unsigned int full_slice, used_slice = 0; + unsigned int best_bonus, rr; - prio = p->static_prio - bonus; - if (prio < MAX_RT_PRIO) - prio = MAX_RT_PRIO; - if (prio > MAX_PRIO-1) - prio = MAX_PRIO-1; + full_slice = slice(p); + if (full_slice > p->slice) + used_slice = full_slice - p->slice; + best_bonus = bonus(p); + prio = MAX_RT_PRIO + best_bonus; + /* SCHED_BATCH tasks have their bonus ignored */ + if (!batch_task(p)) + prio -= p->bonus; + rr = rr_interval(p); + prio += used_slice / rr; + if (prio > MIN_USER_PRIO) + prio = MIN_USER_PRIO; return prio; } @@ -744,14 +672,13 @@ #define TIME_SLICE_NICE_ZERO DEF_TIMESLICE #define LOAD_WEIGHT(lp) \ (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO) -#define PRIO_TO_LOAD_WEIGHT(prio) \ - LOAD_WEIGHT(static_prio_timeslice(prio)) -#define RTPRIO_TO_LOAD_WEIGHT(rp) \ - (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) +#define TASK_LOAD_WEIGHT(p) LOAD_WEIGHT(slice(p)) +#define RTPRIO_TO_LOAD_WEIGHT(rp) \ + (LOAD_WEIGHT((RR_INTERVAL + 20 + (rp)))) static void set_load_weight(task_t *p) { - if (has_rt_policy(p)) { + if (rt_task(p)) { #ifdef CONFIG_SMP if (p == task_rq(p)->migration_thread) /* @@ -764,7 +691,7 @@ #endif p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority); } else - p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); + p->load_weight = TASK_LOAD_WEIGHT(p); } static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) @@ -800,7 +727,7 @@ { int prio; - if (has_rt_policy(p)) + if (p->policy != SCHED_NORMAL && p->policy != SCHED_BATCH) prio = MAX_RT_PRIO-1 - p->rt_priority; else prio = __normal_prio(p); @@ -811,7 +738,7 @@ * Calculate the current priority, 
i.e. the priority * taken into account by the scheduler. This value might * be boosted by RT tasks, or might be boosted by - * interactivity modifiers. Will be RT if the task got + * bonus modifiers. Will be RT if the task got * RT-boosted. If not then it returns p->normal_prio. */ static int effective_prio(task_t *p) @@ -832,11 +759,7 @@ */ static void __activate_task(task_t *p, runqueue_t *rq) { - prio_array_t *target = rq->active; - - if (batch_task(p)) - target = rq->expired; - enqueue_task(p, target); + enqueue_task(p, rq); inc_nr_running(p, rq); } @@ -845,91 +768,103 @@ */ static inline void __activate_idle_task(task_t *p, runqueue_t *rq) { - enqueue_task_head(p, rq->active); + enqueue_task_head(p, rq); inc_nr_running(p, rq); } /* - * Recalculate p->normal_prio and p->prio after having slept, - * updating the sleep-average too: + * We increase our bonus by sleeping more than the time we ran. + * The ratio of sleep to run gives us the cpu% that we last ran and determines + * the maximum bonus we can acquire. */ -static int recalc_task_prio(task_t *p, unsigned long long now) +static void inc_bonus(task_t *p, unsigned long totalrun, unsigned long sleep) { - /* Caller must always ensure 'now >= p->timestamp' */ - unsigned long sleep_time = now - p->timestamp; + unsigned int best_bonus = sleep / (totalrun + 1); - if (batch_task(p)) - sleep_time = 0; + if (p->bonus >= best_bonus) + return; + best_bonus = bonus(p); + if (p->bonus < best_bonus) + p->bonus++; +} - if (likely(sleep_time > 0)) { - /* - * This ceiling is set to the lowest priority that would allow - * a task to be reinserted into the active array on timeslice - * completion. - */ - unsigned long ceiling = INTERACTIVE_SLEEP(p); +static inline void dec_bonus(task_t *p) +{ + p->totalrun = 0; + if (p->bonus) + p->bonus--; +} - if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) { - /* - * Prevents user tasks from achieving best priority - * with one single large enough sleep. 
- */ - p->sleep_avg = ceiling; - /* - * Using INTERACTIVE_SLEEP() as a ceiling places a - * nice(0) task 1ms sleep away from promotion, and - * gives it 700ms to round-robin with no chance of - * being demoted. This is more than generous, so - * mark this sleep as non-interactive to prevent the - * on-runqueue bonus logic from intervening should - * this task not receive cpu immediately. - */ - p->sleep_type = SLEEP_NONINTERACTIVE; - } else { - /* - * Tasks waking from uninterruptible sleep are - * limited in their sleep_avg rise as they - * are likely to be waiting on I/O - */ - if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) { - if (p->sleep_avg >= ceiling) - sleep_time = 0; - else if (p->sleep_avg + sleep_time >= - ceiling) { - p->sleep_avg = ceiling; - sleep_time = 0; - } - } +static inline void continue_slice(task_t *p) +{ + unsigned long total_run = NS_TO_JIFFIES(p->totalrun); - /* - * This code gives a bonus to interactive tasks. - * - * The boost works by updating the 'average sleep time' - * value here, based on ->timestamp. The more time a - * task spends sleeping, the higher the average gets - - * and the higher the priority boost gets as well. - */ - p->sleep_avg += sleep_time; + if (total_run >= p->slice || p->prio == MIN_USER_PRIO) + dec_bonus(p); + else { + unsigned long remainder; - } - if (p->sleep_avg > NS_MAX_SLEEP_AVG) - p->sleep_avg = NS_MAX_SLEEP_AVG; + p->slice -= total_run; + if (p->slice <= p->time_slice) + dec_bonus(p); + remainder = p->slice % rr_interval(p); + if (remainder) + p->time_slice = remainder; } +} + +/* + * recalc_task_prio - this checks for tasks that run ultra short timeslices + * or have just forked a thread/process and make them continue their old + * slice instead of starting a new one at high priority. 
+ */ +static inline void recalc_task_prio(task_t *p, const unsigned long long now) +{ + /* Double the systime to account for missed sub-jiffy time */ + unsigned long ns_systime = JIFFIES_TO_NS(p->systime) * 2; + unsigned long sleep_time = ns_diff(now, p->timestamp); + + /* + * Add the total for this last scheduled run (p->runtime) and system + * time (p->systime) done on behalf of p to the running total so far + * used (p->totalrun). + */ + p->totalrun += p->runtime + ns_systime; - return effective_prio(p); + /* systime is unintentionally seen as sleep, subtract it */ + if (likely(ns_systime < sleep_time)) + sleep_time -= ns_systime; + else + sleep_time = 0; + + if (unlikely(p->flags & PF_FORKED)) + sleep_time = 0; + + /* + * If we sleep longer than our running total and have not set the + * PF_NONSLEEP flag we gain a bonus. + */ + if (sleep_time >= p->totalrun && !(p->flags & PF_NONSLEEP)) { + inc_bonus(p, p->totalrun, sleep_time); + p->totalrun = 0; + return; + } + + /* We elevate priority by the amount of time we slept. */ + p->totalrun -= sleep_time; + continue_slice(p); } /* * activate_task - move a task to the runqueue and do priority recalculation * - * Update all the scheduling statistics stuff. (sleep average - * calculation, priority modifiers, etc.) + * Update all the scheduling statistics stuff. (priority modifiers, etc.) */ static void activate_task(task_t *p, runqueue_t *rq, int local) { - unsigned long long now; + unsigned long long now = sched_clock(); + unsigned long rr = rr_interval(p); - now = sched_clock(); #ifdef CONFIG_SMP if (!local) { /* Compensate for drifting sched_clock */ @@ -938,31 +873,13 @@ + rq->timestamp_last_tick; } #endif - - if (!rt_task(p)) - p->prio = recalc_task_prio(p, now); - - /* - * This checks to make sure it's not an uninterruptible task - * that is now waking up. - */ - if (p->sleep_type == SLEEP_NORMAL) { - /* - * Tasks which were woken up by interrupts (ie. hw events) - * are most likely of interactive nature. 
So we give them - * the credit of extending their sleep time to the period - * of time they spend on the runqueue, waiting for execution - * on a CPU, first time around: - */ - if (in_interrupt()) - p->sleep_type = SLEEP_INTERRUPTED; - else { - /* - * Normal first-time wakeups get a credit too for - * on-runqueue time, but it will be weighted down: - */ - p->sleep_type = SLEEP_INTERACTIVE; - } + p->slice = slice(p); + p->time_slice = p->slice % rr ? : rr; + if (!rt_task(p)) { + recalc_task_prio(p, now); + p->flags &= ~(PF_NONSLEEP | PF_FORKED); + p->systime = 0; + p->prio = effective_prio(p); } p->timestamp = now; @@ -975,8 +892,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) { dec_nr_running(p, rq); - dequeue_task(p, p->array); - p->array = NULL; + dequeue_task(p, rq); } /* @@ -1052,7 +968,7 @@ * If the task is not on a runqueue (and not running), then * it is sufficient to simply update the task's cpu field. */ - if (!p->array && !task_running(rq, p)) { + if (!task_queued(p) && !task_running(rq, p)) { set_task_cpu(p, dest_cpu); return 0; } @@ -1082,7 +998,7 @@ repeat: rq = task_rq_lock(p, &flags); /* Must be off runqueue entirely, not preempted. */ - if (unlikely(p->array || task_running(rq, p))) { + if (unlikely(task_queued(p) || task_running(rq, p))) { /* If it's preempted, we yield. It could be a while. */ preempted = !task_running(rq, p); task_rq_unlock(rq, &flags); @@ -1258,14 +1174,9 @@ struct sched_domain *tmp, *sd = NULL; for_each_domain(cpu, tmp) { - /* - * If power savings logic is enabled for a domain, stop there. - */ - if (tmp->flags & SD_POWERSAVINGS_BALANCE) - break; if (tmp->flags & flag) sd = tmp; - } + } while (sd) { cpumask_t span; @@ -1339,6 +1250,13 @@ } #endif +/* Check to see if p preempts rq->curr and resched if it does. 
*/ +static inline void preempt(const task_t *p, runqueue_t *rq) +{ + if (TASK_PREEMPTS_CURR(p, rq)) + resched_task(rq->curr); +} + /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -1370,7 +1288,7 @@ if (!(old_state & state)) goto out; - if (p->array) + if (task_queued(p)) goto out_running; cpu = task_cpu(p); @@ -1461,7 +1379,7 @@ old_state = p->state; if (!(old_state & state)) goto out; - if (p->array) + if (task_queued(p)) goto out_running; this_cpu = smp_processor_id(); @@ -1470,23 +1388,8 @@ out_activate: #endif /* CONFIG_SMP */ - if (old_state == TASK_UNINTERRUPTIBLE) { + if (old_state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - /* - * Tasks on involuntary sleep don't earn - * sleep_avg beyond just interactive state. - */ - p->sleep_type = SLEEP_NONINTERACTIVE; - } else - - /* - * Tasks that have marked their sleep as noninteractive get - * woken up with their sleep average not weighted in an - * interactive way. - */ - if (old_state & TASK_NONINTERACTIVE) - p->sleep_type = SLEEP_NONINTERACTIVE; - activate_task(p, rq, cpu == this_cpu); /* @@ -1497,10 +1400,8 @@ * the waker guarantees that the freshly woken up task is going * to be considered on this CPU.) */ - if (!sync || cpu != this_cpu) { - if (TASK_PREEMPTS_CURR(p, rq)) - resched_task(rq->curr); - } + if (!sync || cpu != this_cpu) + preempt(p, rq); success = 1; out_running: @@ -1551,7 +1452,6 @@ p->prio = current->normal_prio; INIT_LIST_HEAD(&p->run_list); - p->array = NULL; #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) if (unlikely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -1563,30 +1463,6 @@ /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; #endif - /* - * Share the timeslice between parent and child, thus the - * total amount of pending timeslices in the system doesn't change, - * resulting in more scheduling fairness. 
- */ - local_irq_disable(); - p->time_slice = (current->time_slice + 1) >> 1; - /* - * The remainder of the first timeslice might be recovered by - * the parent if the child exits early enough. - */ - p->first_time_slice = 1; - current->time_slice >>= 1; - p->timestamp = sched_clock(); - if (unlikely(!current->time_slice)) { - /* - * This case is rare, it happens when the parent has only - * a single jiffy left from its timeslice. Taking the - * runqueue lock is not a problem. - */ - current->time_slice = 1; - scheduler_tick(); - } - local_irq_enable(); put_cpu(); } @@ -1608,38 +1484,20 @@ this_cpu = smp_processor_id(); cpu = task_cpu(p); - /* - * We decrease the sleep average of forking parents - * and children as well, to keep max-interactive tasks - * from forking tasks that are max-interactive. The parent - * (current) is done further down, under its lock. - */ - p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) * - CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); - - p->prio = effective_prio(p); + /* Forked process gets no bonus to prevent fork bombs. */ + p->bonus = 0; + current->flags |= PF_FORKED; if (likely(cpu == this_cpu)) { + activate_task(p, rq, 1); if (!(clone_flags & CLONE_VM)) { /* * The VM isn't cloned, so we're in a good position to * do child-runs-first in anticipation of an exec. This * usually avoids a lot of COW overhead. 
*/ - if (unlikely(!current->array)) - __activate_task(p, rq); - else { - p->prio = current->prio; - p->normal_prio = current->normal_prio; - list_add_tail(&p->run_list, &current->run_list); - p->array = current->array; - p->array->nr_active++; - inc_nr_running(p, rq); - } set_need_resched(); - } else - /* Run child last */ - __activate_task(p, rq); + } /* * We skip the following code due to cpu == this_cpu * @@ -1656,53 +1514,19 @@ */ p->timestamp = (p->timestamp - this_rq->timestamp_last_tick) + rq->timestamp_last_tick; - __activate_task(p, rq); - if (TASK_PREEMPTS_CURR(p, rq)) - resched_task(rq->curr); + activate_task(p, rq, 0); + preempt(p, rq); /* * Parent and child are on different CPUs, now get the - * parent runqueue to update the parent's ->sleep_avg: + * parent runqueue to update the parent's ->flags: */ task_rq_unlock(rq, &flags); this_rq = task_rq_lock(current, &flags); } - current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) * - PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); task_rq_unlock(this_rq, &flags); } -/* - * Potentially available exiting-child timeslices are - * retrieved here - this way the parent does not get - * penalized for creating too many threads. - * - * (this cannot be used to 'generate' timeslices - * artificially, because any timeslice recovered here - * was given away by the parent in the first place.) - */ -void fastcall sched_exit(task_t *p) -{ - unsigned long flags; - runqueue_t *rq; - - /* - * If the child was a (relative-) CPU hog then decrease - * the sleep_avg of the parent as well.
- */ - rq = task_rq_lock(p->parent, &flags); - if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { - p->parent->time_slice += p->time_slice; - if (unlikely(p->parent->time_slice > task_timeslice(p))) - p->parent->time_slice = task_timeslice(p); - } - if (p->sleep_avg < p->parent->sleep_avg) - p->parent->sleep_avg = p->parent->sleep_avg / - (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg / - (EXIT_WEIGHT + 1); - task_rq_unlock(rq, &flags); -} - /** * prepare_task_switch - prepare to switch tasks * @rq: the runqueue preparing to switch @@ -1809,7 +1633,6 @@ WARN_ON(rq->prev_mm); rq->prev_mm = oldmm; } - spin_release(&rq->lock.dep_map, 1, _THIS_IP_); /* Here we just switch the register state and the stack. */ switch_to(prev, next, prev); @@ -1892,6 +1715,9 @@ /* * double_rq_lock - safely lock two runqueues * + * We must take them in cpu order to match code in + * dependent_sleeper and wake_dependent_sleeper. + * * Note this does not disable interrupts like task_rq_lock, * you need to do so manually before calling. */ @@ -1903,7 +1729,7 @@ spin_lock(&rq1->lock); __acquire(rq2->lock); /* Fake it out ;) */ } else { - if (rq1 < rq2) { + if (rq1 < rq2) { spin_lock(&rq1->lock); spin_lock(&rq2->lock); } else { @@ -1923,7 +1749,7 @@ __releases(rq1->lock) __releases(rq2->lock) { - spin_unlock_non_nested(&rq1->lock); + spin_unlock(&rq1->lock); if (rq1 != rq2) spin_unlock(&rq2->lock); else @@ -1939,7 +1765,7 @@ __acquires(this_rq->lock) { if (unlikely(!spin_trylock(&busiest->lock))) { - if (busiest < this_rq) { + if (busiest < this_rq) { spin_unlock(&this_rq->lock); spin_lock(&busiest->lock); spin_lock(&this_rq->lock); @@ -1997,23 +1823,21 @@ * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. 
*/ -static -void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, - runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) +static void pull_task(runqueue_t *src_rq, task_t *p, runqueue_t *this_rq, + int this_cpu) { - dequeue_task(p, src_array); + dequeue_task(p, src_rq); dec_nr_running(p, src_rq); set_task_cpu(p, this_cpu); inc_nr_running(p, this_rq); - enqueue_task(p, this_array); + enqueue_task(p, this_rq); p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) + this_rq->timestamp_last_tick; /* * Note that idle threads have a prio of MAX_PRIO, for this test * to be always true for them. */ - if (TASK_PREEMPTS_CURR(p, this_rq)) - resched_task(this_rq->curr); + preempt(p, this_rq); } /* @@ -2060,121 +1884,85 @@ * Called with both runqueues locked. */ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, - unsigned long max_nr_move, unsigned long max_load_move, - struct sched_domain *sd, enum idle_type idle, - int *all_pinned) -{ - prio_array_t *array, *dst_array; - struct list_head *head, *curr; - int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; - int busiest_best_prio_seen; - int skip_for_load; /* skip the task based on weighted load issues */ - long rem_load_move; - task_t *tmp; - - if (max_nr_move == 0 || max_load_move == 0) - goto out; + unsigned long max_nr_move, unsigned long max_load_move, + struct sched_domain *sd, enum idle_type idle, + int *all_pinned) +{ + struct list_head *head, *curr; + int idx, pulled = 0, pinned = 0, this_min_prio; + long rem_load_move; + task_t *tmp; + + if (max_nr_move == 0 || max_load_move == 0) + goto out; + + rem_load_move = max_load_move; + pinned = 1; + this_min_prio = this_rq->curr->prio; - rem_load_move = max_load_move; - pinned = 1; - this_best_prio = rq_best_prio(this_rq); - busiest_best_prio = rq_best_prio(busiest); - /* - * Enable handling of the case where there is more than one task - * with the best priority.
If the current running task is one - * of those with prio==busiest_best_prio we know it won't be moved - * and therefore it's safe to override the skip (based on load) of - * any task we find with that prio. - */ - busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; - - /* - * We first consider expired tasks. Those will likely not be - * executed in the near future, and they are most likely to - * be cache-cold, thus switching CPUs has the least effect - * on them. - */ - if (busiest->expired->nr_active) { - array = busiest->expired; - dst_array = this_rq->expired; - } else { - array = busiest->active; - dst_array = this_rq->active; - } - -new_array: - /* Start searching at priority 0: */ - idx = 0; + /* Start searching at priority 0: */ + idx = 0; skip_bitmap: - if (!idx) - idx = sched_find_first_bit(array->bitmap); - else - idx = find_next_bit(array->bitmap, MAX_PRIO, idx); - if (idx >= MAX_PRIO) { - if (array == busiest->expired && busiest->active->nr_active) { - array = busiest->active; - dst_array = this_rq->active; - goto new_array; - } - goto out; - } + if (!idx) + idx = sched_find_first_bit(busiest->bitmap); + else + idx = find_next_bit(busiest->bitmap, MAX_PRIO, idx); + if (idx >= MAX_PRIO) + goto out; - head = array->queue + idx; - curr = head->prev; + head = busiest->queue + idx; + curr = head->prev; skip_queue: - tmp = list_entry(curr, task_t, run_list); + tmp = list_entry(curr, task_t, run_list); - curr = curr->prev; + curr = curr->prev; - /* - * To help distribute high priority tasks accross CPUs we don't - * skip a task if it will be the highest priority task (i.e.
smallest - * prio value) on its new queue regardless of its load weight - */ - skip_for_load = tmp->load_weight > rem_load_move; - if (skip_for_load && idx < this_best_prio) - skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; - if (skip_for_load || - !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { - busiest_best_prio_seen |= idx == busiest_best_prio; - if (curr != head) - goto skip_queue; - idx++; - goto skip_bitmap; - } + /* + * To help distribute high priority tasks accross CPUs we don't + * skip a task if it will be the highest priority task (i.e. smallest + * prio value) on its new queue regardless of its load weight + */ + if ((idx >= this_min_prio && tmp->load_weight > rem_load_move) || + !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; + } #ifdef CONFIG_SCHEDSTATS - if (task_hot(tmp, busiest->timestamp_last_tick, sd)) - schedstat_inc(sd, lb_hot_gained[idle]); + if (task_hot(tmp, busiest->timestamp_last_tick, sd)) + schedstat_inc(sd, lb_hot_gained[idle]); #endif - pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); - pulled++; - rem_load_move -= tmp->load_weight; + pull_task(busiest, tmp, this_rq, this_cpu); + pulled++; + rem_load_move -= tmp->load_weight; + + /* + * We only want to steal up to the prescribed number of tasks + * and the prescribed amount of weighted load. + */ + if (pulled < max_nr_move && rem_load_move > 0) { + if (idx < this_min_prio) + this_min_prio = idx; + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; + } - /* - * We only want to steal up to the prescribed number of tasks - * and the prescribed amount of weighted load.
- */ - if (pulled < max_nr_move && rem_load_move > 0) { - if (idx < this_best_prio) - this_best_prio = idx; - if (curr != head) - goto skip_queue; - idx++; - goto skip_bitmap; - } out: - /* - * Right now, this is the only place pull_task() is called, - * so we can safely collect pull_task() stats here rather than - * inside pull_task(). - */ - schedstat_add(sd, lb_gained[idle], pulled); - - if (all_pinned) - *all_pinned = pinned; - return pulled; + /* + * Right now, this is the only place pull_task() is called, + * so we can safely collect pull_task() stats here rather than + * inside pull_task().
+ */ + schedstat_add(sd, lb_gained[idle], pulled); + + if (all_pinned) + *all_pinned = pinned; + return pulled; } /* @@ -2192,12 +2001,6 @@ unsigned long busiest_load_per_task, busiest_nr_running; unsigned long this_load_per_task, this_nr_running; int load_idx; -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - int power_savings_balance = 1; - unsigned long leader_nr_running = 0, min_load_per_task = 0; - unsigned long min_nr_running = ULONG_MAX; - struct sched_group *group_min = NULL, *group_leader = NULL; -#endif max_load = this_load = total_load = total_pwr = 0; busiest_load_per_task = busiest_nr_running = 0; @@ -2210,7 +2013,7 @@ load_idx = sd->idle_idx; do { - unsigned long load, group_capacity; + unsigned long load; int local_group; int i; unsigned long sum_nr_running, sum_weighted_load; @@ -2243,76 +2046,18 @@ /* Adjust by relative CPU power of the group */ avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; - group_capacity = group->cpu_power / SCHED_LOAD_SCALE; - if (local_group) { this_load = avg_load; this = group; this_nr_running = sum_nr_running; this_load_per_task = sum_weighted_load; } else if (avg_load > max_load && - sum_nr_running > group_capacity) { + sum_nr_running > group->cpu_power / SCHED_LOAD_SCALE) { max_load = avg_load; busiest = group; busiest_nr_running = sum_nr_running; busiest_load_per_task = sum_weighted_load; } - -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - /* - * Busy processors will not participate in power savings - * balance. 
- */ - if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) - goto group_next; - - /* - * If the local group is idle or completely loaded - * no need to do power savings balance at this domain - */ - if (local_group && (this_nr_running >= group_capacity || - !this_nr_running)) - power_savings_balance = 0; - - /* - * If a group is already running at full capacity or idle, - * don't include that group in power savings calculations - */ - if (!power_savings_balance || sum_nr_running >= group_capacity - || !sum_nr_running) - goto group_next; - - /* - * Calculate the group which has the least non-idle load. - * This is the group from where we need to pick up the load - * for saving power - */ - if ((sum_nr_running < min_nr_running) || - (sum_nr_running == min_nr_running && - first_cpu(group->cpumask) < - first_cpu(group_min->cpumask))) { - group_min = group; - min_nr_running = sum_nr_running; - min_load_per_task = sum_weighted_load / - sum_nr_running; - } - - /* - * Calculate the group which is almost near its - * capacity but still has some space to pick up some load - * from other group and save more power - */ - if (sum_nr_running <= group_capacity - 1) - if (sum_nr_running > leader_nr_running || - (sum_nr_running == leader_nr_running && - first_cpu(group->cpumask) > - first_cpu(group_leader->cpumask))) { - group_leader = group; - leader_nr_running = sum_nr_running; - } - -group_next: -#endif group = group->next; } while (group != sd->groups); @@ -2421,16 +2166,7 @@ return busiest; out_balanced: -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) - goto ret; - - if (this == group_leader && group_leader != group_min) { - *imbalance = min_load_per_task; - return group_min; - } -ret: -#endif + *imbalance = 0; return NULL; } @@ -2483,8 +2219,7 @@ int active_balance = 0; int sd_idle = 0; - if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && - !sched_smt_power_savings) + if (idle != 
NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER) sd_idle = 1; schedstat_inc(sd, lb_cnt[idle]); @@ -2573,8 +2308,7 @@ sd->balance_interval *= 2; } - if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && - !sched_smt_power_savings) + if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER) return -1; return nr_moved; @@ -2589,7 +2323,7 @@ (sd->balance_interval < sd->max_interval)) sd->balance_interval *= 2; - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) return -1; return 0; } @@ -2610,7 +2344,7 @@ int nr_moved = 0; int sd_idle = 0; - if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) + if (sd->flags & SD_SHARE_CPUPOWER) sd_idle = 1; schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); @@ -2637,7 +2371,7 @@ nr_moved = move_tasks(this_rq, this_cpu, busiest, minus_1_or_zero(busiest->nr_running), imbalance, sd, NEWLY_IDLE, NULL); - spin_unlock_non_nested(&busiest->lock); + spin_unlock(&busiest->lock); } if (!nr_moved) { @@ -2651,7 +2385,7 @@ out_balanced: schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); - if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) + if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) return -1; sd->nr_balance_failed = 0; return 0; @@ -2710,7 +2444,7 @@ if ((sd->flags & SD_LOAD_BALANCE) && cpu_isset(busiest_cpu, sd->span)) break; - } + } if (unlikely(sd == NULL)) goto out; @@ -2723,7 +2457,7 @@ else schedstat_inc(sd, alb_failed); out: - spin_unlock_non_nested(&target_rq->lock); + spin_unlock(&target_rq->lock); } /* @@ -2851,22 +2585,6 @@ } /* - * We place interactive tasks back into the active array, if possible. - * - * To guarantee that this does not starve expired tasks we ignore the - * interactivity of a task if the first expired task had to wait more - * than a 'reasonable' amount of time. This deadline timeout is - * load-dependent, as the frequency of array switched decreases with - * increasing number of running tasks. 
We also ignore the interactivity - * if a better static_prio task has expired: - */ -#define EXPIRED_STARVING(rq) \ - ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ - (jiffies - (rq)->expired_timestamp >= \ - STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ - ((rq)->curr->static_prio > (rq)->best_expired_prio)) - -/* * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() @@ -2915,6 +2633,7 @@ else cpustat->idle = cputime64_add(cpustat->idle, tmp); /* Account for system time used */ + p->systime++; acct_update_integrals(p); } @@ -2939,18 +2658,23 @@ cpustat->steal = cputime64_add(cpustat->steal, tmp); } +static void time_slice_expired(task_t *p, runqueue_t *rq) +{ + set_tsk_need_resched(p); + p->time_slice = rr_interval(p); + requeue_task(p, rq, effective_prio(p)); +} + /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. - * - * It also gets called by the fork code, when changing the parent's - * timeslices. */ void scheduler_tick(void) { int cpu = smp_processor_id(); runqueue_t *rq = this_rq(); task_t *p = current; + unsigned long debit; unsigned long long now = sched_clock(); update_cpu_clock(p, rq, now); @@ -2965,73 +2689,37 @@ } /* Task might have expired already, but not scheduled off yet */ - if (p->array != rq->active) { + if (unlikely(!task_queued(p))) { set_tsk_need_resched(p); goto out; } - spin_lock(&rq->lock); /* - * The task was running during this tick - update the - * time slice counter. Note: we do not update a thread's - * priority until it either goes to sleep or uses up its - * timeslice. This makes it possible for interactive tasks - * to use up their timeslices at their highest priority levels. + * SCHED_FIFO tasks never run out of timeslice. */ - if (rt_task(p)) { - /* - * RR tasks need a special form of timeslice management. - * FIFO tasks have no timeslices. 
- */ - if ((p->policy == SCHED_RR) && !--p->time_slice) { - p->time_slice = task_timeslice(p); - p->first_time_slice = 0; - set_tsk_need_resched(p); + if (unlikely(p->policy == SCHED_FIFO)) + goto out; - /* put it at the end of the queue: */ - requeue_task(p, rq->active); - } + spin_lock(&rq->lock); + debit = ns_diff(rq->timestamp_last_tick, p->timestamp); + p->ns_debit += debit; + if (p->ns_debit < NSJIFFY) + goto out_unlock; + p->ns_debit %= NSJIFFY; + + /* Tasks lose bonus each time they use up a full slice(). */ + if (!--p->slice) { + dec_bonus(p); + p->slice = slice(p); + time_slice_expired(p, rq); goto out_unlock; } + /* + * Tasks that run out of time_slice but still have slice left get + * requeued with a lower priority && RR_INTERVAL time_slice. + */ if (!--p->time_slice) { - dequeue_task(p, rq->active); - set_tsk_need_resched(p); - p->prio = effective_prio(p); - p->time_slice = task_timeslice(p); - p->first_time_slice = 0; - - if (!rq->expired_timestamp) - rq->expired_timestamp = jiffies; - if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { - enqueue_task(p, rq->expired); - if (p->static_prio < rq->best_expired_prio) - rq->best_expired_prio = p->static_prio; - } else - enqueue_task(p, rq->active); - } else { - /* - * Prevent a too long timeslice allowing a task to monopolize - * the CPU. We do this by splitting up the timeslice into - * smaller pieces. - * - * Note: this does not mean the task's timeslices expire or - * get lost in any way, they just might be preempted by - * another task of equal priority. (one with higher - * priority would have preempted this task already.) We - * requeue this task to the end of the list on this priority - * level, which is in essence a round-robin of tasks with - * equal priority. - * - * This only applies to tasks in the interactive - * delta range with at least TIMESLICE_GRANULARITY to requeue. 
- */ - if (TASK_INTERACTIVE(p) && !((task_timeslice(p) - - p->time_slice) % TIMESLICE_GRANULARITY(p)) && - (p->time_slice >= TIMESLICE_GRANULARITY(p)) && - (p->array == rq->active)) { - - requeue_task(p, rq->active); - set_tsk_need_resched(p); - } + time_slice_expired(p, rq); + goto out_unlock; } out_unlock: spin_unlock(&rq->lock); @@ -3055,37 +2743,37 @@ struct sched_domain *tmp, *sd = NULL; int i; - for_each_domain(this_cpu, tmp) { - if (tmp->flags & SD_SHARE_CPUPOWER) { + for_each_domain(this_cpu, tmp) { + if (tmp->flags & SD_SHARE_CPUPOWER) { sd = tmp; - break; - } - } + break; + } + } if (!sd) return; - for_each_cpu_mask(i, sd->span) { + for_each_cpu_mask(i, sd->span) { runqueue_t *smt_rq = cpu_rq(i); - if (i == this_cpu) - continue; - if (unlikely(!spin_trylock(&smt_rq->lock))) - continue; + if (i == this_cpu) + continue; + if (unlikely(!spin_trylock(&smt_rq->lock))) + continue; wakeup_busy_runqueue(smt_rq); - spin_unlock(&smt_rq->lock); + spin_unlock(&smt_rq->lock); } } /* * number of 'lost' timeslices this task wont be able to fully - * utilize, if another task runs on a sibling. This models the + * utilise, if another task runs on a sibling. 
This models the * slowdown effect of other tasks running on siblings: */ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) { - return p->time_slice * (100 - sd->per_cpu_gain) / 100; + return p->slice * (100 - sd->per_cpu_gain) / 100; } /* @@ -3099,35 +2787,35 @@ struct sched_domain *tmp, *sd = NULL; int ret = 0, i; - /* kernel/rt threads do not participate in dependent sleeping */ - if (!p->mm || rt_task(p)) - return 0; - - for_each_domain(this_cpu, tmp) { - if (tmp->flags & SD_SHARE_CPUPOWER) { + /* kernel/rt threads do not participate in dependent sleeping */ + if (!p->mm || rt_task(p)) + return 0; + + for_each_domain(this_cpu, tmp) { + if (tmp->flags & SD_SHARE_CPUPOWER) { sd = tmp; - break; - } - } + break; + } + } if (!sd) return 0; - for_each_cpu_mask(i, sd->span) { - runqueue_t *smt_rq; - task_t *smt_curr; + for_each_cpu_mask(i, sd->span) { + runqueue_t *smt_rq; + task_t *smt_curr; - if (i == this_cpu) - continue; + if (i == this_cpu) + continue; - smt_rq = cpu_rq(i); - if (unlikely(!spin_trylock(&smt_rq->lock))) - continue; + smt_rq = cpu_rq(i); + if (unlikely(!spin_trylock(&smt_rq->lock))) + continue; - smt_curr = smt_rq->curr; + smt_curr = smt_rq->curr; - if (!smt_curr->mm) - goto unlock; + if (!smt_curr->mm) + goto unlock; /* * If a user task with lower static priority than the @@ -3145,14 +2833,14 @@ if ((jiffies % DEF_TIMESLICE) > (sd->per_cpu_gain * DEF_TIMESLICE / 100)) ret = 1; - } else { + } else if (smt_curr->static_prio < p->static_prio && !TASK_PREEMPTS_CURR(p, smt_rq) && - smt_slice(smt_curr, sd) > task_timeslice(p)) + smt_slice(smt_curr, sd) > slice(p)) ret = 1; - } + unlock: - spin_unlock(&smt_rq->lock); + spin_unlock(&smt_rq->lock); } return ret; } @@ -3161,8 +2849,7 @@ { } -static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, - task_t *p) +static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) { return 0; } @@ -3175,13 +2862,12 @@ /* * Underflow? 
*/ - if (DEBUG_WARN_ON((preempt_count() < 0))) - return; + BUG_ON((preempt_count() < 0)); preempt_count() += val; /* * Spinlock count overflowing soon? */ - DEBUG_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); + BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); } EXPORT_SYMBOL(add_preempt_count); @@ -3190,27 +2876,17 @@ /* * Underflow? */ - if (DEBUG_WARN_ON(val > preempt_count())) - return; + BUG_ON(val > preempt_count()); /* * Is the spinlock portion underflowing? */ - if (DEBUG_WARN_ON((val < PREEMPT_MASK) && - !(preempt_count() & PREEMPT_MASK))) - return; - + BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); preempt_count() -= val; } EXPORT_SYMBOL(sub_preempt_count); #endif -static inline int interactive_sleep(enum sleep_type sleep_type) -{ - return (sleep_type == SLEEP_INTERACTIVE || - sleep_type == SLEEP_INTERRUPTED); -} - /* * schedule() is the main scheduler function. */ @@ -3219,11 +2895,10 @@ long *switch_count; task_t *prev, *next; runqueue_t *rq; - prio_array_t *array; struct list_head *queue; unsigned long long now; - unsigned long run_time; - int cpu, idx, new_prio; + unsigned long debit; + int cpu, idx; /* * Test if we are atomic. Since do_exit() needs to call into @@ -3256,20 +2931,11 @@ schedstat_inc(rq, sched_cnt); now = sched_clock(); - if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) { - run_time = now - prev->timestamp; - if (unlikely((long long)(now - prev->timestamp) < 0)) - run_time = 0; - } else - run_time = NS_MAX_SLEEP_AVG; - - /* - * Tasks charged proportionately less run_time at high sleep_avg to - * delay them losing their interactive status - */ - run_time /= (CURRENT_BONUS(prev) ? 
: 1); spin_lock_irq(&rq->lock); + prev->runtime = ns_diff(now, prev->timestamp); + debit = ns_diff(now, rq->timestamp_last_tick) % NSJIFFY; + prev->ns_debit += debit; if (unlikely(prev->flags & PF_DEAD)) prev->state = EXIT_DEAD; @@ -3281,8 +2947,10 @@ unlikely(signal_pending(prev)))) prev->state = TASK_RUNNING; else { - if (prev->state == TASK_UNINTERRUPTIBLE) + if (prev->state == TASK_UNINTERRUPTIBLE) { + prev->flags |= PF_NONSLEEP; rq->nr_uninterruptible++; + } deactivate_task(prev, rq); } } @@ -3292,64 +2960,30 @@ idle_balance(cpu, rq); if (!rq->nr_running) { next = rq->idle; - rq->expired_timestamp = 0; - wake_sleeping_dependent(cpu); + wake_sleeping_dependent(cpu); goto switch_tasks; } } - array = rq->active; - if (unlikely(!array->nr_active)) { - /* - * Switch the active and expired arrays. - */ - schedstat_inc(rq, sched_switch); - rq->active = rq->expired; - rq->expired = array; - array = rq->active; - rq->expired_timestamp = 0; - rq->best_expired_prio = MAX_PRIO; - } - - idx = sched_find_first_bit(array->bitmap); - queue = array->queue + idx; + idx = sched_find_first_bit(rq->bitmap); + queue = rq->queue + idx; next = list_entry(queue->next, task_t, run_list); - if (!rt_task(next) && interactive_sleep(next->sleep_type)) { - unsigned long long delta = now - next->timestamp; - if (unlikely((long long)(now - next->timestamp) < 0)) - delta = 0; - - if (next->sleep_type == SLEEP_INTERACTIVE) - delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; - - array = next->array; - new_prio = recalc_task_prio(next, next->timestamp + delta); - - if (unlikely(next->prio != new_prio)) { - dequeue_task(next, array); - next->prio = new_prio; - enqueue_task(next, array); - } - } - next->sleep_type = SLEEP_NORMAL; - if (dependent_sleeper(cpu, rq, next)) - next = rq->idle; + if (dependent_sleeper(cpu, rq, next)) + next = rq->idle; switch_tasks: if (next == rq->idle) schedstat_inc(rq, sched_goidle); - prefetch(next); - prefetch_stack(next); + else { + prefetch(next); + 
prefetch_stack(next); + } + prev->timestamp = now; clear_tsk_need_resched(prev); rcu_qsctr_inc(task_cpu(prev)); update_cpu_clock(prev, rq, now); - prev->sleep_avg -= run_time; - if ((long)prev->sleep_avg <= 0) - prev->sleep_avg = 0; - prev->timestamp = prev->last_ran = now; - sched_info_switch(prev, next); if (likely(prev != next)) { next->timestamp = now; @@ -3562,8 +3196,8 @@ void init_completion(struct completion *x) { - x->done = 0; - __init_waitqueue_head(&x->wait); + x->done = 0; + __init_waitqueue_head(&x->wait); } EXPORT_SYMBOL(init_completion); @@ -3801,28 +3435,20 @@ void rt_mutex_setprio(task_t *p, int prio) { unsigned long flags; - prio_array_t *array; runqueue_t *rq; - int oldprio; + int oldprio, queued; BUG_ON(prio < 0 || prio > MAX_PRIO); rq = task_rq_lock(p, &flags); oldprio = p->prio; - array = p->array; - if (array) - dequeue_task(p, array); + if ((queued = task_queued(p))) + dequeue_task(p, rq); p->prio = prio; - if (array) { - /* - * If changing to an RT priority then queue it - * in the active array! 
- */ - if (rt_task(p)) - array = rq->active; - enqueue_task(p, array); + if (queued) { + enqueue_task(p, rq); /* * Reschedule if we are currently running on this runqueue and * our priority decreased, or if we are not currently running on @@ -3831,8 +3457,8 @@ if (task_running(rq, p)) { if (p->prio > oldprio) resched_task(rq->curr); - } else if (TASK_PREEMPTS_CURR(p, rq)) - resched_task(rq->curr); + } else + preempt(p, rq); } task_rq_unlock(rq, &flags); } @@ -3842,9 +3468,8 @@ void set_user_nice(task_t *p, long nice) { unsigned long flags; - prio_array_t *array; runqueue_t *rq; - int old_prio, delta; + int queued, old_prio, new_prio, delta; if (TASK_NICE(p) == nice || nice < -20 || nice > 19) return; @@ -3859,24 +3484,26 @@ * it wont have any effect on scheduling until the task is * not SCHED_NORMAL/SCHED_BATCH: */ - if (has_rt_policy(p)) { + if (rt_task(p)) { p->static_prio = NICE_TO_PRIO(nice); goto out_unlock; } - array = p->array; - if (array) { - dequeue_task(p, array); + if ((queued = task_queued(p))) { + dequeue_task(p, rq); dec_raw_weighted_load(rq, p); } + old_prio = p->prio; + new_prio = NICE_TO_PRIO(nice); + delta = new_prio - old_prio; p->static_prio = NICE_TO_PRIO(nice); set_load_weight(p); - old_prio = p->prio; - p->prio = effective_prio(p); - delta = p->prio - old_prio; + p->prio += delta; + if (p->bonus > bonus(p)) + p->bonus= bonus(p); - if (array) { - enqueue_task(p, array); + if (queued) { + enqueue_task(p, rq); inc_raw_weighted_load(rq, p); /* * If the task increased its priority or is running and @@ -3888,6 +3515,7 @@ out_unlock: task_rq_unlock(rq, &flags); } + EXPORT_SYMBOL(set_user_nice); /* @@ -3999,17 +3627,12 @@ /* Actually do priority change: must hold rq lock. 
*/ static void __setscheduler(struct task_struct *p, int policy, int prio) { - BUG_ON(p->array); + BUG_ON(task_queued(p)); p->policy = policy; p->rt_priority = prio; p->normal_prio = normal_prio(p); /* we are holding p->pi_lock already */ p->prio = rt_mutex_getprio(p); - /* - * SCHED_BATCH tasks are treated as perpetual CPU hogs: - */ - if (policy == SCHED_BATCH) - p->sleep_avg = 0; set_load_weight(p); } @@ -4024,8 +3647,7 @@ struct sched_param *param) { int retval; - int oldprio, oldpolicy = -1; - prio_array_t *array; + int queued, oldprio, oldpolicy = -1; unsigned long flags; runqueue_t *rq; @@ -4093,12 +3715,11 @@ spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } - array = p->array; - if (array) + if ((queued = task_queued(p))) deactivate_task(p, rq); oldprio = p->prio; __setscheduler(p, policy, param->sched_priority); - if (array) { + if (queued) { __activate_task(p, rq); /* * Reschedule if we are currently running on this runqueue and @@ -4108,8 +3729,8 @@ if (task_running(rq, p)) { if (p->prio > oldprio) resched_task(rq->curr); - } else if (TASK_PREEMPTS_CURR(p, rq)) - resched_task(rq->curr); + } else + preempt(p, rq); } __task_rq_unlock(rq); spin_unlock_irqrestore(&p->pi_lock, flags); @@ -4368,50 +3989,28 @@ /** * sys_sched_yield - yield the current processor to other threads. - * - * this function yields the current CPU by moving the calling thread - * to the expired array. If there are no other threads running on this - * CPU then this function will return. + * This function yields the current CPU by dropping the priority of current + * to the lowest priority. */ asmlinkage long sys_sched_yield(void) { + int newprio; runqueue_t *rq = this_rq_lock(); - prio_array_t *array = current->array; - prio_array_t *target = rq->expired; + newprio = current->prio; schedstat_inc(rq, yld_cnt); - /* - * We implement yielding by moving the task into the expired - * queue. - * - * (special rule: RT tasks will just roundrobin in the active - * array.) 
- */ - if (rt_task(current)) - target = rq->active; + current->slice = slice(current); + current->time_slice = rr_interval(current); + if (likely(!rt_task(current))) + newprio = MIN_USER_PRIO; - if (array->nr_active == 1) { - schedstat_inc(rq, yld_act_empty); - if (!rq->expired->nr_active) - schedstat_inc(rq, yld_both_empty); - } else if (!rq->expired->nr_active) - schedstat_inc(rq, yld_exp_empty); - - if (array != target) { - dequeue_task(current, array); - enqueue_task(current, target); - } else - /* - * requeue_task is cheaper so perform that if possible. - */ - requeue_task(current, array); + requeue_task(current, rq, newprio); /* * Since we are going to call schedule() anyway, there's * no need to preempt or enable interrupts: */ __release(rq->lock); - spin_release(&rq->lock.dep_map, 1, _THIS_IP_); _raw_spin_unlock(&rq->lock); preempt_enable_no_resched(); @@ -4471,7 +4070,6 @@ spin_lock(lock); } if (need_resched()) { - spin_release(&lock->dep_map, 1, _THIS_IP_); _raw_spin_unlock(lock); preempt_enable_no_resched(); __cond_resched(); @@ -4488,9 +4086,7 @@ BUG_ON(!in_softirq()); if (need_resched()) { - raw_local_irq_disable(); - _local_bh_enable(); - raw_local_irq_enable(); + __local_bh_enable(); __cond_resched(); local_bh_disable(); return 1; @@ -4624,7 +4220,7 @@ goto out_unlock; jiffies_to_timespec(p->policy & SCHED_FIFO ? - 0 : task_timeslice(p), &t); + 0 : slice(p), &t); read_unlock(&tasklist_lock); retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; out_nounlock: @@ -4747,8 +4343,6 @@ unsigned long flags; idle->timestamp = sched_clock(); - idle->sleep_avg = 0; - idle->array = NULL; idle->prio = idle->normal_prio = MAX_PRIO; idle->state = TASK_RUNNING; idle->cpus_allowed = cpumask_of_cpu(cpu); @@ -4865,7 +4459,7 @@ goto out; set_task_cpu(p, dest_cpu); - if (p->array) { + if (task_queued(p)) { /* * Sync timestamp with rq_dest's before activating. 
* The same thing could be achieved by doing this step @@ -4876,8 +4470,7 @@ + rq_dest->timestamp_last_tick; deactivate_task(p, rq_src); activate_task(p, rq_dest, 0); - if (TASK_PREEMPTS_CURR(p, rq_dest)) - resched_task(rq_dest->curr); + preempt(p, rq_dest); } out: @@ -5091,7 +4684,7 @@ for (arr = 0; arr < 2; arr++) { for (i = 0; i < MAX_PRIO; i++) { - struct list_head *list = &rq->arrays[arr].queue[i]; + struct list_head *list = &rq->queue[i]; while (!list_empty(list)) migrate_dead(dead_cpu, list_entry(list->next, task_t, @@ -6089,7 +5682,6 @@ } #endif -int sched_smt_power_savings = 0, sched_mc_power_savings = 0; /* * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we * can switch it on easily if needed. @@ -6471,72 +6063,37 @@ #endif /* Calculate CPU power for physical packages and nodes */ -#ifdef CONFIG_SCHED_SMT for_each_cpu_mask(i, *cpu_map) { + int power; struct sched_domain *sd; +#ifdef CONFIG_SCHED_SMT sd = &per_cpu(cpu_domains, i); - sd->groups->cpu_power = SCHED_LOAD_SCALE; - } + power = SCHED_LOAD_SCALE; + sd->groups->cpu_power = power; #endif #ifdef CONFIG_SCHED_MC - for_each_cpu_mask(i, *cpu_map) { - int power; - struct sched_domain *sd; sd = &per_cpu(core_domains, i); - if (sched_smt_power_savings) - power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask); - else - power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) + power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) * SCHED_LOAD_SCALE / 10; sd->groups->cpu_power = power; - } -#endif - for_each_cpu_mask(i, *cpu_map) { - struct sched_domain *sd; -#ifdef CONFIG_SCHED_MC sd = &per_cpu(phys_domains, i); - if (i != first_cpu(sd->groups->cpumask)) - continue; - sd->groups->cpu_power = 0; - if (sched_mc_power_savings || sched_smt_power_savings) { - int j; - - for_each_cpu_mask(j, sd->groups->cpumask) { - struct sched_domain *sd1; - sd1 = &per_cpu(core_domains, j); - /* - * for each core we will add once - * to the group in physical domain - */ - if (j != 
first_cpu(sd1->groups->cpumask)) - continue; - - if (sched_smt_power_savings) - sd->groups->cpu_power += sd1->groups->cpu_power; - else - sd->groups->cpu_power += SCHED_LOAD_SCALE; - } - } else - /* - * This has to be < 2 * SCHED_LOAD_SCALE - * Lets keep it SCHED_LOAD_SCALE, so that - * while calculating NUMA group's cpu_power - * we can simply do - * numa_group->cpu_power += phys_group->cpu_power; - * - * See "only add power once for each physical pkg" - * comment below - */ - sd->groups->cpu_power = SCHED_LOAD_SCALE; + /* + * This has to be < 2 * SCHED_LOAD_SCALE + * Lets keep it SCHED_LOAD_SCALE, so that + * while calculating NUMA group's cpu_power + * we can simply do + * numa_group->cpu_power += phys_group->cpu_power; + * + * See "only add power once for each physical pkg" + * comment below + */ + sd->groups->cpu_power = SCHED_LOAD_SCALE; #else - int power; sd = &per_cpu(phys_domains, i); - if (sched_smt_power_savings) - power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask); - else - power = SCHED_LOAD_SCALE; + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; sd->groups->cpu_power = power; #endif } @@ -6637,80 +6194,6 @@ return err; } -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -int arch_reinit_sched_domains(void) -{ - int err; - - lock_cpu_hotplug(); - detach_destroy_domains(&cpu_online_map); - err = arch_init_sched_domains(&cpu_online_map); - unlock_cpu_hotplug(); - - return err; -} - -static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) -{ - int ret; - - if (buf[0] != '0' && buf[0] != '1') - return -EINVAL; - - if (smt) - sched_smt_power_savings = (buf[0] == '1'); - else - sched_mc_power_savings = (buf[0] == '1'); - - ret = arch_reinit_sched_domains(); - - return ret ? 
ret : count; -} - -int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) -{ - int err = 0; -#ifdef CONFIG_SCHED_SMT - if (smt_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_smt_power_savings.attr); -#endif -#ifdef CONFIG_SCHED_MC - if (!err && mc_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_mc_power_savings.attr); -#endif - return err; -} -#endif - -#ifdef CONFIG_SCHED_MC -static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) -{ - return sprintf(page, "%u\n", sched_mc_power_savings); -} -static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) -{ - return sched_power_savings_store(buf, count, 0); -} -SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, - sched_mc_power_savings_store); -#endif - -#ifdef CONFIG_SCHED_SMT -static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) -{ - return sprintf(page, "%u\n", sched_smt_power_savings); -} -static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) -{ - return sched_power_savings_store(buf, count, 1); -} -SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, - sched_smt_power_savings_store); -#endif - - #ifdef CONFIG_HOTPLUG_CPU /* * Force a reinitialization of the sched domains hierarchy. 
The domains @@ -6773,17 +6256,13 @@ void __init sched_init(void) { runqueue_t *rq; - int i, j, k; + int i, j; for_each_possible_cpu(i) { - prio_array_t *array; rq = cpu_rq(i); - spin_lock_init_static(&rq->lock); + spin_lock_init(&rq->lock); rq->nr_running = 0; - rq->active = rq->arrays; - rq->expired = rq->arrays + 1; - rq->best_expired_prio = MAX_PRIO; #ifdef CONFIG_SMP rq->sd = NULL; @@ -6795,16 +6274,11 @@ INIT_LIST_HEAD(&rq->migration_queue); #endif atomic_set(&rq->nr_iowait, 0); - - for (j = 0; j < 2; j++) { - array = rq->arrays + j; - for (k = 0; k < MAX_PRIO; k++) { - INIT_LIST_HEAD(array->queue + k); - __clear_bit(k, array->bitmap); - } - // delimiter for bitsearch - __set_bit(MAX_PRIO, array->bitmap); - } + for (j = 0; j < MAX_PRIO; j++) + INIT_LIST_HEAD(&rq->queue[j]); + memset(rq->bitmap, 0, BITS_TO_LONGS(MAX_PRIO)*sizeof(long)); + /* delimiter for bitsearch */ + __set_bit(MAX_PRIO, rq->bitmap); } set_load_weight(&init_task); @@ -6852,9 +6326,9 @@ void normalize_rt_tasks(void) { struct task_struct *p; - prio_array_t *array; unsigned long flags; runqueue_t *rq; + int queued; read_lock_irq(&tasklist_lock); for_each_process(p) { @@ -6864,11 +6338,10 @@ spin_lock_irqsave(&p->pi_lock, flags); rq = __task_rq_lock(p); - array = p->array; - if (array) + if ((queued = task_queued(p))) deactivate_task(p, task_rq(p)); __setscheduler(p, SCHED_NORMAL, 0); - if (array) { + if (queued) { __activate_task(p, task_rq(p)); resched_task(rq->curr); }