Diffstat (limited to 'kernel')

 kernel/sched/boost.c |   9
 kernel/sched/core.c  |  11
 kernel/sched/fair.c  |  29
 kernel/sched/hmp.c   | 322
 kernel/sched/rt.c    |  20
 kernel/sched/sched.h |  35
 kernel/sysctl.c      |   5

 7 files changed, 107 insertions, 324 deletions
diff --git a/kernel/sched/boost.c b/kernel/sched/boost.c
index fcfda385b74a..5bdd51b1e55e 100644
--- a/kernel/sched/boost.c
+++ b/kernel/sched/boost.c
@@ -156,9 +156,6 @@ void sched_boost_parse_dt(void)
 	struct device_node *sn;
 	const char *boost_policy;
 
-	if (!sched_enable_hmp)
-		return;
-
 	sn = of_find_node_by_path("/sched-hmp");
 	if (!sn)
 		return;
@@ -175,9 +172,6 @@ int sched_set_boost(int type)
 {
 	int ret = 0;
 
-	if (!sched_enable_hmp)
-		return -EINVAL;
-
 	mutex_lock(&boost_mutex);
 
 	if (verify_boost_params(sysctl_sched_boost, type))
@@ -197,9 +191,6 @@ int sched_boost_handler(struct ctl_table *table, int write,
 	unsigned int *data = (unsigned int *)table->data;
 	unsigned int old_val;
 
-	if (!sched_enable_hmp)
-		return -EINVAL;
-
 	mutex_lock(&boost_mutex);
 
 	old_val = *data;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 519aee32e122..3fcadbae663d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3025,8 +3025,9 @@ void sched_exec(void)
 	unsigned long flags;
 	int dest_cpu, curr_cpu;
 
-	if (sched_enable_hmp)
-		return;
+#ifdef CONFIG_SCHED_HMP
+	return;
+#endif
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	curr_cpu = task_cpu(p);
@@ -8215,8 +8216,9 @@ void __init sched_init(void)
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
-	if (sched_enable_hmp)
-		pr_info("HMP scheduling enabled.\n");
+#ifdef CONFIG_SCHED_HMP
+	pr_info("HMP scheduling enabled.\n");
+#endif
 
 	BUG_ON(num_possible_cpus() > BITS_PER_LONG);
 
@@ -8362,6 +8364,7 @@ void __init sched_init(void)
 		rq->cluster = &init_cluster;
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
 		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
+		memset(&rq->grp_time, 0, sizeof(struct group_cpu_time));
 		rq->old_busy_time = 0;
 		rq->old_estimated_time = 0;
 		rq->old_busy_time_group = 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ac4c3f1d144a..6f68b0e19c4a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3332,9 +3332,9 @@ void _inc_hmp_sched_stats_fair(struct rq *rq,
 	 * inc/dec_nr_big_task and inc/dec_cumulative_runnable_avg called
 	 * from inc_cfs_rq_hmp_stats() have similar checks), we gain a bit on
 	 * efficiency by short-circuiting for_each_sched_entity() loop when
-	 * !sched_enable_hmp || sched_disable_window_stats
+	 * sched_disable_window_stats
 	 */
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	for_each_sched_entity(se) {
@@ -3357,7 +3357,7 @@ _dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p, int change_cra)
 	struct sched_entity *se = &p->se;
 
 	/* See comment on efficiency in _inc_hmp_sched_stats_fair */
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	for_each_sched_entity(se) {
@@ -3482,8 +3482,7 @@ static inline int migration_needed(struct task_struct *p, int cpu)
 	int nice;
 	struct related_thread_group *grp;
 
-	if (!sched_enable_hmp || p->state != TASK_RUNNING ||
-	    p->nr_cpus_allowed == 1)
+	if (p->state != TASK_RUNNING || p->nr_cpus_allowed == 1)
 		return 0;
 
 	/* No need to migrate task that is about to be throttled */
@@ -7024,8 +7023,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;
 
-	if (sched_enable_hmp)
-		return select_best_cpu(p, prev_cpu, 0, sync);
+#ifdef CONFIG_SCHED_HMP
+	return select_best_cpu(p, prev_cpu, 0, sync);
+#endif
 
 	if (sd_flag & SD_BALANCE_WAKE)
 		want_affine = (!wake_wide(p) && task_fits_max(p, cpu) &&
@@ -9313,8 +9313,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 	unsigned long busiest_load = 0, busiest_capacity = 1;
 	int i;
 
-	if (sched_enable_hmp)
-		return find_busiest_queue_hmp(env, group);
+#ifdef CONFIG_SCHED_HMP
+	return find_busiest_queue_hmp(env, group);
+#endif
 
 	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		unsigned long capacity, wl;
@@ -10120,8 +10121,9 @@ static inline int find_new_ilb(int type)
 {
 	int ilb;
 
-	if (sched_enable_hmp)
-		return find_new_hmp_ilb(type);
+#ifdef CONFIG_SCHED_HMP
+	return find_new_hmp_ilb(type);
+#endif
 
 	ilb = cpumask_first(nohz.idle_cpus_mask);
 
@@ -10496,8 +10498,9 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
 	if (likely(!atomic_read(&nohz.nr_cpus)))
 		return 0;
 
-	if (sched_enable_hmp)
-		return _nohz_kick_needed_hmp(rq, cpu, type);
+#ifdef CONFIG_SCHED_HMP
+	return _nohz_kick_needed_hmp(rq, cpu, type);
+#endif
 
 	if (time_before(now, nohz.next_balance))
 		return 0;
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index d54db37a7d0c..6379de764236 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -28,8 +28,7 @@
 const char *task_event_names[] = {"PUT_PREV_TASK", "PICK_NEXT_TASK",
 				   "TASK_WAKE", "TASK_MIGRATE",
 				   "TASK_UPDATE", "IRQ_UPDATE"};
-const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP",
-				    "RQ_TO_RQ", "GROUP_TO_GROUP"};
+const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP"};
 
 static ktime_t ktime_last;
 static bool sched_ktime_suspended;
@@ -616,19 +615,6 @@ int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
 	return 0;
 }
 
-int __init set_sched_enable_hmp(char *str)
-{
-	int enable_hmp = 0;
-
-	get_option(&str, &enable_hmp);
-
-	sched_enable_hmp = !!enable_hmp;
-
-	return 0;
-}
-
-early_param("sched_enable_hmp", set_sched_enable_hmp);
-
 /* Clear any HMP scheduler related requests pending from or on cpu */
 void clear_hmp_request(int cpu)
 {
@@ -870,9 +856,6 @@ unsigned int max_task_load(void)
 	return sched_ravg_window;
 }
 
-/* Use this knob to turn on or off HMP-aware task placement logic */
-unsigned int __read_mostly sched_enable_hmp;
-
 /* A cpu can no longer accommodate more tasks if:
  *
  *	rq->nr_running > sysctl_sched_spill_nr_run ||
@@ -1245,7 +1228,7 @@ unlock:
 
 void inc_nr_big_task(struct hmp_sched_stats *stats, struct task_struct *p)
 {
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	if (is_big_task(p))
@@ -1254,7 +1237,7 @@ void inc_nr_big_task(struct hmp_sched_stats *stats, struct task_struct *p)
 
 void dec_nr_big_task(struct hmp_sched_stats *stats, struct task_struct *p)
 {
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	if (is_big_task(p))
@@ -1323,7 +1306,7 @@ void fixup_nr_big_tasks(struct hmp_sched_stats *stats,
 	u64 new_task_load;
 	u64 old_task_load;
 
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	old_task_load = scale_load_to_cpu(task_load(p), task_cpu(p));
@@ -1433,9 +1416,6 @@ int sched_window_update_handler(struct ctl_table *table, int write,
 	unsigned int *data = (unsigned int *)table->data;
 	unsigned int old_val;
 
-	if (!sched_enable_hmp)
-		return -EINVAL;
-
 	mutex_lock(&policy_mutex);
 
 	old_val = *data;
@@ -1471,9 +1451,6 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
 	unsigned int *data = (unsigned int *)table->data;
 	int update_task_count = 0;
 
-	if (!sched_enable_hmp)
-		return 0;
-
 	/*
 	 * The policy mutex is acquired with cpu_hotplug.lock
 	 * held from cpu_up()->cpufreq_governor_interactive()->
@@ -1713,45 +1690,19 @@ static inline unsigned int load_to_freq(struct rq *rq, u64 load)
 	return freq;
 }
 
-static inline struct group_cpu_time *
-_group_cpu_time(struct related_thread_group *grp, int cpu);
-
-/*
- * Return load from all related group in given cpu.
- * Caller must ensure that related_thread_group_lock is held.
- */
-static void _group_load_in_cpu(int cpu, u64 *grp_load, u64 *new_grp_load)
-{
-	struct related_thread_group *grp;
-
-	for_each_related_thread_group(grp) {
-		struct group_cpu_time *cpu_time;
-
-		cpu_time = _group_cpu_time(grp, cpu);
-		*grp_load += cpu_time->prev_runnable_sum;
-		if (new_grp_load)
-			*new_grp_load += cpu_time->nt_prev_runnable_sum;
-	}
-}
-
 /*
  * Return load from all related groups in given frequency domain.
- * Caller must ensure that related_thread_group_lock is held.
  */
 static void group_load_in_freq_domain(struct cpumask *cpus,
 				u64 *grp_load, u64 *new_grp_load)
 {
-	struct related_thread_group *grp;
 	int j;
 
-	for_each_related_thread_group(grp) {
-		for_each_cpu(j, cpus) {
-			struct group_cpu_time *cpu_time;
+	for_each_cpu(j, cpus) {
+		struct rq *rq = cpu_rq(j);
 
-			cpu_time = _group_cpu_time(grp, j);
-			*grp_load += cpu_time->prev_runnable_sum;
-			*new_grp_load += cpu_time->nt_prev_runnable_sum;
-		}
+		*grp_load += rq->grp_time.prev_runnable_sum;
+		*new_grp_load += rq->grp_time.nt_prev_runnable_sum;
 	}
 }
 
@@ -1776,9 +1727,6 @@ static int send_notification(struct rq *rq, int check_pred, int check_groups)
 	int rc = 0;
 	u64 group_load = 0, new_load = 0;
 
-	if (!sched_enable_hmp)
-		return 0;
-
 	if (check_pred) {
 		u64 prev = rq->old_busy_time;
 		u64 predicted = rq->hmp_stats.pred_demands_sum;
@@ -1796,20 +1744,18 @@ static int send_notification(struct rq *rq, int check_pred, int check_groups)
 		if (freq_required < cur_freq + sysctl_sched_pred_alert_freq)
 			return 0;
 	} else {
-		read_lock_irqsave(&related_thread_group_lock, flags);
 		/*
 		 * Protect from concurrent update of rq->prev_runnable_sum and
 		 * group cpu load
 		 */
-		raw_spin_lock(&rq->lock);
+		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (check_groups)
-			_group_load_in_cpu(cpu_of(rq), &group_load, NULL);
+			group_load = rq->grp_time.prev_runnable_sum;
 
 		new_load = rq->prev_runnable_sum + group_load;
 		new_load = freq_policy_load(rq, new_load);
 
-		raw_spin_unlock(&rq->lock);
-		read_unlock_irqrestore(&related_thread_group_lock, flags);
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 		cur_freq = load_to_freq(rq, rq->old_busy_time);
 		freq_required = load_to_freq(rq, new_load);
@@ -2283,6 +2229,31 @@ static void rollover_task_window(struct task_struct *p, bool full_window)
 	}
 }
 
+static void rollover_cpu_window(struct rq *rq, bool full_window)
+{
+	u64 curr_sum = rq->curr_runnable_sum;
+	u64 nt_curr_sum = rq->nt_curr_runnable_sum;
+	u64 grp_curr_sum = rq->grp_time.curr_runnable_sum;
+	u64 grp_nt_curr_sum = rq->grp_time.nt_curr_runnable_sum;
+
+	if (unlikely(full_window)) {
+		curr_sum = 0;
+		nt_curr_sum = 0;
+		grp_curr_sum = 0;
+		grp_nt_curr_sum = 0;
+	}
+
+	rq->prev_runnable_sum = curr_sum;
+	rq->nt_prev_runnable_sum = nt_curr_sum;
+	rq->grp_time.prev_runnable_sum = grp_curr_sum;
+	rq->grp_time.nt_prev_runnable_sum = grp_nt_curr_sum;
+
+	rq->curr_runnable_sum = 0;
+	rq->nt_curr_runnable_sum = 0;
+	rq->grp_time.curr_runnable_sum = 0;
+	rq->grp_time.nt_curr_runnable_sum = 0;
+}
+
 /*
  * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
  */
@@ -2299,8 +2270,6 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 	u64 *prev_runnable_sum = &rq->prev_runnable_sum;
 	u64 *nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
 	u64 *nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
-	int flip_counters = 0;
-	int prev_sum_reset = 0;
 	bool new_task;
 	struct related_thread_group *grp;
 	int cpu = rq->cpu;
@@ -2315,51 +2284,6 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 
 	new_task = is_new_task(p);
 
-	grp = p->grp;
-	if (grp && sched_freq_aggregate) {
-		/* cpu_time protected by rq_lock */
-		struct group_cpu_time *cpu_time =
-			_group_cpu_time(grp, cpu_of(rq));
-
-		curr_runnable_sum = &cpu_time->curr_runnable_sum;
-		prev_runnable_sum = &cpu_time->prev_runnable_sum;
-
-		nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
-		nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
-
-		if (cpu_time->window_start != rq->window_start) {
-			int nr_windows;
-
-			delta = rq->window_start - cpu_time->window_start;
-			nr_windows = div64_u64(delta, window_size);
-			if (nr_windows > 1)
-				prev_sum_reset = 1;
-
-			cpu_time->window_start = rq->window_start;
-			flip_counters = 1;
-		}
-
-		if (p_is_curr_task && new_window) {
-			u64 curr_sum = rq->curr_runnable_sum;
-			u64 nt_curr_sum = rq->nt_curr_runnable_sum;
-
-			if (full_window)
-				curr_sum = nt_curr_sum = 0;
-
-			rq->prev_runnable_sum = curr_sum;
-			rq->nt_prev_runnable_sum = nt_curr_sum;
-
-			rq->curr_runnable_sum = 0;
-			rq->nt_curr_runnable_sum = 0;
-		}
-	} else {
-		if (p_is_curr_task && new_window) {
-			flip_counters = 1;
-			if (full_window)
-				prev_sum_reset = 1;
-		}
-	}
-
 	/*
 	 * Handle per-task window rollover. We don't care about the idle
 	 * task or exiting tasks.
@@ -2369,26 +2293,25 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 		rollover_task_window(p, full_window);
 	}
 
-	if (flip_counters) {
-		u64 curr_sum = *curr_runnable_sum;
-		u64 nt_curr_sum = *nt_curr_runnable_sum;
+	if (p_is_curr_task && new_window) {
+		rollover_cpu_window(rq, full_window);
+		rollover_top_tasks(rq, full_window);
+	}
 
-		if (prev_sum_reset)
-			curr_sum = nt_curr_sum = 0;
+	if (!account_busy_for_cpu_time(rq, p, irqtime, event))
+		goto done;
 
-		*prev_runnable_sum = curr_sum;
-		*nt_prev_runnable_sum = nt_curr_sum;
+	grp = p->grp;
+	if (grp && sched_freq_aggregate) {
+		struct group_cpu_time *cpu_time = &rq->grp_time;
 
-		*curr_runnable_sum = 0;
-		*nt_curr_runnable_sum = 0;
+		curr_runnable_sum = &cpu_time->curr_runnable_sum;
+		prev_runnable_sum = &cpu_time->prev_runnable_sum;
 
-		if (p_is_curr_task)
-			rollover_top_tasks(rq, full_window);
+		nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
+		nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
 	}
 
-	if (!account_busy_for_cpu_time(rq, p, irqtime, event))
-		goto done;
-
 	if (!new_window) {
 		/*
 		 * account_busy_for_cpu_time() = 1 so busy time needs
@@ -2905,7 +2828,7 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
 done:
 	trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime,
 				     rq->cc.cycles, rq->cc.time,
-				     _group_cpu_time(p->grp, cpu_of(rq)));
+				     p->grp ? &rq->grp_time : NULL);
 
 	p->ravg.mark_start = wallclock;
 }
@@ -3012,7 +2935,7 @@ void set_window_start(struct rq *rq)
 {
 	static int sync_cpu_available;
 
-	if (rq->window_start || !sched_enable_hmp)
+	if (rq->window_start)
 		return;
 
 	if (!sync_cpu_available) {
@@ -3063,7 +2986,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 	u64 start_ts = sched_ktime_clock();
 	int reason = WINDOW_CHANGE;
 	unsigned int old = 0, new = 0;
-	struct related_thread_group *grp;
 
 	local_irq_save(flags);
 
@@ -3081,19 +3003,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 
 	read_unlock(&tasklist_lock);
 
-	list_for_each_entry(grp, &active_related_thread_groups, list) {
-		int j;
-
-		for_each_possible_cpu(j) {
-			struct group_cpu_time *cpu_time;
-			/* Protected by rq lock */
-			cpu_time = _group_cpu_time(grp, j);
-			memset(cpu_time, 0, sizeof(struct group_cpu_time));
-			if (window_start)
-				cpu_time->window_start = window_start;
-		}
-	}
-
 	if (window_size) {
 		sched_ravg_window = window_size * TICK_NSEC;
 		set_hmp_defaults();
@@ -3109,6 +3018,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 		rq->window_start = window_start;
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
 		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
+		memset(&rq->grp_time, 0, sizeof(struct group_cpu_time));
 
 		for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
 			memset(&rq->load_subs[i], 0,
					sizeof(struct load_subtractions));
@@ -3198,15 +3108,12 @@ static inline u64 freq_policy_load(struct rq *rq, u64 load)
 	case FREQ_REPORT_CPU_LOAD:
 		break;
 	default:
-		WARN_ON_ONCE(1);
+		break;
 	}
 
 	return load;
 }
 
-static inline void
-sync_window_start(struct rq *rq, struct group_cpu_time *cpu_time);
-
 void sched_get_cpus_busy(struct sched_load *busy,
			 const struct cpumask *query_cpus)
 {
@@ -3223,7 +3130,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
 	unsigned int window_size;
 	u64 max_prev_sum = 0;
 	int max_busy_cpu = cpumask_first(query_cpus);
-	struct related_thread_group *grp;
 	u64 total_group_load = 0, total_ngload = 0;
 	bool aggregate_load = false;
 	struct sched_cluster *cluster = cpu_cluster(cpumask_first(query_cpus));
@@ -3233,8 +3139,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
 
 	local_irq_save(flags);
 
-	read_lock(&related_thread_group_lock);
-
 	/*
 	 * This function could be called in timer context, and the
 	 * current task may have been executing for a long time. Ensure
@@ -3287,15 +3191,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
 
 	raw_spin_unlock(&cluster->load_lock);
 
-	for_each_related_thread_group(grp) {
-		for_each_cpu(cpu, query_cpus) {
-			/* Protected by rq_lock */
-			struct group_cpu_time *cpu_time =
-						_group_cpu_time(grp, cpu);
-			sync_window_start(cpu_rq(cpu), cpu_time);
-		}
-	}
-
 	group_load_in_freq_domain(
 			&cpu_rq(max_busy_cpu)->freq_domain_cpumask,
 			&total_group_load, &total_ngload);
@@ -3316,7 +3211,8 @@ void sched_get_cpus_busy(struct sched_load *busy,
 				ngload[i] = total_ngload;
 			}
 		} else {
-			_group_load_in_cpu(cpu, &group_load[i], &ngload[i]);
+			group_load[i] = rq->grp_time.prev_runnable_sum;
+			ngload[i] = rq->grp_time.nt_prev_runnable_sum;
 		}
 
 		load[i] += group_load[i];
@@ -3341,8 +3237,6 @@ skip_early:
 	for_each_cpu(cpu, query_cpus)
 		raw_spin_unlock(&(cpu_rq(cpu))->lock);
 
-	read_unlock(&related_thread_group_lock);
-
 	local_irq_restore(flags);
 
 	i = 0;
@@ -3620,7 +3514,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
 	bool new_task;
 	struct related_thread_group *grp;
 
-	if (!sched_enable_hmp || (!p->on_rq && p->state != TASK_WAKING))
+	if (!p->on_rq && p->state != TASK_WAKING)
 		return;
 
 	if (exiting_task(p)) {
@@ -3659,18 +3553,17 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
 	if (grp && sched_freq_aggregate) {
 		struct group_cpu_time *cpu_time;
 
-		cpu_time = _group_cpu_time(grp, cpu_of(src_rq));
+		cpu_time = &src_rq->grp_time;
 		src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
 		src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
 		src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
 		src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
 
-		cpu_time = _group_cpu_time(grp, cpu_of(dest_rq));
+		cpu_time = &dest_rq->grp_time;
 		dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
 		dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
 		dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
 		dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
-		sync_window_start(dest_rq, cpu_time);
 
 		if (p->ravg.curr_window) {
 			*src_curr_runnable_sum -= p->ravg.curr_window;
@@ -3799,61 +3692,6 @@ void set_preferred_cluster(struct related_thread_group *grp)
 
 #define DEFAULT_CGROUP_COLOC_ID 1
 
-static inline void free_group_cputime(struct related_thread_group *grp)
-{
-	free_percpu(grp->cpu_time);
-}
-
-static int alloc_group_cputime(struct related_thread_group *grp)
-{
-	int i;
-	struct group_cpu_time *cpu_time;
-	int cpu = raw_smp_processor_id();
-	struct rq *rq = cpu_rq(cpu);
-	u64 window_start = rq->window_start;
-
-	grp->cpu_time = alloc_percpu_gfp(struct group_cpu_time, GFP_ATOMIC);
-	if (!grp->cpu_time)
-		return -ENOMEM;
-
-	for_each_possible_cpu(i) {
-		cpu_time = per_cpu_ptr(grp->cpu_time, i);
-		memset(cpu_time, 0, sizeof(struct group_cpu_time));
-		cpu_time->window_start = window_start;
-	}
-
-	return 0;
-}
-
-/*
- * A group's window_start may be behind. When moving it forward, flip prev/curr
- * counters. When moving forward > 1 window, prev counter is set to 0
- */
-static inline void
-sync_window_start(struct rq *rq, struct group_cpu_time *cpu_time)
-{
-	u64 delta;
-	int nr_windows;
-	u64 curr_sum = cpu_time->curr_runnable_sum;
-	u64 nt_curr_sum = cpu_time->nt_curr_runnable_sum;
-
-	delta = rq->window_start - cpu_time->window_start;
-	if (!delta)
-		return;
-
-	nr_windows = div64_u64(delta, sched_ravg_window);
-	if (nr_windows > 1)
-		curr_sum = nt_curr_sum = 0;
-
-	cpu_time->prev_runnable_sum = curr_sum;
-	cpu_time->curr_runnable_sum = 0;
-
-	cpu_time->nt_prev_runnable_sum = nt_curr_sum;
-	cpu_time->nt_curr_runnable_sum = 0;
-
-	cpu_time->window_start = rq->window_start;
-}
-
 /*
  * Task's cpu usage is accounted in:
  *	rq->curr/prev_runnable_sum,  when its ->grp is NULL
@@ -3871,7 +3709,6 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
 	u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
 	u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
 	u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
-	struct migration_sum_data d;
 	int migrate_type;
 	int cpu = cpu_of(rq);
 	bool new_task;
@@ -3886,15 +3723,10 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
 	update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
 	new_task = is_new_task(p);
 
-	/* cpu_time protected by related_thread_group_lock, grp->lock rq_lock */
-	cpu_time = _group_cpu_time(grp, cpu);
+	cpu_time = &rq->grp_time;
 	if (event == ADD_TASK) {
-		sync_window_start(rq, cpu_time);
 		migrate_type = RQ_TO_GROUP;
-		d.src_rq = rq;
-		d.src_cpu_time = NULL;
-		d.dst_rq = NULL;
-		d.dst_cpu_time = cpu_time;
+
 		src_curr_runnable_sum = &rq->curr_runnable_sum;
 		dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
 		src_prev_runnable_sum = &rq->prev_runnable_sum;
@@ -3919,17 +3751,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
 	} else {
 		migrate_type = GROUP_TO_RQ;
 
-		d.src_rq = NULL;
-		d.src_cpu_time = cpu_time;
-		d.dst_rq = rq;
-		d.dst_cpu_time = NULL;
-
-		/*
-		 * In case of REM_TASK, cpu_time->window_start would be
-		 * uptodate, because of the update_task_ravg() we called
-		 * above on the moving task. Hence no need for
-		 * sync_window_start()
-		 */
 		src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
 		dst_curr_runnable_sum = &rq->curr_runnable_sum;
 		src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
@@ -3975,7 +3797,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
 	p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window;
 	p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window;
 
-	trace_sched_migration_update_sum(p, migrate_type, &d);
+	trace_sched_migration_update_sum(p, migrate_type, rq);
 
 	BUG_ON((s64)*src_curr_runnable_sum < 0);
 	BUG_ON((s64)*src_prev_runnable_sum < 0);
@@ -3983,18 +3805,6 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
 	BUG_ON((s64)*src_nt_curr_runnable_sum < 0);
 	BUG_ON((s64)*src_nt_prev_runnable_sum < 0);
 }
 
-static inline struct group_cpu_time *
-task_group_cpu_time(struct task_struct *p, int cpu)
-{
-	return _group_cpu_time(rcu_dereference(p->grp), cpu);
-}
-
-static inline struct group_cpu_time *
-_group_cpu_time(struct related_thread_group *grp, int cpu)
-{
-	return grp ? per_cpu_ptr(grp->cpu_time, cpu) : NULL;
-}
-
 static inline struct related_thread_group*
 lookup_related_thread_group(unsigned int group_id)
 {
@@ -4014,12 +3824,6 @@ int alloc_related_thread_groups(void)
 			goto err;
 		}
 
-		if (alloc_group_cputime(grp)) {
-			kfree(grp);
-			ret = -ENOMEM;
-			goto err;
-		}
-
 		grp->id = i;
 		INIT_LIST_HEAD(&grp->tasks);
 		INIT_LIST_HEAD(&grp->list);
@@ -4034,7 +3838,6 @@ err:
 	for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
 		grp = lookup_related_thread_group(i);
 		if (grp) {
-			free_group_cputime(grp);
 			kfree(grp);
 			related_thread_groups[i] = NULL;
 		} else {
@@ -4418,9 +4221,6 @@ static int register_sched_callback(void)
 {
 	int ret;
 
-	if (!sched_enable_hmp)
-		return 0;
-
 	ret = cpufreq_register_notifier(&notifier_policy_block,
						CPUFREQ_POLICY_NOTIFIER);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3fe00d6fa335..b72352bbd752 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1406,6 +1406,7 @@ static void yield_task_rt(struct rq *rq)
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
+#ifdef CONFIG_SCHED_HMP
 static int
 select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
@@ -1419,6 +1420,7 @@ select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
 
 	return cpu;
 }
+#endif
 
 static int
 select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
@@ -1426,8 +1428,9 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 	struct task_struct *curr;
 	struct rq *rq;
 
-	if (sched_enable_hmp)
-		return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+#ifdef CONFIG_SCHED_HMP
+	return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+#endif
 
 	/* For anything but wake ups, just return the task_cpu */
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
@@ -1796,14 +1799,6 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 
 	return best_cpu;
 }
-
-#else /* CONFIG_SCHED_HMP */
-
-static int find_lowest_rq_hmp(struct task_struct *task)
-{
-	return -1;
-}
-
 #endif /* CONFIG_SCHED_HMP */
 
 static int find_lowest_rq(struct task_struct *task)
@@ -1813,8 +1808,9 @@ static int find_lowest_rq(struct task_struct *task)
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
 
-	if (sched_enable_hmp)
-		return find_lowest_rq_hmp(task);
+#ifdef CONFIG_SCHED_HMP
+	return find_lowest_rq_hmp(task);
+#endif
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!lowest_mask))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d907eeb297a3..360e298398fb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,6 +366,13 @@ struct load_subtractions {
 	u64 new_subs;
 };
 
+struct group_cpu_time {
+	u64 curr_runnable_sum;
+	u64 prev_runnable_sum;
+	u64 nt_curr_runnable_sum;
+	u64 nt_prev_runnable_sum;
+};
+
 struct sched_cluster {
 	raw_spinlock_t load_lock;
 	struct list_head list;
@@ -407,12 +414,6 @@ struct related_thread_group {
 	struct sched_cluster *preferred_cluster;
 	struct rcu_head rcu;
 	u64 last_update;
-	struct group_cpu_time __percpu *cpu_time;	/* one per cluster */
-};
-
-struct migration_sum_data {
-	struct rq *src_rq, *dst_rq;
-	struct group_cpu_time *src_cpu_time, *dst_cpu_time;
 };
 
 extern struct list_head cluster_head;
@@ -776,6 +777,7 @@ struct rq {
 	u64 prev_runnable_sum;
 	u64 nt_curr_runnable_sum;
 	u64 nt_prev_runnable_sum;
+	struct group_cpu_time grp_time;
 	struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
 	DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
			NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES);
@@ -1069,10 +1071,6 @@ enum sched_boost_policy {
 #define WINDOW_STATS_AVG	3
 #define WINDOW_STATS_INVALID_POLICY	4
 
-#define FREQ_REPORT_MAX_CPU_LOAD_TOP_TASK	0
-#define FREQ_REPORT_CPU_LOAD			1
-#define FREQ_REPORT_TOP_TASK			2
-
 #define SCHED_UPMIGRATE_MIN_NICE 15
 #define EXITING_TASK_MARKER	0xdeaddead
 
@@ -1083,7 +1081,6 @@ enum sched_boost_policy {
 extern struct mutex policy_mutex;
 extern unsigned int sched_ravg_window;
 extern unsigned int sched_disable_window_stats;
-extern unsigned int sched_enable_hmp;
 extern unsigned int max_possible_freq;
 extern unsigned int min_max_freq;
 extern unsigned int pct_task_load(struct task_struct *p);
@@ -1127,7 +1124,6 @@ extern void update_cluster_topology(void);
 extern void note_task_waking(struct task_struct *p, u64 wallclock);
 extern void set_task_last_switch_out(struct task_struct *p, u64 wallclock);
 extern void init_clusters(void);
-extern int __init set_sched_enable_hmp(char *str);
 extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
 extern unsigned int max_task_load(void);
 extern void sched_account_irqtime(int cpu, struct task_struct *curr,
@@ -1257,7 +1253,7 @@ inc_cumulative_runnable_avg(struct hmp_sched_stats *stats,
 {
 	u32 task_load;
 
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	task_load = sched_disable_window_stats ? 0 : p->ravg.demand;
@@ -1272,7 +1268,7 @@ dec_cumulative_runnable_avg(struct hmp_sched_stats *stats,
 {
 	u32 task_load;
 
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	task_load = sched_disable_window_stats ? 0 : p->ravg.demand;
@@ -1290,7 +1286,7 @@ fixup_cumulative_runnable_avg(struct hmp_sched_stats *stats,
			      struct task_struct *p, s64 task_load_delta,
			      s64 pred_demand_delta)
 {
-	if (!sched_enable_hmp || sched_disable_window_stats)
+	if (sched_disable_window_stats)
 		return;
 
 	stats->cumulative_runnable_avg += task_load_delta;
@@ -1350,14 +1346,6 @@ check_for_freq_change(struct rq *rq, bool check_pred, bool check_groups);
 extern void notify_migration(int src_cpu, int dest_cpu, bool src_cpu_dead,
			     struct task_struct *p);
 
-struct group_cpu_time {
-	u64 curr_runnable_sum;
-	u64 prev_runnable_sum;
-	u64 nt_curr_runnable_sum;
-	u64 nt_prev_runnable_sum;
-	u64 window_start;
-};
-
 /* Is frequency of two cpus synchronized with each other? */
 static inline int same_freq_domain(int src_cpu, int dst_cpu)
 {
@@ -1667,7 +1655,6 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
 
 static inline void add_new_task_to_grp(struct task_struct *new) {}
 
-#define sched_enable_hmp 0
 #define PRED_DEMAND_DELTA (0)
 
 static inline void
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a2a87c3ad44e..7112dc54d88e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -133,6 +133,7 @@ static int ten_thousand = 10000;
 #endif
 #ifdef CONFIG_SCHED_HMP
 static int one_thousand = 1000;
+static int max_freq_reporting_policy = FREQ_REPORT_INVALID_POLICY - 1;
 #endif
 
 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
@@ -297,6 +298,7 @@ static struct ctl_table kern_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
+		.extra2		= &max_freq_reporting_policy,
	},
	{
		.procname	= "sched_freq_inc_notify",
@@ -591,7 +593,8 @@ static struct ctl_table kern_table[] = {
		.data		= &sysctl_sched_time_avg,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
	},
	{
		.procname	= "sched_shares_window_ns",