From 1cb87c38cb81847938bbb4776d9c6f3afe8fd938 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Wed, 1 Nov 2017 14:11:06 +0800 Subject: sched: EAS: Fix the condition to distinguish energy before/after Before commit 5f8b3a757d65 ("sched/fair: consider task utilization in group_norm_util()"), eenv->util_delta is used to distinguish energy before and energy after in sched_group_energy(). After that commit, eenv->util_delta can not do that any more. In this commit, use trg_cpu to distinguish energy before/after in sched_group_energy(). Before applying this commit, cap_before/cap_delta is not correct: -0 [001] 147504.608920: sched_energy_diff: pid=7 comm=rcu_preempt src_cpu=1 dst_cpu=3 usage_delta=7 nrg_before=250 nrg_after=250 nrg_diff=0 cap_before=0 cap_after=528 cap_delta=1056 nrg_delta=0 nrg_payoff=0 After applying this commit, cap_before/cap_delta return to normal: -0 [001] 220.494011: sched_energy_diff: pid=7 comm=rcu_preempt src_cpu=1 dst_cpu=2 usage_delta=3 nrg_before=248 nrg_after=248 nrg_diff=0 cap_before=528 cap_after=528 cap_delta=0 nrg_delta=0 nrg_payoff=0 Signed-off-by: Ke Wang --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched/fair.c') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ac22d32a6255..06b814b58d20 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5534,13 +5534,13 @@ static int sched_group_energy(struct energy_env *eenv) if (sg->group_weight == 1) { /* Remove capacity of src CPU (before task move) */ - if (eenv->util_delta == 0 && + if (eenv->trg_cpu == eenv->src_cpu && cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { eenv->cap.before = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta -= eenv->cap.before; } /* Add capacity of dst CPU (after task move) */ - if (eenv->util_delta != 0 && + if (eenv->trg_cpu == eenv->dst_cpu && cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { eenv->cap.after = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta += eenv->cap.after; 
-- cgit v1.2.3 From 7d5a251c66be3516c14cffa80e6b076b37736971 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Mon, 30 Oct 2017 17:38:16 +0800 Subject: sched: EAS: update trg_cpu to backup_cpu if no energy saving for target_cpu If no energy saving for target_cpu in the calculation of energy_diff(), backup_cpu will be set as the new dst_cpu for the next calculation. At this point, we also need to update the new trg_cpu as backup_cpu to make sure the subsequent calculation of energy_diff() is correct. Signed-off-by: Ke Wang --- kernel/sched/fair.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/sched/fair.c') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 06b814b58d20..3b429c5ce721 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6723,6 +6723,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync /* No energy saving for target_cpu, try backup */ target_cpu = tmp_backup; eenv.dst_cpu = target_cpu; + eenv.trg_cpu = target_cpu; if (tmp_backup < 0 || tmp_backup == prev_cpu || energy_diff(&eenv) >= 0) { -- cgit v1.2.3 From 47c87b2654376e7dda646ca5a2af067c5d368ca7 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Wed, 1 Nov 2017 16:07:38 +0800 Subject: sched: EAS: Fix the calculation of group util in group_idle_state() util_delta becomes not zero in eenv_before, which will affect the calculation of grp_util in group_idle_state(). Fix it under the new condition. 
Change-Id: Ic3853bb45876a8e388afcbe4e72d25fc42b1d7b0 Signed-off-by: Ke Wang --- kernel/sched/fair.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel/sched/fair.c') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b429c5ce721..5c65f3ad6da1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5429,13 +5429,6 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; - /* - * Try to estimate if a deeper idle state is - * achievable when we move the task. - */ - for_each_cpu(i, sched_group_cpus(sg)) - grp_util += cpu_util(i); - src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg)); dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg)); if (src_in_grp == dst_in_grp) { @@ -5444,10 +5437,16 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) */ goto end; } - /* add or remove util as appropriate to indicate what group util - * will be (worst case - no concurrent execution) after moving the task + + /* + * Try to estimate if a deeper idle state is + * achievable when we move the task. */ - grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta; + for_each_cpu(i, sched_group_cpus(sg)) { + grp_util += cpu_util_wake(i, eenv->task); + if (unlikely(i == eenv->trg_cpu)) + grp_util += eenv->util_delta; + } if (grp_util <= ((long)sg->sgc->max_capacity * (int)sg->group_weight)) { -- cgit v1.2.3 From df147c9e336cfcb4183db1eb9552b0429060cd0d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Nov 2017 15:13:26 +0530 Subject: cpufreq: Drop schedfreq governor We all should be using (and improving) the schedutil governor now. Get rid of the non-upstream governor. Tested on Hikey. 
Change-Id: Ic660756536e5da51952738c3c18b94e31f58cd57 Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 13 -- include/linux/sched/sysctl.h | 1 - kernel/sched/Makefile | 1 - kernel/sched/core.c | 86 ------- kernel/sched/cpufreq_sched.c | 525 ------------------------------------------- kernel/sched/fair.c | 89 +------- kernel/sched/rt.c | 49 +--- kernel/sched/sched.h | 75 ------- kernel/sysctl.c | 7 - 9 files changed, 4 insertions(+), 842 deletions(-) delete mode 100644 kernel/sched/cpufreq_sched.c (limited to 'kernel/sched/fair.c') diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 91b05e2b8799..91e2743f057b 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -224,19 +224,6 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. -config CPU_FREQ_GOV_SCHED - bool "'sched' cpufreq governor" - depends on CPU_FREQ - depends on SMP - select CPU_FREQ_GOV_COMMON - help - 'sched' - this governor scales cpu frequency from the - scheduler as a function of cpu capacity utilization. It does - not evaluate utilization on a periodic basis (as ondemand - does) but instead is event-driven by the scheduler. - - If in doubt, say N. 
- config CPU_FREQ_GOV_SCHEDUTIL bool "'schedutil' cpufreq policy governor" depends on CPU_FREQ && SMP diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 2bf4520d8089..167279a4e24b 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -40,7 +40,6 @@ extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_sync_hint_enable; -extern unsigned int sysctl_sched_initial_task_util; extern unsigned int sysctl_sched_cstate_aware; #ifdef CONFIG_SCHED_WALT extern unsigned int sysctl_sched_use_walt_cpu_util; diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index ca0d94096170..d7ec4f7dd0d9 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -22,5 +22,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_SCHED_TUNE) += tune.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o -obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3030633d8900..889fb1aff1e0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2983,91 +2983,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } -#ifdef CONFIG_CPU_FREQ_GOV_SCHED - -static inline -unsigned long add_capacity_margin(unsigned long cpu_capacity) -{ - cpu_capacity = cpu_capacity * capacity_margin; - cpu_capacity /= SCHED_CAPACITY_SCALE; - return cpu_capacity; -} - -static inline -unsigned long sum_capacity_reqs(unsigned long cfs_cap, - struct sched_capacity_reqs *scr) -{ - unsigned long total = add_capacity_margin(cfs_cap + scr->rt); - return total += scr->dl; -} - -unsigned long boosted_cpu_util(int cpu); -static void sched_freq_tick_pelt(int cpu) -{ - unsigned long cpu_utilization = boosted_cpu_util(cpu); - unsigned long capacity_curr = 
capacity_curr_of(cpu); - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr) - return; - - /* - * To make free room for a task that is building up its "real" - * utilization and to harm its performance the least, request - * a jump to a higher OPP as soon as the margin of free capacity - * is impacted (specified by capacity_margin). - * Remember CPU utilization in sched_capacity_reqs should be normalised. - */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); -} - -#ifdef CONFIG_SCHED_WALT -static void sched_freq_tick_walt(int cpu) -{ - unsigned long cpu_utilization = cpu_util_freq(cpu); - unsigned long capacity_curr = capacity_curr_of(cpu); - - if (walt_disabled || !sysctl_sched_use_walt_cpu_util) - return sched_freq_tick_pelt(cpu); - - /* - * Add a margin to the WALT utilization to check if we will need to - * increase frequency. - * NOTE: WALT tracks a single CPU signal for all the scheduling - * classes, thus this margin is going to be added to the DL class as - * well, which is something we do not do in sched_freq_tick_pelt case. - */ - if (add_capacity_margin(cpu_utilization) <= capacity_curr) - return; - - /* - * It is likely that the load is growing so we - * keep the added margin in our request as an - * extra boost. - * Remember CPU utilization in sched_capacity_reqs should be normalised. 
- */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); - -} -#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu) -#else -#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu) -#endif /* CONFIG_SCHED_WALT */ - -static void sched_freq_tick(int cpu) -{ - if (!sched_freq()) - return; - - _sched_freq_tick(cpu); -} -#else -static inline void sched_freq_tick(int cpu) { } -#endif /* CONFIG_CPU_FREQ_GOV_SCHED */ - /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3088,7 +3003,6 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); update_cpu_load_active(rq); calc_global_load_tick(rq); - sched_freq_tick(cpu); raw_spin_unlock(&rq->lock); perf_event_task_tick(); diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c deleted file mode 100644 index ec0aed7a8f96..000000000000 --- a/kernel/sched/cpufreq_sched.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (C) 2015 Michael Turquette - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -#include "sched.h" - -#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */ -#define THROTTLE_UP_NSEC 500000 /* 500us default */ - -struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE; -static bool __read_mostly cpufreq_driver_slow; - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static struct cpufreq_governor cpufreq_gov_sched; -#endif - -static DEFINE_PER_CPU(unsigned long, enabled); -DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); - -struct gov_tunables { - struct gov_attr_set attr_set; - unsigned int up_throttle_nsec; - unsigned int down_throttle_nsec; -}; - -/** - * gov_data - per-policy data internal to the governor - * @up_throttle: next throttling period expiry if increasing OPP - * @down_throttle: next throttling period expiry if decreasing OPP - * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP - * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP - * @task: worker thread for dvfs transition that may block/sleep - * @irq_work: callback used to wake up worker thread - * @requested_freq: last frequency requested by the sched governor - * - * struct gov_data is the per-policy cpufreq_sched-specific data structure. A - * per-policy instance of it is created when the cpufreq_sched governor receives - * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data - * member of struct cpufreq_policy. - * - * Readers of this data must call down_read(policy->rwsem). Writers must - * call down_write(policy->rwsem). 
- */ -struct gov_data { - ktime_t up_throttle; - ktime_t down_throttle; - struct gov_tunables *tunables; - struct list_head tunables_hook; - struct task_struct *task; - struct irq_work irq_work; - unsigned int requested_freq; -}; - -static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, - unsigned int freq) -{ - struct gov_data *gd = policy->governor_data; - - /* avoid race with cpufreq_sched_stop */ - if (!down_write_trylock(&policy->rwsem)) - return; - - __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); - - gd->up_throttle = ktime_add_ns(ktime_get(), - gd->tunables->up_throttle_nsec); - gd->down_throttle = ktime_add_ns(ktime_get(), - gd->tunables->down_throttle_nsec); - up_write(&policy->rwsem); -} - -static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq) -{ - ktime_t now = ktime_get(); - - ktime_t throttle = gd->requested_freq < cur_freq ? - gd->down_throttle : gd->up_throttle; - - if (ktime_after(now, throttle)) - return false; - - while (1) { - int usec_left = ktime_to_ns(ktime_sub(throttle, now)); - - usec_left /= NSEC_PER_USEC; - trace_cpufreq_sched_throttled(usec_left); - usleep_range(usec_left, usec_left + 100); - now = ktime_get(); - if (ktime_after(now, throttle)) - return true; - } -} - -/* - * we pass in struct cpufreq_policy. 
This is safe because changing out the - * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP), - * which tears down all of the data structures and __cpufreq_governor(policy, - * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the - * new policy pointer - */ -static int cpufreq_sched_thread(void *data) -{ - struct sched_param param; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned int new_request = 0; - unsigned int last_request = 0; - int ret; - - policy = (struct cpufreq_policy *) data; - gd = policy->governor_data; - - param.sched_priority = 50; - ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m); - if (ret) { - pr_warn("%s: failed to set SCHED_FIFO\n", __func__); - do_exit(-EINVAL); - } else { - pr_debug("%s: kthread (%d) set to SCHED_FIFO\n", - __func__, gd->task->pid); - } - - do { - new_request = gd->requested_freq; - if (new_request == last_request) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } else { - /* - * if the frequency thread sleeps while waiting to be - * unthrottled, start over to check for a newer request - */ - if (finish_last_request(gd, policy->cur)) - continue; - last_request = new_request; - cpufreq_sched_try_driver_target(policy, new_request); - } - } while (!kthread_should_stop()); - - return 0; -} - -static void cpufreq_sched_irq_work(struct irq_work *irq_work) -{ - struct gov_data *gd; - - gd = container_of(irq_work, struct gov_data, irq_work); - if (!gd) - return; - - wake_up_process(gd->task); -} - -static void update_fdomain_capacity_request(int cpu) -{ - unsigned int freq_new, index_new, cpu_tmp; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned long capacity = 0; - - /* - * Avoid grabbing the policy if possible. A test is still - * required after locking the CPU's policy to avoid racing - * with the governor changing. 
- */ - if (!per_cpu(enabled, cpu)) - return; - - policy = cpufreq_cpu_get(cpu); - if (IS_ERR_OR_NULL(policy)) - return; - - if (policy->governor != &cpufreq_gov_sched || - !policy->governor_data) - goto out; - - gd = policy->governor_data; - - /* find max capacity requested by cpus in this policy */ - for_each_cpu(cpu_tmp, policy->cpus) { - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp); - capacity = max(capacity, scr->total); - } - - /* Convert the new maximum capacity request into a cpu frequency */ - freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT; - if (cpufreq_frequency_table_target(policy, policy->freq_table, - freq_new, CPUFREQ_RELATION_L, - &index_new)) - goto out; - freq_new = policy->freq_table[index_new].frequency; - - if (freq_new > policy->max) - freq_new = policy->max; - - if (freq_new < policy->min) - freq_new = policy->min; - - trace_cpufreq_sched_request_opp(cpu, capacity, freq_new, - gd->requested_freq); - if (freq_new == gd->requested_freq) - goto out; - - gd->requested_freq = freq_new; - - /* - * Throttling is not yet supported on platforms with fast cpufreq - * drivers. - */ - if (cpufreq_driver_slow) - irq_work_queue_on(&gd->irq_work, cpu); - else - cpufreq_sched_try_driver_target(policy, freq_new); - -out: - cpufreq_cpu_put(policy); -} - -#ifdef CONFIG_SCHED_WALT -static inline unsigned long -requested_capacity(struct sched_capacity_reqs *scr) -{ - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) - return scr->cfs; - return scr->cfs + scr->rt; -} -#else -#define requested_capacity(scr) (scr->cfs + scr->rt) -#endif - -void update_cpu_capacity_request(int cpu, bool request) -{ - unsigned long new_capacity; - struct sched_capacity_reqs *scr; - - /* The rq lock serializes access to the CPU's sched_capacity_reqs. 
*/ - lockdep_assert_held(&cpu_rq(cpu)->lock); - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - - new_capacity = requested_capacity(scr); - new_capacity = new_capacity * capacity_margin - / SCHED_CAPACITY_SCALE; - new_capacity += scr->dl; - - if (new_capacity == scr->total) - return; - - trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity); - - scr->total = new_capacity; - if (request) - update_fdomain_capacity_request(cpu); -} - -static inline void set_sched_freq(void) -{ - static_key_slow_inc(&__sched_freq); -} - -static inline void clear_sched_freq(void) -{ - static_key_slow_dec(&__sched_freq); -} - -/* Tunables */ -static struct gov_tunables *global_tunables; - -static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set) -{ - return container_of(attr_set, struct gov_tunables, attr_set); -} - -static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->up_throttle_nsec); -} - -static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->up_throttle_nsec = val; - return count; -} - -static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->down_throttle_nsec); -} - -static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->down_throttle_nsec = val; - return count; -} - -static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec); -static struct 
governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec); - -static struct attribute *schedfreq_attributes[] = { - &up_throttle_nsec.attr, - &down_throttle_nsec.attr, - NULL -}; - -static struct kobj_type tunables_ktype = { - .default_attrs = schedfreq_attributes, - .sysfs_ops = &governor_sysfs_ops, -}; - -static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) -{ - struct gov_data *gd; - int cpu; - int rc; - - for_each_cpu(cpu, policy->cpus) - memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, - sizeof(struct sched_capacity_reqs)); - - gd = kzalloc(sizeof(*gd), GFP_KERNEL); - if (!gd) - return -ENOMEM; - - policy->governor_data = gd; - - if (!global_tunables) { - gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL); - if (!gd->tunables) - goto free_gd; - - gd->tunables->up_throttle_nsec = - policy->cpuinfo.transition_latency ? - policy->cpuinfo.transition_latency : - THROTTLE_UP_NSEC; - gd->tunables->down_throttle_nsec = - THROTTLE_DOWN_NSEC; - - rc = kobject_init_and_add(&gd->tunables->attr_set.kobj, - &tunables_ktype, - get_governor_parent_kobj(policy), - "%s", cpufreq_gov_sched.name); - if (rc) - goto free_tunables; - - gov_attr_set_init(&gd->tunables->attr_set, - &gd->tunables_hook); - - pr_debug("%s: throttle_threshold = %u [ns]\n", - __func__, gd->tunables->up_throttle_nsec); - - if (!have_governor_per_policy()) - global_tunables = gd->tunables; - } else { - gd->tunables = global_tunables; - gov_attr_set_get(&global_tunables->attr_set, - &gd->tunables_hook); - } - - policy->governor_data = gd; - if (cpufreq_driver_is_slow()) { - cpufreq_driver_slow = true; - gd->task = kthread_create(cpufreq_sched_thread, policy, - "kschedfreq:%d", - cpumask_first(policy->related_cpus)); - if (IS_ERR_OR_NULL(gd->task)) { - pr_err("%s: failed to create kschedfreq thread\n", - __func__); - goto free_tunables; - } - get_task_struct(gd->task); - kthread_bind_mask(gd->task, policy->related_cpus); - wake_up_process(gd->task); - init_irq_work(&gd->irq_work, 
cpufreq_sched_irq_work); - } - - set_sched_freq(); - - return 0; - -free_tunables: - kfree(gd->tunables); -free_gd: - policy->governor_data = NULL; - kfree(gd); - return -ENOMEM; -} - -static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy) -{ - unsigned int count; - struct gov_data *gd = policy->governor_data; - - clear_sched_freq(); - if (cpufreq_driver_slow) { - kthread_stop(gd->task); - put_task_struct(gd->task); - } - - count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook); - if (!count) { - if (!have_governor_per_policy()) - global_tunables = NULL; - kfree(gd->tunables); - } - - policy->governor_data = NULL; - - kfree(gd); - return 0; -} - -static int cpufreq_sched_start(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 1; - - return 0; -} - -static void cpufreq_sched_limits(struct cpufreq_policy *policy) -{ - unsigned int clamp_freq; - struct gov_data *gd = policy->governor_data;; - - pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n", - policy->cpu, policy->min, policy->max, - policy->cur); - - clamp_freq = clamp(gd->requested_freq, policy->min, policy->max); - - if (policy->cur != clamp_freq) - __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L); -} - -static int cpufreq_sched_stop(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 0; - - return 0; -} - -static int cpufreq_sched_setup(struct cpufreq_policy *policy, - unsigned int event) -{ - switch (event) { - case CPUFREQ_GOV_POLICY_INIT: - return cpufreq_sched_policy_init(policy); - case CPUFREQ_GOV_POLICY_EXIT: - return cpufreq_sched_policy_exit(policy); - case CPUFREQ_GOV_START: - return cpufreq_sched_start(policy); - case CPUFREQ_GOV_STOP: - return cpufreq_sched_stop(policy); - case CPUFREQ_GOV_LIMITS: - cpufreq_sched_limits(policy); - break; - } - return 0; -} - - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static -#endif -struct 
cpufreq_governor cpufreq_gov_sched = { - .name = "sched", - .governor = cpufreq_sched_setup, - .owner = THIS_MODULE, -}; - -static int __init cpufreq_sched_init(void) -{ - int cpu; - - for_each_cpu(cpu, cpu_possible_mask) - per_cpu(enabled, cpu) = 0; - return cpufreq_register_governor(&cpufreq_gov_sched); -} - -/* Try to make this the default governor */ -fs_initcall(cpufreq_sched_init); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5c65f3ad6da1..b5ea66e5551c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -54,7 +54,6 @@ unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; unsigned int sysctl_sched_sync_hint_enable = 1; -unsigned int sysctl_sched_initial_task_util = 0; unsigned int sysctl_sched_cstate_aware = 1; #ifdef CONFIG_SCHED_WALT @@ -750,9 +749,7 @@ void init_entity_runnable_average(struct sched_entity *se) sa->load_sum = sa->load_avg * LOAD_AVG_MAX; /* * In previous Android versions, we used to have: - * sa->util_avg = sched_freq() ? - * sysctl_sched_initial_task_util : - * scale_load_down(SCHED_LOAD_SCALE); + * sa->util_avg = scale_load_down(SCHED_LOAD_SCALE); * sa->util_sum = sa->util_avg * LOAD_AVG_MAX; * However, that functionality has been moved to enqueue. * It is unclear if we should restore this in enqueue. @@ -4668,21 +4665,6 @@ unsigned long boosted_cpu_util(int cpu); #define boosted_cpu_util(cpu) cpu_util_freq(cpu) #endif -#ifdef CONFIG_SMP -static void update_capacity_of(int cpu) -{ - unsigned long req_cap; - - if (!sched_freq()) - return; - - /* Normalize scale-invariant capacity to cpu. */ - req_cap = boosted_cpu_util(cpu); - req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, req_cap); -} -#endif - /* * The enqueue_task method is called before nr_running is * increased. 
Here we update the fair scheduling stats and @@ -4695,7 +4677,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct sched_entity *se = &p->se; #ifdef CONFIG_SMP int task_new = flags & ENQUEUE_WAKEUP_NEW; - int task_wakeup = flags & ENQUEUE_WAKEUP; #endif /* @@ -4769,16 +4750,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) rq->rd->overutilized = true; trace_sched_overutilized(true); } - - /* - * We want to potentially trigger a freq switch - * request only for tasks that are waking up; this is - * because we get here also during load balancing, but - * in these cases it seems wise to trigger as single - * request after load balancing is done. - */ - if (task_new || task_wakeup) - update_capacity_of(cpu_of(rq)); } #endif /* CONFIG_SMP */ @@ -4854,25 +4825,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) */ schedtune_dequeue_task(p, cpu_of(rq)); - if (!se) { + if (!se) walt_dec_cumulative_runnable_avg(rq, p); - - /* - * We want to potentially trigger a freq switch - * request only for tasks that are going to sleep; - * this is because we get here also during load - * balancing, but in these cases it seems wise to - * trigger as single request after load balancing is - * done. - */ - if (task_sleep) { - if (rq->cfs.nr_running) - update_capacity_of(cpu_of(rq)); - else if (sched_freq()) - set_cfs_cpu_capacity(cpu_of(rq), false, 0); /* no normalization required for 0 */ - } - } - #endif /* CONFIG_SMP */ hrtick_update(rq); @@ -7709,10 +7663,6 @@ static void attach_one_task(struct rq *rq, struct task_struct *p) { raw_spin_lock(&rq->lock); attach_task(rq, p); - /* - * We want to potentially raise target_cpu's OPP. - */ - update_capacity_of(cpu_of(rq)); raw_spin_unlock(&rq->lock); } @@ -7734,11 +7684,6 @@ static void attach_tasks(struct lb_env *env) attach_task(env->dst_rq, p); } - /* - * We want to potentially raise env.dst_cpu's OPP. 
- */ - update_capacity_of(env->dst_cpu); - raw_spin_unlock(&env->dst_rq->lock); } @@ -9081,11 +9026,6 @@ more_balance: * ld_moved - cumulative load moved across iterations */ cur_ld_moved = detach_tasks(&env); - /* - * We want to potentially lower env.src_cpu's OPP. - */ - if (cur_ld_moved) - update_capacity_of(env.src_cpu); /* * We've detached some tasks from busiest_rq. Every @@ -9310,7 +9250,6 @@ static int idle_balance(struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; u64 curr_cost = 0; - long removed_util=0; idle_enter_fair(this_rq); @@ -9334,17 +9273,6 @@ static int idle_balance(struct rq *this_rq) raw_spin_unlock(&this_rq->lock); - /* - * If removed_util_avg is !0 we most probably migrated some task away - * from this_cpu. In this case we might be willing to trigger an OPP - * update, but we want to do so if we don't find anybody else to pull - * here (we will trigger an OPP update with the pulled task's enqueue - * anyway). - * - * Record removed_util before calling update_blocked_averages, and use - * it below (before returning) to see if an OPP update is required. - */ - removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg); update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { @@ -9409,12 +9337,6 @@ out: if (pulled_task) { idle_exit_fair(this_rq); this_rq->idle_stamp = 0; - } else if (removed_util) { - /* - * No task pulled and someone has been migrated away. - * Good case to trigger an OPP update. - */ - update_capacity_of(this_cpu); } return pulled_task; @@ -9488,13 +9410,8 @@ static int active_load_balance_cpu_stop(void *data) update_rq_clock(busiest_rq); p = detach_one_task(&env); - if (p) { + if (p) schedstat_inc(sd, alb_pushed); - /* - * We want to potentially lower env.src_cpu's OPP. 
- */ - update_capacity_of(env.src_cpu); - } else schedstat_inc(sd, alb_failed); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f41435e7f75d..ebf0d9329c86 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1636,41 +1636,6 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag #endif } -#ifdef CONFIG_SMP -static void sched_rt_update_capacity_req(struct rq *rq) -{ - u64 total, used, age_stamp, avg; - s64 delta; - - if (!sched_freq()) - return; - - sched_avg_update(rq); - /* - * Since we're reading these variables without serialization make sure - * we read them once before doing sanity checks on them. - */ - age_stamp = READ_ONCE(rq->age_stamp); - avg = READ_ONCE(rq->rt_avg); - delta = rq_clock(rq) - age_stamp; - - if (unlikely(delta < 0)) - delta = 0; - - total = sched_avg_period() + delta; - - used = div_u64(avg, total); - if (unlikely(used > SCHED_CAPACITY_SCALE)) - used = SCHED_CAPACITY_SCALE; - - set_rt_cpu_capacity(rq->cpu, 1, (unsigned long)(used)); -} -#else -static inline void sched_rt_update_capacity_req(struct rq *rq) -{ } - -#endif - static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, struct rt_rq *rt_rq) { @@ -1739,17 +1704,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) if (prev->sched_class == &rt_sched_class) update_curr_rt(rq); - if (!rt_rq->rt_queued) { - /* - * The next task to be picked on this rq will have a lower - * priority than rt tasks so we can spend some time to update - * the capacity used by rt tasks based on the last activity. - * This value will be the used as an estimation of the next - * activity. 
- */ - sched_rt_update_capacity_req(rq); + if (!rt_rq->rt_queued) return NULL; - } put_prev_task(rq, prev); @@ -2476,9 +2432,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) update_curr_rt(rq); - if (rq->rt.rt_nr_running) - sched_rt_update_capacity_req(rq); - watchdog(rq, p); /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 028e232103c2..782746140711 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1629,81 +1629,6 @@ static inline unsigned long cpu_util_freq(int cpu) #endif -#ifdef CONFIG_CPU_FREQ_GOV_SCHED -#define capacity_max SCHED_CAPACITY_SCALE -extern unsigned int capacity_margin; -extern struct static_key __sched_freq; - -static inline bool sched_freq(void) -{ - return static_key_false(&__sched_freq); -} - -/* - * sched_capacity_reqs expects capacity requests to be normalised. - * All capacities should sum to the range of 0-1024. - */ -DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); -void update_cpu_capacity_request(int cpu, bool request); - -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - -#ifdef CONFIG_SCHED_WALT - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { - int rtdl = scr->rt + scr->dl; - /* - * WALT tracks the utilization of a CPU considering the load - * generated by all the scheduling classes. - * Since the following call to: - * update_cpu_capacity - * is already adding the RT and DL utilizations let's remove - * these contributions from the WALT signal. 
- */ - if (capacity > rtdl) - capacity -= rtdl; - else - capacity = 0; - } -#endif - if (scr->cfs != capacity) { - scr->cfs = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity; - update_cpu_capacity_request(cpu, request); - } -} -#else -static inline bool sched_freq(void) { return false; } -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -#endif - static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 55caf81a833f..4e2f98dd2052 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -342,13 +342,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "sched_initial_task_util", - .data = &sysctl_sched_initial_task_util, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "sched_cstate_aware", .data = &sysctl_sched_cstate_aware, -- cgit v1.2.3