Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/boost.c |   9
-rw-r--r--  kernel/sched/core.c  |  11
-rw-r--r--  kernel/sched/fair.c  |  29
-rw-r--r--  kernel/sched/hmp.c   | 322
-rw-r--r--  kernel/sched/rt.c    |  20
-rw-r--r--  kernel/sched/sched.h |  35
-rw-r--r--  kernel/sysctl.c      |   5
7 files changed, 107 insertions, 324 deletions
diff --git a/kernel/sched/boost.c b/kernel/sched/boost.c
index fcfda385b74a..5bdd51b1e55e 100644
--- a/kernel/sched/boost.c
+++ b/kernel/sched/boost.c
@@ -156,9 +156,6 @@ void sched_boost_parse_dt(void)
struct device_node *sn;
const char *boost_policy;
- if (!sched_enable_hmp)
- return;
-
sn = of_find_node_by_path("/sched-hmp");
if (!sn)
return;
@@ -175,9 +172,6 @@ int sched_set_boost(int type)
{
int ret = 0;
- if (!sched_enable_hmp)
- return -EINVAL;
-
mutex_lock(&boost_mutex);
if (verify_boost_params(sysctl_sched_boost, type))
@@ -197,9 +191,6 @@ int sched_boost_handler(struct ctl_table *table, int write,
unsigned int *data = (unsigned int *)table->data;
unsigned int old_val;
- if (!sched_enable_hmp)
- return -EINVAL;
-
mutex_lock(&boost_mutex);
old_val = *data;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 519aee32e122..3fcadbae663d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3025,8 +3025,9 @@ void sched_exec(void)
unsigned long flags;
int dest_cpu, curr_cpu;
- if (sched_enable_hmp)
- return;
+#ifdef CONFIG_SCHED_HMP
+ return;
+#endif
raw_spin_lock_irqsave(&p->pi_lock, flags);
curr_cpu = task_cpu(p);
@@ -8215,8 +8216,9 @@ void __init sched_init(void)
int i, j;
unsigned long alloc_size = 0, ptr;
- if (sched_enable_hmp)
- pr_info("HMP scheduling enabled.\n");
+#ifdef CONFIG_SCHED_HMP
+ pr_info("HMP scheduling enabled.\n");
+#endif
BUG_ON(num_possible_cpus() > BITS_PER_LONG);
@@ -8362,6 +8364,7 @@ void __init sched_init(void)
rq->cluster = &init_cluster;
rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
+ memset(&rq->grp_time, 0, sizeof(struct group_cpu_time));
rq->old_busy_time = 0;
rq->old_estimated_time = 0;
rq->old_busy_time_group = 0;
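
The two core.c hunks above swap the old runtime sched_enable_hmp test for a compile-time CONFIG_SCHED_HMP guard, so non-HMP builds lose the branch entirely and HMP builds return before (or log ahead of) the generic path. Below is a minimal standalone sketch of that guard pattern, with an illustrative function name rather than the kernel's, built with or without -DCONFIG_SCHED_HMP; the fair.c and rt.c hunks further down apply the same pattern to dispatch into their *_hmp() variants.

#include <stdio.h>

/*
 * Toy stand-in for sched_exec(): an HMP build compiles the early
 * return in and never reaches the generic balancing logic below it.
 */
static void sched_exec_sketch(void)
{
#ifdef CONFIG_SCHED_HMP
        return;                 /* HMP places the task elsewhere */
#endif
        printf("generic exec-time balance runs\n");
}

int main(void)
{
        sched_exec_sketch();    /* prints only when CONFIG_SCHED_HMP is unset */
        return 0;
}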
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ac4c3f1d144a..6f68b0e19c4a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3332,9 +3332,9 @@ void _inc_hmp_sched_stats_fair(struct rq *rq,
* inc/dec_nr_big_task and inc/dec_cumulative_runnable_avg called
* from inc_cfs_rq_hmp_stats() have similar checks), we gain a bit on
* efficiency by short-circuiting for_each_sched_entity() loop when
- * !sched_enable_hmp || sched_disable_window_stats
+ * sched_disable_window_stats
*/
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
for_each_sched_entity(se) {
@@ -3357,7 +3357,7 @@ _dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p, int change_cra)
struct sched_entity *se = &p->se;
/* See comment on efficiency in _inc_hmp_sched_stats_fair */
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
for_each_sched_entity(se) {
@@ -3482,8 +3482,7 @@ static inline int migration_needed(struct task_struct *p, int cpu)
int nice;
struct related_thread_group *grp;
- if (!sched_enable_hmp || p->state != TASK_RUNNING ||
- p->nr_cpus_allowed == 1)
+ if (p->state != TASK_RUNNING || p->nr_cpus_allowed == 1)
return 0;
/* No need to migrate task that is about to be throttled */
@@ -7024,8 +7023,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
int want_affine = 0;
int sync = wake_flags & WF_SYNC;
- if (sched_enable_hmp)
- return select_best_cpu(p, prev_cpu, 0, sync);
+#ifdef CONFIG_SCHED_HMP
+ return select_best_cpu(p, prev_cpu, 0, sync);
+#endif
if (sd_flag & SD_BALANCE_WAKE)
want_affine = (!wake_wide(p) && task_fits_max(p, cpu) &&
@@ -9313,8 +9313,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
unsigned long busiest_load = 0, busiest_capacity = 1;
int i;
- if (sched_enable_hmp)
- return find_busiest_queue_hmp(env, group);
+#ifdef CONFIG_SCHED_HMP
+ return find_busiest_queue_hmp(env, group);
+#endif
for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
unsigned long capacity, wl;
@@ -10120,8 +10121,9 @@ static inline int find_new_ilb(int type)
{
int ilb;
- if (sched_enable_hmp)
- return find_new_hmp_ilb(type);
+#ifdef CONFIG_SCHED_HMP
+ return find_new_hmp_ilb(type);
+#endif
ilb = cpumask_first(nohz.idle_cpus_mask);
@@ -10496,8 +10498,9 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
if (likely(!atomic_read(&nohz.nr_cpus)))
return 0;
- if (sched_enable_hmp)
- return _nohz_kick_needed_hmp(rq, cpu, type);
+#ifdef CONFIG_SCHED_HMP
+ return _nohz_kick_needed_hmp(rq, cpu, type);
+#endif
if (time_before(now, nohz.next_balance))
return 0;
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index d54db37a7d0c..6379de764236 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -28,8 +28,7 @@ const char *task_event_names[] = {"PUT_PREV_TASK", "PICK_NEXT_TASK",
"TASK_WAKE", "TASK_MIGRATE", "TASK_UPDATE",
"IRQ_UPDATE"};
-const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP",
- "RQ_TO_RQ", "GROUP_TO_GROUP"};
+const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP"};
static ktime_t ktime_last;
static bool sched_ktime_suspended;
@@ -616,19 +615,6 @@ int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
return 0;
}
-int __init set_sched_enable_hmp(char *str)
-{
- int enable_hmp = 0;
-
- get_option(&str, &enable_hmp);
-
- sched_enable_hmp = !!enable_hmp;
-
- return 0;
-}
-
-early_param("sched_enable_hmp", set_sched_enable_hmp);
-
/* Clear any HMP scheduler related requests pending from or on cpu */
void clear_hmp_request(int cpu)
{
@@ -870,9 +856,6 @@ unsigned int max_task_load(void)
return sched_ravg_window;
}
-/* Use this knob to turn on or off HMP-aware task placement logic */
-unsigned int __read_mostly sched_enable_hmp;
-
/* A cpu can no longer accommodate more tasks if:
*
* rq->nr_running > sysctl_sched_spill_nr_run ||
@@ -1245,7 +1228,7 @@ unlock:
void inc_nr_big_task(struct hmp_sched_stats *stats, struct task_struct *p)
{
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
if (is_big_task(p))
@@ -1254,7 +1237,7 @@ void inc_nr_big_task(struct hmp_sched_stats *stats, struct task_struct *p)
void dec_nr_big_task(struct hmp_sched_stats *stats, struct task_struct *p)
{
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
if (is_big_task(p))
@@ -1323,7 +1306,7 @@ void fixup_nr_big_tasks(struct hmp_sched_stats *stats,
u64 new_task_load;
u64 old_task_load;
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
old_task_load = scale_load_to_cpu(task_load(p), task_cpu(p));
@@ -1433,9 +1416,6 @@ int sched_window_update_handler(struct ctl_table *table, int write,
unsigned int *data = (unsigned int *)table->data;
unsigned int old_val;
- if (!sched_enable_hmp)
- return -EINVAL;
-
mutex_lock(&policy_mutex);
old_val = *data;
@@ -1471,9 +1451,6 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
unsigned int *data = (unsigned int *)table->data;
int update_task_count = 0;
- if (!sched_enable_hmp)
- return 0;
-
/*
* The policy mutex is acquired with cpu_hotplug.lock
* held from cpu_up()->cpufreq_governor_interactive()->
@@ -1713,45 +1690,19 @@ static inline unsigned int load_to_freq(struct rq *rq, u64 load)
return freq;
}
-static inline struct group_cpu_time *
-_group_cpu_time(struct related_thread_group *grp, int cpu);
-
-/*
- * Return load from all related group in given cpu.
- * Caller must ensure that related_thread_group_lock is held.
- */
-static void _group_load_in_cpu(int cpu, u64 *grp_load, u64 *new_grp_load)
-{
- struct related_thread_group *grp;
-
- for_each_related_thread_group(grp) {
- struct group_cpu_time *cpu_time;
-
- cpu_time = _group_cpu_time(grp, cpu);
- *grp_load += cpu_time->prev_runnable_sum;
- if (new_grp_load)
- *new_grp_load += cpu_time->nt_prev_runnable_sum;
- }
-}
-
/*
* Return load from all related groups in given frequency domain.
- * Caller must ensure that related_thread_group_lock is held.
*/
static void group_load_in_freq_domain(struct cpumask *cpus,
u64 *grp_load, u64 *new_grp_load)
{
- struct related_thread_group *grp;
int j;
- for_each_related_thread_group(grp) {
- for_each_cpu(j, cpus) {
- struct group_cpu_time *cpu_time;
+ for_each_cpu(j, cpus) {
+ struct rq *rq = cpu_rq(j);
- cpu_time = _group_cpu_time(grp, j);
- *grp_load += cpu_time->prev_runnable_sum;
- *new_grp_load += cpu_time->nt_prev_runnable_sum;
- }
+ *grp_load += rq->grp_time.prev_runnable_sum;
+ *new_grp_load += rq->grp_time.nt_prev_runnable_sum;
}
}
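
With each related thread group's CPU time now folded into the runqueue's own grp_time, the aggregation above no longer walks every group under related_thread_group_lock; it just sums one structure per CPU in the frequency domain. A standalone model of that summation, using a plain array in place of cpu_rq() and an index list in place of a cpumask (both illustrative):

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS_SKETCH 4

struct group_cpu_time {
        uint64_t curr_runnable_sum;
        uint64_t prev_runnable_sum;
        uint64_t nt_curr_runnable_sum;
        uint64_t nt_prev_runnable_sum;
};

/* Stand-in for cpu_rq(cpu)->grp_time */
static struct group_cpu_time grp_time[NR_CPUS_SKETCH];

/*
 * Model of group_load_in_freq_domain(): sum the grouped load (and its
 * new-task portion) over every CPU in the frequency domain.
 */
static void group_load_in_freq_domain(const int *cpus, int nr,
                                      uint64_t *grp_load, uint64_t *new_grp_load)
{
        int i;

        for (i = 0; i < nr; i++) {
                int cpu = cpus[i];

                *grp_load += grp_time[cpu].prev_runnable_sum;
                *new_grp_load += grp_time[cpu].nt_prev_runnable_sum;
        }
}

int main(void)
{
        int domain[] = { 0, 1 };
        uint64_t load = 0, nt_load = 0;

        grp_time[0].prev_runnable_sum = 100;
        grp_time[1].prev_runnable_sum = 50;
        grp_time[1].nt_prev_runnable_sum = 20;

        group_load_in_freq_domain(domain, 2, &load, &nt_load);
        printf("grp_load=%llu new_grp_load=%llu\n",
               (unsigned long long)load, (unsigned long long)nt_load);
        return 0;
}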
@@ -1776,9 +1727,6 @@ static int send_notification(struct rq *rq, int check_pred, int check_groups)
int rc = 0;
u64 group_load = 0, new_load = 0;
- if (!sched_enable_hmp)
- return 0;
-
if (check_pred) {
u64 prev = rq->old_busy_time;
u64 predicted = rq->hmp_stats.pred_demands_sum;
@@ -1796,20 +1744,18 @@ static int send_notification(struct rq *rq, int check_pred, int check_groups)
if (freq_required < cur_freq + sysctl_sched_pred_alert_freq)
return 0;
} else {
- read_lock_irqsave(&related_thread_group_lock, flags);
/*
* Protect from concurrent update of rq->prev_runnable_sum and
* group cpu load
*/
- raw_spin_lock(&rq->lock);
+ raw_spin_lock_irqsave(&rq->lock, flags);
if (check_groups)
- _group_load_in_cpu(cpu_of(rq), &group_load, NULL);
+ group_load = rq->grp_time.prev_runnable_sum;
new_load = rq->prev_runnable_sum + group_load;
new_load = freq_policy_load(rq, new_load);
- raw_spin_unlock(&rq->lock);
- read_unlock_irqrestore(&related_thread_group_lock, flags);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
cur_freq = load_to_freq(rq, rq->old_busy_time);
freq_required = load_to_freq(rq, new_load);
@@ -2283,6 +2229,31 @@ static void rollover_task_window(struct task_struct *p, bool full_window)
}
}
+static void rollover_cpu_window(struct rq *rq, bool full_window)
+{
+ u64 curr_sum = rq->curr_runnable_sum;
+ u64 nt_curr_sum = rq->nt_curr_runnable_sum;
+ u64 grp_curr_sum = rq->grp_time.curr_runnable_sum;
+ u64 grp_nt_curr_sum = rq->grp_time.nt_curr_runnable_sum;
+
+ if (unlikely(full_window)) {
+ curr_sum = 0;
+ nt_curr_sum = 0;
+ grp_curr_sum = 0;
+ grp_nt_curr_sum = 0;
+ }
+
+ rq->prev_runnable_sum = curr_sum;
+ rq->nt_prev_runnable_sum = nt_curr_sum;
+ rq->grp_time.prev_runnable_sum = grp_curr_sum;
+ rq->grp_time.nt_prev_runnable_sum = grp_nt_curr_sum;
+
+ rq->curr_runnable_sum = 0;
+ rq->nt_curr_runnable_sum = 0;
+ rq->grp_time.curr_runnable_sum = 0;
+ rq->grp_time.nt_curr_runnable_sum = 0;
+}
+
/*
* Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
*/
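
The new rollover_cpu_window() above concentrates the window rollover in one place: the current-window sums become the previous-window sums, unless a full window (or more) has elapsed, in which case the promoted values are zeroed; the current sums always restart at zero. A standalone model of that rollover on a single counter pair follows; the kernel function applies the same move to both the rq sums and the embedded rq->grp_time sums.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cpu_window_sums {
        uint64_t curr_runnable_sum;
        uint64_t prev_runnable_sum;
        uint64_t nt_curr_runnable_sum;
        uint64_t nt_prev_runnable_sum;
};

/*
 * Model of rollover_cpu_window(): promote curr -> prev, or zero the
 * promoted values when the CPU slept through at least one full window.
 */
static void rollover(struct cpu_window_sums *w, bool full_window)
{
        uint64_t curr = full_window ? 0 : w->curr_runnable_sum;
        uint64_t nt_curr = full_window ? 0 : w->nt_curr_runnable_sum;

        w->prev_runnable_sum = curr;
        w->nt_prev_runnable_sum = nt_curr;
        w->curr_runnable_sum = 0;
        w->nt_curr_runnable_sum = 0;
}

int main(void)
{
        struct cpu_window_sums w = { .curr_runnable_sum = 1234,
                                     .nt_curr_runnable_sum = 56 };

        rollover(&w, false);
        printf("prev=%llu nt_prev=%llu\n",
               (unsigned long long)w.prev_runnable_sum,
               (unsigned long long)w.nt_prev_runnable_sum);
        return 0;
}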
@@ -2299,8 +2270,6 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
u64 *prev_runnable_sum = &rq->prev_runnable_sum;
u64 *nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
u64 *nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
- int flip_counters = 0;
- int prev_sum_reset = 0;
bool new_task;
struct related_thread_group *grp;
int cpu = rq->cpu;
@@ -2315,51 +2284,6 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
new_task = is_new_task(p);
- grp = p->grp;
- if (grp && sched_freq_aggregate) {
- /* cpu_time protected by rq_lock */
- struct group_cpu_time *cpu_time =
- _group_cpu_time(grp, cpu_of(rq));
-
- curr_runnable_sum = &cpu_time->curr_runnable_sum;
- prev_runnable_sum = &cpu_time->prev_runnable_sum;
-
- nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
- nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
-
- if (cpu_time->window_start != rq->window_start) {
- int nr_windows;
-
- delta = rq->window_start - cpu_time->window_start;
- nr_windows = div64_u64(delta, window_size);
- if (nr_windows > 1)
- prev_sum_reset = 1;
-
- cpu_time->window_start = rq->window_start;
- flip_counters = 1;
- }
-
- if (p_is_curr_task && new_window) {
- u64 curr_sum = rq->curr_runnable_sum;
- u64 nt_curr_sum = rq->nt_curr_runnable_sum;
-
- if (full_window)
- curr_sum = nt_curr_sum = 0;
-
- rq->prev_runnable_sum = curr_sum;
- rq->nt_prev_runnable_sum = nt_curr_sum;
-
- rq->curr_runnable_sum = 0;
- rq->nt_curr_runnable_sum = 0;
- }
- } else {
- if (p_is_curr_task && new_window) {
- flip_counters = 1;
- if (full_window)
- prev_sum_reset = 1;
- }
- }
-
/*
* Handle per-task window rollover. We don't care about the idle
* task or exiting tasks.
@@ -2369,26 +2293,25 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
rollover_task_window(p, full_window);
}
- if (flip_counters) {
- u64 curr_sum = *curr_runnable_sum;
- u64 nt_curr_sum = *nt_curr_runnable_sum;
+ if (p_is_curr_task && new_window) {
+ rollover_cpu_window(rq, full_window);
+ rollover_top_tasks(rq, full_window);
+ }
- if (prev_sum_reset)
- curr_sum = nt_curr_sum = 0;
+ if (!account_busy_for_cpu_time(rq, p, irqtime, event))
+ goto done;
- *prev_runnable_sum = curr_sum;
- *nt_prev_runnable_sum = nt_curr_sum;
+ grp = p->grp;
+ if (grp && sched_freq_aggregate) {
+ struct group_cpu_time *cpu_time = &rq->grp_time;
- *curr_runnable_sum = 0;
- *nt_curr_runnable_sum = 0;
+ curr_runnable_sum = &cpu_time->curr_runnable_sum;
+ prev_runnable_sum = &cpu_time->prev_runnable_sum;
- if (p_is_curr_task)
- rollover_top_tasks(rq, full_window);
+ nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
+ nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
}
- if (!account_busy_for_cpu_time(rq, p, irqtime, event))
- goto done;
-
if (!new_window) {
/*
* account_busy_for_cpu_time() = 1 so busy time needs
@@ -2905,7 +2828,7 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
done:
trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime,
rq->cc.cycles, rq->cc.time,
- _group_cpu_time(p->grp, cpu_of(rq)));
+ p->grp ? &rq->grp_time : NULL);
p->ravg.mark_start = wallclock;
}
@@ -3012,7 +2935,7 @@ void set_window_start(struct rq *rq)
{
static int sync_cpu_available;
- if (rq->window_start || !sched_enable_hmp)
+ if (rq->window_start)
return;
if (!sync_cpu_available) {
@@ -3063,7 +2986,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
u64 start_ts = sched_ktime_clock();
int reason = WINDOW_CHANGE;
unsigned int old = 0, new = 0;
- struct related_thread_group *grp;
local_irq_save(flags);
@@ -3081,19 +3003,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
read_unlock(&tasklist_lock);
- list_for_each_entry(grp, &active_related_thread_groups, list) {
- int j;
-
- for_each_possible_cpu(j) {
- struct group_cpu_time *cpu_time;
- /* Protected by rq lock */
- cpu_time = _group_cpu_time(grp, j);
- memset(cpu_time, 0, sizeof(struct group_cpu_time));
- if (window_start)
- cpu_time->window_start = window_start;
- }
- }
-
if (window_size) {
sched_ravg_window = window_size * TICK_NSEC;
set_hmp_defaults();
@@ -3109,6 +3018,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
rq->window_start = window_start;
rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
+ memset(&rq->grp_time, 0, sizeof(struct group_cpu_time));
for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
memset(&rq->load_subs[i], 0,
sizeof(struct load_subtractions));
@@ -3198,15 +3108,12 @@ static inline u64 freq_policy_load(struct rq *rq, u64 load)
case FREQ_REPORT_CPU_LOAD:
break;
default:
- WARN_ON_ONCE(1);
+ break;
}
return load;
}
-static inline void
-sync_window_start(struct rq *rq, struct group_cpu_time *cpu_time);
-
void sched_get_cpus_busy(struct sched_load *busy,
const struct cpumask *query_cpus)
{
@@ -3223,7 +3130,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
unsigned int window_size;
u64 max_prev_sum = 0;
int max_busy_cpu = cpumask_first(query_cpus);
- struct related_thread_group *grp;
u64 total_group_load = 0, total_ngload = 0;
bool aggregate_load = false;
struct sched_cluster *cluster = cpu_cluster(cpumask_first(query_cpus));
@@ -3233,8 +3139,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
local_irq_save(flags);
- read_lock(&related_thread_group_lock);
-
/*
* This function could be called in timer context, and the
* current task may have been executing for a long time. Ensure
@@ -3287,15 +3191,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
raw_spin_unlock(&cluster->load_lock);
- for_each_related_thread_group(grp) {
- for_each_cpu(cpu, query_cpus) {
- /* Protected by rq_lock */
- struct group_cpu_time *cpu_time =
- _group_cpu_time(grp, cpu);
- sync_window_start(cpu_rq(cpu), cpu_time);
- }
- }
-
group_load_in_freq_domain(
&cpu_rq(max_busy_cpu)->freq_domain_cpumask,
&total_group_load, &total_ngload);
@@ -3316,7 +3211,8 @@ void sched_get_cpus_busy(struct sched_load *busy,
ngload[i] = total_ngload;
}
} else {
- _group_load_in_cpu(cpu, &group_load[i], &ngload[i]);
+ group_load[i] = rq->grp_time.prev_runnable_sum;
+ ngload[i] = rq->grp_time.nt_prev_runnable_sum;
}
load[i] += group_load[i];
@@ -3341,8 +3237,6 @@ skip_early:
for_each_cpu(cpu, query_cpus)
raw_spin_unlock(&(cpu_rq(cpu))->lock);
- read_unlock(&related_thread_group_lock);
-
local_irq_restore(flags);
i = 0;
@@ -3620,7 +3514,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
bool new_task;
struct related_thread_group *grp;
- if (!sched_enable_hmp || (!p->on_rq && p->state != TASK_WAKING))
+ if (!p->on_rq && p->state != TASK_WAKING)
return;
if (exiting_task(p)) {
@@ -3659,18 +3553,17 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
if (grp && sched_freq_aggregate) {
struct group_cpu_time *cpu_time;
- cpu_time = _group_cpu_time(grp, cpu_of(src_rq));
+ cpu_time = &src_rq->grp_time;
src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
- cpu_time = _group_cpu_time(grp, cpu_of(dest_rq));
+ cpu_time = &dest_rq->grp_time;
dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
- sync_window_start(dest_rq, cpu_time);
if (p->ravg.curr_window) {
*src_curr_runnable_sum -= p->ravg.curr_window;
@@ -3799,61 +3692,6 @@ void set_preferred_cluster(struct related_thread_group *grp)
#define DEFAULT_CGROUP_COLOC_ID 1
-static inline void free_group_cputime(struct related_thread_group *grp)
-{
- free_percpu(grp->cpu_time);
-}
-
-static int alloc_group_cputime(struct related_thread_group *grp)
-{
- int i;
- struct group_cpu_time *cpu_time;
- int cpu = raw_smp_processor_id();
- struct rq *rq = cpu_rq(cpu);
- u64 window_start = rq->window_start;
-
- grp->cpu_time = alloc_percpu_gfp(struct group_cpu_time, GFP_ATOMIC);
- if (!grp->cpu_time)
- return -ENOMEM;
-
- for_each_possible_cpu(i) {
- cpu_time = per_cpu_ptr(grp->cpu_time, i);
- memset(cpu_time, 0, sizeof(struct group_cpu_time));
- cpu_time->window_start = window_start;
- }
-
- return 0;
-}
-
-/*
- * A group's window_start may be behind. When moving it forward, flip prev/curr
- * counters. When moving forward > 1 window, prev counter is set to 0
- */
-static inline void
-sync_window_start(struct rq *rq, struct group_cpu_time *cpu_time)
-{
- u64 delta;
- int nr_windows;
- u64 curr_sum = cpu_time->curr_runnable_sum;
- u64 nt_curr_sum = cpu_time->nt_curr_runnable_sum;
-
- delta = rq->window_start - cpu_time->window_start;
- if (!delta)
- return;
-
- nr_windows = div64_u64(delta, sched_ravg_window);
- if (nr_windows > 1)
- curr_sum = nt_curr_sum = 0;
-
- cpu_time->prev_runnable_sum = curr_sum;
- cpu_time->curr_runnable_sum = 0;
-
- cpu_time->nt_prev_runnable_sum = nt_curr_sum;
- cpu_time->nt_curr_runnable_sum = 0;
-
- cpu_time->window_start = rq->window_start;
-}
-
/*
* Task's cpu usage is accounted in:
* rq->curr/prev_runnable_sum, when its ->grp is NULL
@@ -3871,7 +3709,6 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
- struct migration_sum_data d;
int migrate_type;
int cpu = cpu_of(rq);
bool new_task;
@@ -3886,15 +3723,10 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
new_task = is_new_task(p);
- /* cpu_time protected by related_thread_group_lock, grp->lock rq_lock */
- cpu_time = _group_cpu_time(grp, cpu);
+ cpu_time = &rq->grp_time;
if (event == ADD_TASK) {
- sync_window_start(rq, cpu_time);
migrate_type = RQ_TO_GROUP;
- d.src_rq = rq;
- d.src_cpu_time = NULL;
- d.dst_rq = NULL;
- d.dst_cpu_time = cpu_time;
+
src_curr_runnable_sum = &rq->curr_runnable_sum;
dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
src_prev_runnable_sum = &rq->prev_runnable_sum;
@@ -3919,17 +3751,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
} else {
migrate_type = GROUP_TO_RQ;
- d.src_rq = NULL;
- d.src_cpu_time = cpu_time;
- d.dst_rq = rq;
- d.dst_cpu_time = NULL;
- /*
- * In case of REM_TASK, cpu_time->window_start would be
- * uptodate, because of the update_task_ravg() we called
- * above on the moving task. Hence no need for
- * sync_window_start()
- */
src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
dst_curr_runnable_sum = &rq->curr_runnable_sum;
src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
@@ -3975,7 +3797,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window;
p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window;
- trace_sched_migration_update_sum(p, migrate_type, &d);
+ trace_sched_migration_update_sum(p, migrate_type, rq);
BUG_ON((s64)*src_curr_runnable_sum < 0);
BUG_ON((s64)*src_prev_runnable_sum < 0);
@@ -3983,18 +3805,6 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
BUG_ON((s64)*src_nt_prev_runnable_sum < 0);
}
-static inline struct group_cpu_time *
-task_group_cpu_time(struct task_struct *p, int cpu)
-{
- return _group_cpu_time(rcu_dereference(p->grp), cpu);
-}
-
-static inline struct group_cpu_time *
-_group_cpu_time(struct related_thread_group *grp, int cpu)
-{
- return grp ? per_cpu_ptr(grp->cpu_time, cpu) : NULL;
-}
-
static inline struct related_thread_group*
lookup_related_thread_group(unsigned int group_id)
{
@@ -4014,12 +3824,6 @@ int alloc_related_thread_groups(void)
goto err;
}
- if (alloc_group_cputime(grp)) {
- kfree(grp);
- ret = -ENOMEM;
- goto err;
- }
-
grp->id = i;
INIT_LIST_HEAD(&grp->tasks);
INIT_LIST_HEAD(&grp->list);
@@ -4034,7 +3838,6 @@ err:
for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) {
grp = lookup_related_thread_group(i);
if (grp) {
- free_group_cputime(grp);
kfree(grp);
related_thread_groups[i] = NULL;
} else {
@@ -4418,9 +4221,6 @@ static int register_sched_callback(void)
{
int ret;
- if (!sched_enable_hmp)
- return 0;
-
ret = cpufreq_register_notifier(&notifier_policy_block,
CPUFREQ_POLICY_NOTIFIER);
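
Also notable in the hmp.c rework above: transfer_busy_time() now selects its source and destination counters by pointing at either the rq sums or the embedded rq->grp_time sums, instead of carrying a migration_sum_data descriptor and re-syncing a per-group window_start. A standalone model of that ADD_TASK/REM_TASK selection and transfer, with simplified two-field counter sets and made-up amounts:

#include <stdint.h>
#include <stdio.h>

enum migrate_event { ADD_TASK, REM_TASK };

struct sums {
        uint64_t curr_runnable_sum;
        uint64_t prev_runnable_sum;
};

/* Stand-ins for the rq-level counters and the embedded rq->grp_time. */
static struct sums rq_sums = { .curr_runnable_sum = 300, .prev_runnable_sum = 500 };
static struct sums grp_sums;

/*
 * Model of the ADD_TASK/REM_TASK branch: pick source and destination
 * counter sets, then move the task's windowed contribution across.
 */
static void transfer_busy_time(enum migrate_event event,
                               uint64_t curr_window, uint64_t prev_window)
{
        struct sums *src = (event == ADD_TASK) ? &rq_sums : &grp_sums;
        struct sums *dst = (event == ADD_TASK) ? &grp_sums : &rq_sums;

        src->curr_runnable_sum -= curr_window;
        dst->curr_runnable_sum += curr_window;
        src->prev_runnable_sum -= prev_window;
        dst->prev_runnable_sum += prev_window;
}

int main(void)
{
        transfer_busy_time(ADD_TASK, 100, 200);
        printf("rq curr=%llu prev=%llu, grp curr=%llu prev=%llu\n",
               (unsigned long long)rq_sums.curr_runnable_sum,
               (unsigned long long)rq_sums.prev_runnable_sum,
               (unsigned long long)grp_sums.curr_runnable_sum,
               (unsigned long long)grp_sums.prev_runnable_sum);
        return 0;
}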
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3fe00d6fa335..b72352bbd752 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1406,6 +1406,7 @@ static void yield_task_rt(struct rq *rq)
#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);
+#ifdef CONFIG_SCHED_HMP
static int
select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
{
@@ -1419,6 +1420,7 @@ select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
return cpu;
}
+#endif
static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
@@ -1426,8 +1428,9 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
struct task_struct *curr;
struct rq *rq;
- if (sched_enable_hmp)
- return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+#ifdef CONFIG_SCHED_HMP
+ return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+#endif
/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
@@ -1796,14 +1799,6 @@ static int find_lowest_rq_hmp(struct task_struct *task)
return best_cpu;
}
-
-#else /* CONFIG_SCHED_HMP */
-
-static int find_lowest_rq_hmp(struct task_struct *task)
-{
- return -1;
-}
-
#endif /* CONFIG_SCHED_HMP */
static int find_lowest_rq(struct task_struct *task)
@@ -1813,8 +1808,9 @@ static int find_lowest_rq(struct task_struct *task)
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
- if (sched_enable_hmp)
- return find_lowest_rq_hmp(task);
+#ifdef CONFIG_SCHED_HMP
+ return find_lowest_rq_hmp(task);
+#endif
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d907eeb297a3..360e298398fb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,6 +366,13 @@ struct load_subtractions {
u64 new_subs;
};
+struct group_cpu_time {
+ u64 curr_runnable_sum;
+ u64 prev_runnable_sum;
+ u64 nt_curr_runnable_sum;
+ u64 nt_prev_runnable_sum;
+};
+
struct sched_cluster {
raw_spinlock_t load_lock;
struct list_head list;
@@ -407,12 +414,6 @@ struct related_thread_group {
struct sched_cluster *preferred_cluster;
struct rcu_head rcu;
u64 last_update;
- struct group_cpu_time __percpu *cpu_time; /* one per cluster */
-};
-
-struct migration_sum_data {
- struct rq *src_rq, *dst_rq;
- struct group_cpu_time *src_cpu_time, *dst_cpu_time;
};
extern struct list_head cluster_head;
@@ -776,6 +777,7 @@ struct rq {
u64 prev_runnable_sum;
u64 nt_curr_runnable_sum;
u64 nt_prev_runnable_sum;
+ struct group_cpu_time grp_time;
struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
NUM_TRACKED_WINDOWS, NUM_LOAD_INDICES);
@@ -1069,10 +1071,6 @@ enum sched_boost_policy {
#define WINDOW_STATS_AVG 3
#define WINDOW_STATS_INVALID_POLICY 4
-#define FREQ_REPORT_MAX_CPU_LOAD_TOP_TASK 0
-#define FREQ_REPORT_CPU_LOAD 1
-#define FREQ_REPORT_TOP_TASK 2
-
#define SCHED_UPMIGRATE_MIN_NICE 15
#define EXITING_TASK_MARKER 0xdeaddead
@@ -1083,7 +1081,6 @@ enum sched_boost_policy {
extern struct mutex policy_mutex;
extern unsigned int sched_ravg_window;
extern unsigned int sched_disable_window_stats;
-extern unsigned int sched_enable_hmp;
extern unsigned int max_possible_freq;
extern unsigned int min_max_freq;
extern unsigned int pct_task_load(struct task_struct *p);
@@ -1127,7 +1124,6 @@ extern void update_cluster_topology(void);
extern void note_task_waking(struct task_struct *p, u64 wallclock);
extern void set_task_last_switch_out(struct task_struct *p, u64 wallclock);
extern void init_clusters(void);
-extern int __init set_sched_enable_hmp(char *str);
extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
extern unsigned int max_task_load(void);
extern void sched_account_irqtime(int cpu, struct task_struct *curr,
@@ -1257,7 +1253,7 @@ inc_cumulative_runnable_avg(struct hmp_sched_stats *stats,
{
u32 task_load;
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
task_load = sched_disable_window_stats ? 0 : p->ravg.demand;
@@ -1272,7 +1268,7 @@ dec_cumulative_runnable_avg(struct hmp_sched_stats *stats,
{
u32 task_load;
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
task_load = sched_disable_window_stats ? 0 : p->ravg.demand;
@@ -1290,7 +1286,7 @@ fixup_cumulative_runnable_avg(struct hmp_sched_stats *stats,
struct task_struct *p, s64 task_load_delta,
s64 pred_demand_delta)
{
- if (!sched_enable_hmp || sched_disable_window_stats)
+ if (sched_disable_window_stats)
return;
stats->cumulative_runnable_avg += task_load_delta;
@@ -1350,14 +1346,6 @@ check_for_freq_change(struct rq *rq, bool check_pred, bool check_groups);
extern void notify_migration(int src_cpu, int dest_cpu,
bool src_cpu_dead, struct task_struct *p);
-struct group_cpu_time {
- u64 curr_runnable_sum;
- u64 prev_runnable_sum;
- u64 nt_curr_runnable_sum;
- u64 nt_prev_runnable_sum;
- u64 window_start;
-};
-
/* Is frequency of two cpus synchronized with each other? */
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
@@ -1667,7 +1655,6 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
static inline void add_new_task_to_grp(struct task_struct *new) {}
-#define sched_enable_hmp 0
#define PRED_DEMAND_DELTA (0)
static inline void
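
The sched.h hunks above relocate struct group_cpu_time out of the HMP-only region, drop its window_start field, and embed one instance in struct rq as grp_time; that is what lets hmp.c replace per_cpu_ptr(grp->cpu_time, cpu) with &rq->grp_time and delete the alloc/free and window-sync helpers. A sketch of the resulting layout, with a heavily trimmed, illustrative stand-in for struct rq:

#include <stdint.h>
#include <string.h>

struct group_cpu_time {
        uint64_t curr_runnable_sum;
        uint64_t prev_runnable_sum;
        uint64_t nt_curr_runnable_sum;
        uint64_t nt_prev_runnable_sum;
};

/*
 * Trimmed stand-in for struct rq: group time lives inline now, so there
 * is no per-group percpu allocation and no window_start to keep in sync.
 */
struct rq_sketch {
        uint64_t curr_runnable_sum;
        uint64_t prev_runnable_sum;
        struct group_cpu_time grp_time;
};

/*
 * Mirrors the sched_init()/reset_all_window_stats() hunks: a fresh
 * window simply zeroes the embedded structure.
 */
static void reset_group_time(struct rq_sketch *rq)
{
        memset(&rq->grp_time, 0, sizeof(struct group_cpu_time));
}

int main(void)
{
        struct rq_sketch rq = { 0 };

        rq.grp_time.prev_runnable_sum = 42;
        reset_group_time(&rq);
        return (int)rq.grp_time.prev_runnable_sum;      /* 0 */
}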
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a2a87c3ad44e..7112dc54d88e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -133,6 +133,7 @@ static int ten_thousand = 10000;
#endif
#ifdef CONFIG_SCHED_HMP
static int one_thousand = 1000;
+static int max_freq_reporting_policy = FREQ_REPORT_INVALID_POLICY - 1;
#endif
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
@@ -297,6 +298,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
+ .extra2 = &max_freq_reporting_policy,
},
{
.procname = "sched_freq_inc_notify",
@@ -591,7 +593,8 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_sched_time_avg,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
},
{
.procname = "sched_shares_window_ns",