author     Srivatsa Vaddagiri <vatsa@codeaurora.org>    2014-07-21 02:05:24 -0700
committer  David Keitel <dkeitel@codeaurora.org>        2016-03-23 19:59:54 -0700
commit     ad25ca2afbd5b3f483a8fd28386a45a4db2b007a (patch)
tree       e231b95ed7c24ca5dcc4b9748504469f8caf4193
parent     7c9b849b11484ba96adc4cb85fcf18097fc442ae (diff)
sched: support legacy mode better
It should be possible to bypass all HMP scheduler changes at runtime
by setting sysctl_sched_enable_hmp_task_placement and
sysctl_sched_enable_power_aware to 0. Fix various code paths to honor
this requirement.

Change-Id: I74254e68582b3f9f1b84661baf7dae14f981c025
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
[joonwoop@codeaurora.org: fixed conflict in rt.c, p->nr_cpus_allowed == 1
 is now moved in core.c]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
-rw-r--r--   include/linux/sched.h     2
-rw-r--r--   kernel/sched/core.c      19
-rw-r--r--   kernel/sched/fair.c      62
-rw-r--r--   kernel/sched/rt.c        34
-rw-r--r--   kernel/sched/sched.h      6
5 files changed, 81 insertions(+), 42 deletions(-)
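
The whole patch follows one runtime-gating pattern: every HMP-specific code
path either returns early or dispatches to a *_hmp() variant depending on
sysctl_sched_enable_hmp_task_placement, and sched.h defines that symbol as a
constant 0 when CONFIG_SCHED_HMP is not built, so the extra branches cost
nothing in non-HMP kernels. The sketch below is a minimal, self-contained
user-space illustration of that pattern, not kernel code; the harness and the
select_cpu_*()/freq_margin() helpers are hypothetical stand-ins for the real
paths (e.g. find_busiest_queue() vs. find_busiest_queue_hmp(), or
rq_freq_margin()).

/*
 * Minimal user-space sketch of the gating pattern in this patch.
 * NOT kernel code: select_cpu_hmp(), select_cpu_legacy() and the
 * demand numbers are hypothetical stand-ins.
 */
#include <stdio.h>
#include <limits.h>

#ifdef CONFIG_SCHED_HMP
static unsigned int sysctl_sched_enable_hmp_task_placement = 1;
#else
/* Mirrors the sched.h fallback: a constant 0 lets the compiler drop
 * every HMP branch when HMP support is not compiled in. */
#define sysctl_sched_enable_hmp_task_placement 0
#endif

static int select_cpu_hmp(void)    { return 4; } /* pretend big/LITTLE choice */
static int select_cpu_legacy(void) { return 0; } /* stock scheduler choice    */

/* Pattern 1: dispatch to the HMP variant only when the sysctl is set
 * (compare find_busiest_queue() calling find_busiest_queue_hmp()). */
static int select_cpu(void)
{
	if (sysctl_sched_enable_hmp_task_placement)
		return select_cpu_hmp();

	return select_cpu_legacy();
}

/* Pattern 2: return a neutral value early so HMP bookkeeping is
 * bypassed entirely (compare rq_freq_margin() returning INT_MAX). */
static int freq_margin(unsigned long demand, unsigned long max_load)
{
	if (!sysctl_sched_enable_hmp_task_placement)
		return INT_MAX; /* "infinite" headroom: never force a freq bump */

	return 100 - (int)(demand * 128 / max_load);
}

int main(void)
{
	printf("cpu=%d margin=%d\n", select_cpu(), freq_margin(50, 128));
	return 0;
}

In the real kernel the sysctl can be flipped at runtime (sysctl_sched_* knobs
live under /proc/sys/kernel/), so legacy behaviour is available without a
rebuild; the constant-0 fallback gives the same effect at compile time.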
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d4ac19e3bd39..c53c9e2e4963 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2390,7 +2390,7 @@ extern unsigned long long
task_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
-#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_HMP)
+#if defined(CONFIG_SMP)
extern void sched_exec(void);
#else
#define sched_exec() {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 97f4cc268f9f..843ed0213eba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1135,6 +1135,9 @@ int rq_freq_margin(struct rq *rq)
int margin;
u64 demand;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return INT_MAX;
+
demand = scale_load_to_cpu(rq->prev_runnable_sum, rq->cpu);
demand *= 128;
demand = div64_u64(demand, max_task_load());
@@ -1390,6 +1393,9 @@ static void init_cpu_efficiency(void)
int i, efficiency;
unsigned int max = 0, min = UINT_MAX;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
for_each_possible_cpu(i) {
efficiency = arch_get_cpu_efficiency(i);
cpu_rq(i)->efficiency = efficiency;
@@ -1430,7 +1436,7 @@ static inline void set_window_start(struct rq *rq)
int cpu = cpu_of(rq);
struct rq *sync_rq = cpu_rq(sync_cpu);
- if (likely(rq->window_start))
+ if (rq->window_start || !sysctl_sched_enable_hmp_task_placement)
return;
if (cpu == sync_cpu) {
@@ -1714,6 +1720,9 @@ static int register_sched_callback(void)
{
int ret;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return 0;
+
ret = cpufreq_register_notifier(&notifier_policy_block,
CPUFREQ_POLICY_NOTIFIER);
@@ -2103,7 +2112,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.nr_migrations++;
perf_event_task_migrate(p);
- if (p->on_rq || p->state == TASK_WAKING)
+ if (sysctl_sched_enable_hmp_task_placement &&
+ (p->on_rq || p->state == TASK_WAKING))
fixup_busy_time(p, new_cpu);
}
@@ -3650,7 +3660,7 @@ void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
*load = rq->load.weight;
}
-#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_HMP)
+#if defined(CONFIG_SMP)
/*
* sched_exec - execve() is a valuable balancing opportunity, because at
@@ -3662,6 +3672,9 @@ void sched_exec(void)
unsigned long flags;
int dest_cpu;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return;
+
raw_spin_lock_irqsave(&p->pi_lock, flags);
dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
if (dest_cpu == smp_processor_id())
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa7d8281e0db..76204fa529f2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2767,6 +2767,9 @@ int sched_set_boost(int enable)
unsigned long flags;
int ret = 0;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return -EINVAL;
+
spin_lock_irqsave(&boost_lock, flags);
if (enable == 1) {
@@ -3072,6 +3075,9 @@ done:
void inc_nr_big_small_task(struct rq *rq, struct task_struct *p)
{
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
if (is_big_task(p))
rq->nr_big_tasks++;
else if (is_small_task(p))
@@ -3080,6 +3086,9 @@ void inc_nr_big_small_task(struct rq *rq, struct task_struct *p)
void dec_nr_big_small_task(struct rq *rq, struct task_struct *p)
{
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
if (is_big_task(p))
rq->nr_big_tasks--;
else if (is_small_task(p))
@@ -3145,7 +3154,7 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
unsigned int old_val = *data;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (ret || !write)
+ if (ret || !write || !sysctl_sched_enable_hmp_task_placement)
return ret;
if ((sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) ||
@@ -3268,7 +3277,8 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p)
{
int nice = task_nice(p);
- if (is_small_task(p) || p->state != TASK_RUNNING)
+ if (is_small_task(p) || p->state != TASK_RUNNING ||
+ !sysctl_sched_enable_hmp_task_placement)
return 0;
/* Todo: cgroup-based control? */
@@ -3349,11 +3359,6 @@ static inline int power_cost(struct task_struct *p, int cpu)
return SCHED_CAPACITY_SCALE;
}
-static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
-{
- return 1;
-}
-
static inline int
spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu)
{
@@ -3814,6 +3819,9 @@ add_to_scaled_stat(int cpu, struct sched_avg *sa, u64 delta)
u64 scaled_delta;
int sf;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
if (unlikely(cur_freq > max_possible_freq ||
(cur_freq == max_freq &&
max_freq < cpu_max_possible_freq)))
@@ -3828,6 +3836,9 @@ add_to_scaled_stat(int cpu, struct sched_avg *sa, u64 delta)
static inline void decay_scaled_stat(struct sched_avg *sa, u64 periods)
{
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
sa->runnable_avg_sum_scaled =
decay_load(sa->runnable_avg_sum_scaled,
periods);
@@ -7868,11 +7879,8 @@ out_balanced:
return NULL;
}
-/*
- * find_busiest_queue - find the busiest runqueue among the cpus in group.
- */
#ifdef CONFIG_SCHED_HMP
-static struct rq *find_busiest_queue(struct lb_env *env,
+static struct rq *find_busiest_queue_hmp(struct lb_env *env,
struct sched_group *group)
{
struct rq *busiest = NULL, *rq;
@@ -7893,7 +7901,17 @@ static struct rq *find_busiest_queue(struct lb_env *env,
return busiest;
}
-#else /* CONFIG_SCHED_HMP */
+#else
+static inline struct rq *find_busiest_queue_hmp(struct lb_env *env,
+ struct sched_group *group)
+{
+ return NULL;
+}
+#endif
+
+/*
+ * find_busiest_queue - find the busiest runqueue among the cpus in group.
+ */
static struct rq *find_busiest_queue(struct lb_env *env,
struct sched_group *group)
{
@@ -7901,6 +7919,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
unsigned long busiest_load = 0, busiest_capacity = 1;
int i;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return find_busiest_queue_hmp(env, group);
+
for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
unsigned long capacity, wl;
enum fbq_type rt;
@@ -7963,7 +7984,6 @@ static struct rq *find_busiest_queue(struct lb_env *env,
return busiest;
}
-#endif /* CONFIG_SCHED_HMP */
/*
* Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
@@ -8964,8 +8984,7 @@ end:
}
#ifdef CONFIG_SCHED_HMP
-
-static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
+static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
{
struct sched_domain *sd;
int i;
@@ -8999,13 +9018,20 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
return 0;
}
-
-#else /* CONFIG_SCHED_HMP */
+#else
+static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
+{
+ return 0;
+}
+#endif
static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
{
unsigned long now = jiffies;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return _nohz_kick_needed_hmp(rq, cpu, type);
+
/*
* None are in tickless mode and hence no need for NOHZ idle load
* balancing.
@@ -9019,8 +9045,6 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
return (rq->nr_running >= 2);
}
-#endif /* CONFIG_SCHED_HMP */
-
/*
* Current heuristic for kicking the idle load balancer in the presence
* of an idle cpu in the system.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 18e823250708..cbe16bbd4fae 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1365,36 +1365,29 @@ static void yield_task_rt(struct rq *rq)
#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);
-/* TODO: Move this to a power aware config feature. There's
- * no strict dependency between SCHED_HMP and this. Its just
- * a different algorithm optimizing for power
- */
-#ifdef CONFIG_SCHED_HMP
static int
-select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
{
int target;
- if (p->nr_cpus_allowed == 1)
- goto out;
-
rcu_read_lock();
target = find_lowest_rq(p);
if (target != -1)
cpu = target;
rcu_read_unlock();
-out:
return cpu;
}
-#else /* CONFIG_SCHED_HMP */
static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+
/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
goto out;
@@ -1444,7 +1437,6 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
out:
return cpu;
}
-#endif /* CONFIG_SCHED_HMP */
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
@@ -1633,12 +1625,8 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
-/* TODO: Move this to a power aware config feature. There's
- * no strict dependency between SCHED_HMP and this. Its just
- * a different algorithm optimizing for power
- */
#ifdef CONFIG_SCHED_HMP
-static int find_lowest_rq(struct task_struct *task)
+static int find_lowest_rq_hmp(struct task_struct *task)
{
struct cpumask *lowest_mask = *this_cpu_ptr(&local_cpu_mask);
int cpu_cost, min_cost = INT_MAX;
@@ -1678,7 +1666,13 @@ static int find_lowest_rq(struct task_struct *task)
}
return best_cpu;
}
-#else /* CONFIG_SCHED_HMP */
+#else
+static int find_lowest_rq_hmp(struct task_struct *task)
+{
+ return -1;
+}
+#endif
+
static int find_lowest_rq(struct task_struct *task)
{
struct sched_domain *sd;
@@ -1686,6 +1680,9 @@ static int find_lowest_rq(struct task_struct *task)
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ if (sysctl_sched_enable_hmp_task_placement)
+ return find_lowest_rq_hmp(task);
+
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
return -1;
@@ -1752,7 +1749,6 @@ static int find_lowest_rq(struct task_struct *task)
return cpu;
return -1;
}
-#endif /* CONFIG_SCHED_HMP */
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4ae45517234f..351f69457a27 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1016,6 +1016,8 @@ static inline unsigned long capacity_scale_cpu_freq(int cpu)
#ifdef CONFIG_SCHED_HMP
+extern unsigned int sysctl_sched_enable_hmp_task_placement;
+
int mostly_idle_cpu(int cpu);
extern void check_for_migration(struct rq *rq, struct task_struct *p);
extern void pre_big_small_task_count_change(void);
@@ -1027,6 +1029,8 @@ extern unsigned int power_cost_at_freq(int cpu, unsigned int freq);
#else /* CONFIG_SCHED_HMP */
+#define sysctl_sched_enable_hmp_task_placement 0
+
static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }
static inline void pre_big_small_task_count_change(void) { }
static inline void post_big_small_task_count_change(void) { }
@@ -1040,6 +1044,8 @@ static inline void dec_nr_big_small_task(struct rq *rq, struct task_struct *p)
{
}
+#define power_cost_at_freq(...) 0
+
#define trace_sched_cpu_load(...)
#endif /* CONFIG_SCHED_HMP */
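
One detail worth calling out in the sched.h hunk above: when CONFIG_SCHED_HMP
is off, HMP-only helpers are stubbed out with variadic macros that expand to a
constant (or to nothing), so the call sites added elsewhere in this patch keep
compiling and the dead code folds away. Below is a hedged sketch of that
stub-macro idiom; only the #else stubs mirror the hunk, the bodies under
CONFIG_SCHED_HMP and the harness are invented placeholders.

/* Sketch of the compile-out stub idiom from sched.h. */
#include <stdio.h>

#ifdef CONFIG_SCHED_HMP
static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
{
	return freq / 1000 + (unsigned int)cpu; /* stand-in cost model */
}
#define trace_sched_cpu_load(rq_ptr, idle) \
	printf("cpu_load: idle=%d\n", (idle))
#else
#define power_cost_at_freq(...) 0 /* as in the hunk above */
#define trace_sched_cpu_load(...) /* expands to nothing   */
#endif

int main(void)
{
	/* Both calls compile in either configuration. */
	printf("cost=%u\n", (unsigned int)power_cost_at_freq(0, 1400000u));
	trace_sched_cpu_load(NULL, 1);
	return 0;
}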