author     Srivatsa Vaddagiri <vatsa@codeaurora.org>    2014-07-21 02:05:24 -0700
committer  David Keitel <dkeitel@codeaurora.org>        2016-03-23 19:59:54 -0700
commit     ad25ca2afbd5b3f483a8fd28386a45a4db2b007a (patch)
tree       e231b95ed7c24ca5dcc4b9748504469f8caf4193
parent     7c9b849b11484ba96adc4cb85fcf18097fc442ae (diff)
sched: support legacy mode better
It should be possible to bypass all HMP scheduler changes at runtime
by setting sysctl_sched_enable_hmp_task_placement and
sysctl_sched_enable_power_aware to 0. Fix various code paths to honor
this requirement.

Change-Id: I74254e68582b3f9f1b84661baf7dae14f981c025
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
[joonwoop@codeaurora.org: fixed conflict in rt.c, p->nr_cpus_allowed == 1
 is now moved in core.c]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
-rw-r--r--   include/linux/sched.h     2
-rw-r--r--   kernel/sched/core.c      19
-rw-r--r--   kernel/sched/fair.c      62
-rw-r--r--   kernel/sched/rt.c        34
-rw-r--r--   kernel/sched/sched.h      6
5 files changed, 81 insertions(+), 42 deletions(-)
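
The whole patch follows one runtime-gating pattern: every HMP-specific code
path either returns early or dispatches to a *_hmp() variant depending on
sysctl_sched_enable_hmp_task_placement, and sched.h defines that symbol as a
constant 0 when CONFIG_SCHED_HMP is not built, so the extra branches cost
nothing in non-HMP kernels. The sketch below is a minimal, self-contained
user-space illustration of that pattern, not kernel code; the harness and the
select_cpu_*()/freq_margin() helpers are hypothetical stand-ins for the real
paths (e.g. find_busiest_queue() vs. find_busiest_queue_hmp(), or
rq_freq_margin()).

/*
 * Minimal user-space sketch of the gating pattern in this patch.
 * NOT kernel code: select_cpu_hmp(), select_cpu_legacy() and the
 * demand numbers are hypothetical stand-ins.
 */
#include <stdio.h>
#include <limits.h>

#ifdef CONFIG_SCHED_HMP
static unsigned int sysctl_sched_enable_hmp_task_placement = 1;
#else
/* Mirrors the sched.h fallback: a constant 0 lets the compiler drop
 * every HMP branch when HMP support is not compiled in. */
#define sysctl_sched_enable_hmp_task_placement 0
#endif

static int select_cpu_hmp(void)    { return 4; } /* pretend big/LITTLE choice */
static int select_cpu_legacy(void) { return 0; } /* stock scheduler choice    */

/* Pattern 1: dispatch to the HMP variant only when the sysctl is set
 * (compare find_busiest_queue() calling find_busiest_queue_hmp()). */
static int select_cpu(void)
{
	if (sysctl_sched_enable_hmp_task_placement)
		return select_cpu_hmp();

	return select_cpu_legacy();
}

/* Pattern 2: return a neutral value early so HMP bookkeeping is
 * bypassed entirely (compare rq_freq_margin() returning INT_MAX). */
static int freq_margin(unsigned long demand, unsigned long max_load)
{
	if (!sysctl_sched_enable_hmp_task_placement)
		return INT_MAX; /* "infinite" headroom: never force a freq bump */

	return 100 - (int)(demand * 128 / max_load);
}

int main(void)
{
	printf("cpu=%d margin=%d\n", select_cpu(), freq_margin(50, 128));
	return 0;
}

In the real kernel the sysctl can be flipped at runtime (sysctl_sched_* knobs
live under /proc/sys/kernel/), so legacy behaviour is available without a
rebuild; the constant-0 fallback gives the same effect at compile time.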
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d4ac19e3bd39..c53c9e2e4963 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2390,7 +2390,7 @@ extern unsigned long long
task_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
-#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_HMP)
+#if defined(CONFIG_SMP)
extern void sched_exec(void);
#else
#define sched_exec() {}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 97f4cc268f9f..843ed0213eba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1135,6 +1135,9 @@ int rq_freq_margin(struct rq *rq)
int margin;
u64 demand;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return INT_MAX;
+
demand = scale_load_to_cpu(rq->prev_runnable_sum, rq->cpu);
demand *= 128;
demand = div64_u64(demand, max_task_load());
@@ -1390,6 +1393,9 @@ static void init_cpu_efficiency(void)
int i, efficiency;
unsigned int max = 0, min = UINT_MAX;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
for_each_possible_cpu(i) {
efficiency = arch_get_cpu_efficiency(i);
cpu_rq(i)->efficiency = efficiency;
@@ -1430,7 +1436,7 @@ static inline void set_window_start(struct rq *rq)
int cpu = cpu_of(rq);
struct rq *sync_rq = cpu_rq(sync_cpu);
- if (likely(rq->window_start))
+ if (rq->window_start || !sysctl_sched_enable_hmp_task_placement)
return;
if (cpu == sync_cpu) {
@@ -1714,6 +1720,9 @@ static int register_sched_callback(void)
{
int ret;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return 0;
+
ret = cpufreq_register_notifier(&notifier_policy_block,
CPUFREQ_POLICY_NOTIFIER);
@@ -2103,7 +2112,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.nr_migrations++;
perf_event_task_migrate(p);
- if (p->on_rq || p->state == TASK_WAKING)
+ if (sysctl_sched_enable_hmp_task_placement &&
+ (p->on_rq || p->state == TASK_WAKING))
fixup_busy_time(p, new_cpu);
}
@@ -3650,7 +3660,7 @@ void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
*load = rq->load.weight;
}
-#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_HMP)
+#if defined(CONFIG_SMP)
/*
* sched_exec - execve() is a valuable balancing opportunity, because at
@@ -3662,6 +3672,9 @@ void sched_exec(void)
unsigned long flags;
int dest_cpu;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return;
+
raw_spin_lock_irqsave(&p->pi_lock, flags);
dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
if (dest_cpu == smp_processor_id())
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa7d8281e0db..76204fa529f2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2767,6 +2767,9 @@ int sched_set_boost(int enable)
unsigned long flags;
int ret = 0;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return -EINVAL;
+
spin_lock_irqsave(&boost_lock, flags);
if (enable == 1) {
@@ -3072,6 +3075,9 @@ done:
void inc_nr_big_small_task(struct rq *rq, struct task_struct *p)
{
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
if (is_big_task(p))
rq->nr_big_tasks++;
else if (is_small_task(p))
@@ -3080,6 +3086,9 @@ void inc_nr_big_small_task(struct rq *rq, struct task_struct *p)
void dec_nr_big_small_task(struct rq *rq, struct task_struct *p)
{
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
if (is_big_task(p))
rq->nr_big_tasks--;
else if (is_small_task(p))
@@ -3145,7 +3154,7 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
unsigned int old_val = *data;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (ret || !write)
+ if (ret || !write || !sysctl_sched_enable_hmp_task_placement)
return ret;
if ((sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) ||
@@ -3268,7 +3277,8 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p)
{
int nice = task_nice(p);
- if (is_small_task(p) || p->state != TASK_RUNNING)
+ if (is_small_task(p) || p->state != TASK_RUNNING ||
+ !sysctl_sched_enable_hmp_task_placement)
return 0;
/* Todo: cgroup-based control? */
@@ -3349,11 +3359,6 @@ static inline int power_cost(struct task_struct *p, int cpu)
return SCHED_CAPACITY_SCALE;
}
-static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
-{
- return 1;
-}
-
static inline int
spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu)
{
@@ -3814,6 +3819,9 @@ add_to_scaled_stat(int cpu, struct sched_avg *sa, u64 delta)
u64 scaled_delta;
int sf;
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
if (unlikely(cur_freq > max_possible_freq ||
(cur_freq == max_freq &&
max_freq < cpu_max_possible_freq)))
@@ -3828,6 +3836,9 @@ add_to_scaled_stat(int cpu, struct sched_avg *sa, u64 delta)
static inline void decay_scaled_stat(struct sched_avg *sa, u64 periods)
{
+ if (!sysctl_sched_enable_hmp_task_placement)
+ return;
+
sa->runnable_avg_sum_scaled =
decay_load(sa->runnable_avg_sum_scaled,
periods);
@@ -7868,11 +7879,8 @@ out_balanced:
return NULL;
}
-/*
- * find_busiest_queue - find the busiest runqueue among the cpus in group.
- */
#ifdef CONFIG_SCHED_HMP
-static struct rq *find_busiest_queue(struct lb_env *env,
+static struct rq *find_busiest_queue_hmp(struct lb_env *env,
struct sched_group *group)
{
struct rq *busiest = NULL, *rq;
@@ -7893,7 +7901,17 @@ static struct rq *find_busiest_queue(struct lb_env *env,
return busiest;
}
-#else /* CONFIG_SCHED_HMP */
+#else
+static inline struct rq *find_busiest_queue_hmp(struct lb_env *env,
+ struct sched_group *group)
+{
+ return NULL;
+}
+#endif
+
+/*
+ * find_busiest_queue - find the busiest runqueue among the cpus in group.
+ */
static struct rq *find_busiest_queue(struct lb_env *env,
struct sched_group *group)
{
@@ -7901,6 +7919,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
unsigned long busiest_load = 0, busiest_capacity = 1;
int i;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return find_busiest_queue_hmp(env, group);
+
for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
unsigned long capacity, wl;
enum fbq_type rt;
@@ -7963,7 +7984,6 @@ static struct rq *find_busiest_queue(struct lb_env *env,
return busiest;
}
-#endif /* CONFIG_SCHED_HMP */
/*
* Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
@@ -8964,8 +8984,7 @@ end:
}
#ifdef CONFIG_SCHED_HMP
-
-static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
+static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
{
struct sched_domain *sd;
int i;
@@ -8999,13 +9018,20 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
return 0;
}
-
-#else /* CONFIG_SCHED_HMP */
+#else
+static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
+{
+ return 0;
+}
+#endif
static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
{
unsigned long now = jiffies;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return _nohz_kick_needed_hmp(rq, cpu, type);
+
/*
* None are in tickless mode and hence no need for NOHZ idle load
* balancing.
@@ -9019,8 +9045,6 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
return (rq->nr_running >= 2);
}
-#endif /* CONFIG_SCHED_HMP */
-
/*
* Current heuristic for kicking the idle load balancer in the presence
* of an idle cpu in the system.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 18e823250708..cbe16bbd4fae 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1365,36 +1365,29 @@ static void yield_task_rt(struct rq *rq)
#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);
-/* TODO: Move this to a power aware config feature. There's
- * no strict dependency between SCHED_HMP and this. Its just
- * a different algorithm optimizing for power
- */
-#ifdef CONFIG_SCHED_HMP
static int
-select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
{
int target;
- if (p->nr_cpus_allowed == 1)
- goto out;
-
rcu_read_lock();
target = find_lowest_rq(p);
if (target != -1)
cpu = target;
rcu_read_unlock();
-out:
return cpu;
}
-#else /* CONFIG_SCHED_HMP */
static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
+ if (sysctl_sched_enable_hmp_task_placement)
+ return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+
/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
goto out;
@@ -1444,7 +1437,6 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
out:
return cpu;
}
-#endif /* CONFIG_SCHED_HMP */
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
@@ -1633,12 +1625,8 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
-/* TODO: Move this to a power aware config feature. There's
- * no strict dependency between SCHED_HMP and this. Its just
- * a different algorithm optimizing for power
- */
#ifdef CONFIG_SCHED_HMP
-static int find_lowest_rq(struct task_struct *task)
+static int find_lowest_rq_hmp(struct task_struct *task)
{
struct cpumask *lowest_mask = *this_cpu_ptr(&local_cpu_mask);
int cpu_cost, min_cost = INT_MAX;
@@ -1678,7 +1666,13 @@ static int find_lowest_rq(struct task_struct *task)
}
return best_cpu;
}
-#else /* CONFIG_SCHED_HMP */
+#else
+static int find_lowest_rq_hmp(struct task_struct *task)
+{
+ return -1;
+}
+#endif
+
static int find_lowest_rq(struct task_struct *task)
{
struct sched_domain *sd;
@@ -1686,6 +1680,9 @@ static int find_lowest_rq(struct task_struct *task)
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ if (sysctl_sched_enable_hmp_task_placement)
+ return find_lowest_rq_hmp(task);
+
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
return -1;
@@ -1752,7 +1749,6 @@ static int find_lowest_rq(struct task_struct *task)
return cpu;
return -1;
}
-#endif /* CONFIG_SCHED_HMP */
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4ae45517234f..351f69457a27 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1016,6 +1016,8 @@ static inline unsigned long capacity_scale_cpu_freq(int cpu)
#ifdef CONFIG_SCHED_HMP
+extern unsigned int sysctl_sched_enable_hmp_task_placement;
+
int mostly_idle_cpu(int cpu);
extern void check_for_migration(struct rq *rq, struct task_struct *p);
extern void pre_big_small_task_count_change(void);
@@ -1027,6 +1029,8 @@ extern unsigned int power_cost_at_freq(int cpu, unsigned int freq);
#else /* CONFIG_SCHED_HMP */
+#define sysctl_sched_enable_hmp_task_placement 0
+
static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }
static inline void pre_big_small_task_count_change(void) { }
static inline void post_big_small_task_count_change(void) { }
@@ -1040,6 +1044,8 @@ static inline void dec_nr_big_small_task(struct rq *rq, struct task_struct *p)
{
}
+#define power_cost_at_freq(...) 0
+
#define trace_sched_cpu_load(...)
#endif /* CONFIG_SCHED_HMP */
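
One detail worth calling out in the sched.h hunk above: when CONFIG_SCHED_HMP
is off, HMP-only helpers are stubbed out with variadic macros that expand to a
constant (or to nothing), so the call sites added elsewhere in this patch keep
compiling and the dead code folds away. Below is a hedged sketch of that
stub-macro idiom; only the #else stubs mirror the hunk, the bodies under
CONFIG_SCHED_HMP and the harness are invented placeholders.

/* Sketch of the compile-out stub idiom from sched.h. */
#include <stdio.h>

#ifdef CONFIG_SCHED_HMP
static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
{
	return freq / 1000 + (unsigned int)cpu; /* stand-in cost model */
}
#define trace_sched_cpu_load(rq_ptr, idle) \
	printf("cpu_load: idle=%d\n", (idle))
#else
#define power_cost_at_freq(...) 0 /* as in the hunk above */
#define trace_sched_cpu_load(...) /* expands to nothing   */
#endif

int main(void)
{
	/* Both calls compile in either configuration. */
	printf("cost=%u\n", (unsigned int)power_cost_at_freq(0, 1400000u));
	trace_sched_cpu_load(NULL, 1);
	return 0;
}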