author     Linux Build Service Account <lnxbuild@quicinc.com>    2017-06-01 01:53:23 -0700
committer  Gerrit - the friendly Code Review server <code-review@localhost>    2017-06-01 01:53:22 -0700
commit     840d1a232cfd67867a61a31ccc5d81c5c2192474 (patch)
tree       1ebb3ac499232366bb72640512366e4f76fd74e6
parent     f53ee91e04c504b4e3fde9a0b11c7c1cecc33f94 (diff)
parent     57fd979fc92aac87bc6745883940d32fbdeb4ac4 (diff)
Merge "core_ctl: un-isolate BIG CPUs more aggressively"
-rw-r--r--    include/linux/sched.h             4
-rw-r--r--    include/trace/events/sched.h     14
-rw-r--r--    kernel/sched/core_ctl.c          50
-rw-r--r--    kernel/sched/hmp.c                2
-rw-r--r--    kernel/sched/sched.h              7
-rw-r--r--    kernel/sched/sched_avg.c         40
6 files changed, 76 insertions, 41 deletions
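For orientation before the hunks: this merge lets core_ctl un-isolate BIG CPUs sooner in three visible ways. sched_avg.c now reports the per-window peak nr_running of each CPU (max_nr / big_max_nr) alongside the averages, the averages are rounded up to the next integer with a new DIV64_U64_ROUNDUP() macro instead of the old NR_RUNNING_TOLERANCE percentage trick, and core_ctl.c adds a MAX_NR_THRESHOLD rule to apply_task_need(). The snippet below is only a standalone userspace sketch of the round-up arithmetic with made-up numbers (tmp_big_avg, diff); it is not part of the commit.

#include <stdint.h>
#include <stdio.h>

/* same arithmetic as the kernel's new DIV64_U64_ROUNDUP(), minus div64_u64() */
#define DIV64_U64_ROUNDUP(X, Y) (((X) + ((Y) - 1)) / (Y))

int main(void)
{
	uint64_t tmp_big_avg = 12;  /* hypothetical time-weighted nr_big_prod_sum */
	uint64_t diff = 10;         /* hypothetical polling window, same time unit */

	/* truncating division reports only 1 big task on average */
	printf("truncated:  %llu\n", (unsigned long long)(tmp_big_avg / diff));

	/* rounding up reports 2, so core_ctl unisolates a BIG CPU sooner */
	printf("rounded up: %llu\n",
	       (unsigned long long)DIV64_U64_ROUNDUP(tmp_big_avg, diff));
	return 0;
}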
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c71978453864..138fcf72508a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -178,7 +178,9 @@ extern u64 nr_running_integral(unsigned int cpu);
#endif
extern void sched_update_nr_prod(int cpu, long delta, bool inc);
-extern void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg);
+extern void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
+ unsigned int *max_nr,
+ unsigned int *big_max_nr);
extern void calc_global_load(unsigned long ticks);
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0b92317f6263..9a1ff42a377e 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -1312,24 +1312,30 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
TRACE_EVENT(sched_get_nr_running_avg,
- TP_PROTO(int avg, int big_avg, int iowait_avg),
+ TP_PROTO(int avg, int big_avg, int iowait_avg,
+ unsigned int max_nr, unsigned int big_max_nr),
- TP_ARGS(avg, big_avg, iowait_avg),
+ TP_ARGS(avg, big_avg, iowait_avg, max_nr, big_max_nr),
TP_STRUCT__entry(
__field( int, avg )
__field( int, big_avg )
__field( int, iowait_avg )
+ __field( unsigned int, max_nr )
+ __field( unsigned int, big_max_nr )
),
TP_fast_assign(
__entry->avg = avg;
__entry->big_avg = big_avg;
__entry->iowait_avg = iowait_avg;
+ __entry->max_nr = max_nr;
+ __entry->big_max_nr = big_max_nr;
),
- TP_printk("avg=%d big_avg=%d iowait_avg=%d",
- __entry->avg, __entry->big_avg, __entry->iowait_avg)
+ TP_printk("avg=%d big_avg=%d iowait_avg=%d max_nr=%u big_max_nr=%u",
+ __entry->avg, __entry->big_avg, __entry->iowait_avg,
+ __entry->max_nr, __entry->big_max_nr)
);
TRACE_EVENT(core_ctl_eval_need,
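As a quick illustration of the extended tracepoint output, a plain printf with the same format string as the TP_printk() above renders one line shaped like the trace buffer entry; only the format comes from the patch, the sample values below are invented.

#include <stdio.h>

int main(void)
{
	/* hypothetical sample: averages plus the new per-window peaks */
	int avg = 3, big_avg = 2, iowait_avg = 0;
	unsigned int max_nr = 5, big_max_nr = 5;

	printf("avg=%d big_avg=%d iowait_avg=%d max_nr=%u big_max_nr=%u\n",
	       avg, big_avg, iowait_avg, max_nr, big_max_nr);
	return 0;
}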
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index 0b5f2dea18a1..8233039ddf67 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -39,6 +39,7 @@ struct cluster_data {
cpumask_t cpu_mask;
unsigned int need_cpus;
unsigned int task_thres;
+ unsigned int max_nr;
s64 need_ts;
struct list_head lru;
bool pending;
@@ -60,6 +61,7 @@ struct cpu_data {
struct cluster_data *cluster;
struct list_head sib;
bool isolated_by_us;
+ unsigned int max_nr;
};
static DEFINE_PER_CPU(struct cpu_data, cpu_state);
@@ -429,7 +431,6 @@ static struct kobj_type ktype_core_ctl = {
#define RQ_AVG_TOLERANCE 2
#define RQ_AVG_DEFAULT_MS 20
-#define NR_RUNNING_TOLERANCE 5
static unsigned int rq_avg_period_ms = RQ_AVG_DEFAULT_MS;
static s64 rq_avg_timestamp_ms;
@@ -437,6 +438,7 @@ static s64 rq_avg_timestamp_ms;
static void update_running_avg(bool trigger_update)
{
int avg, iowait_avg, big_avg, old_nrrun;
+ int old_max_nr, max_nr, big_max_nr;
s64 now;
unsigned long flags;
struct cluster_data *cluster;
@@ -450,40 +452,23 @@ static void update_running_avg(bool trigger_update)
return;
}
rq_avg_timestamp_ms = now;
- sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg);
+ sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
+ &max_nr, &big_max_nr);
spin_unlock_irqrestore(&state_lock, flags);
- /*
- * Round up to the next integer if the average nr running tasks
- * is within NR_RUNNING_TOLERANCE/100 of the next integer.
- * If normal rounding up is used, it will allow a transient task
- * to trigger online event. By the time core is onlined, the task
- * has finished.
- * Rounding to closest suffers same problem because scheduler
- * might only provide running stats per jiffy, and a transient
- * task could skew the number for one jiffy. If core control
- * samples every 2 jiffies, it will observe 0.5 additional running
- * average which rounds up to 1 task.
- */
- avg = (avg + NR_RUNNING_TOLERANCE) / 100;
- big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100;
-
for_each_cluster(cluster, index) {
if (!cluster->inited)
continue;
+
old_nrrun = cluster->nrrun;
- /*
- * Big cluster only need to take care of big tasks, but if
- * there are not enough big cores, big tasks need to be run
- * on little as well. Thus for little's runqueue stat, it
- * has to use overall runqueue average, or derive what big
- * tasks would have to be run on little. The latter approach
- * is not easy to get given core control reacts much slower
- * than scheduler, and can't predict scheduler's behavior.
- */
+ old_max_nr = cluster->max_nr;
cluster->nrrun = cluster->is_big_cluster ? big_avg : avg;
- if (cluster->nrrun != old_nrrun) {
+ cluster->max_nr = cluster->is_big_cluster ? big_max_nr : max_nr;
+
+ if (cluster->nrrun != old_nrrun ||
+ cluster->max_nr != old_max_nr) {
+
if (trigger_update)
apply_need(cluster);
else
@@ -493,6 +478,7 @@ static void update_running_avg(bool trigger_update)
return;
}
+#define MAX_NR_THRESHOLD 4
/* adjust needed CPUs based on current runqueue information */
static unsigned int apply_task_need(const struct cluster_data *cluster,
unsigned int new_need)
@@ -503,7 +489,15 @@ static unsigned int apply_task_need(const struct cluster_data *cluster,
/* only unisolate more cores if there are tasks to run */
if (cluster->nrrun > new_need)
- return new_need + 1;
+ new_need = new_need + 1;
+
+ /*
+ * We don't want any CPU in a cluster to be overcrowded with tasks.
+ * If any CPU saw more than MAX_NR_THRESHOLD runnable tasks in the
+ * last window, bring another CPU in to help out.
+ */
+ if (cluster->max_nr > MAX_NR_THRESHOLD)
+ new_need = new_need + 1;
return new_need;
}
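To see the new MAX_NR_THRESHOLD rule in isolation, here is a hedged standalone sketch of the updated apply_task_need() decision. The struct and values are illustrative stand-ins, and the real need value is subject to further clamping elsewhere in core_ctl.c that is outside this hunk.

#include <stdio.h>

#define MAX_NR_THRESHOLD 4

/* illustrative stand-in for the struct cluster_data fields used here */
struct cluster_sketch {
	unsigned int nrrun;   /* average runnable tasks attributed to the cluster */
	unsigned int max_nr;  /* highest per-CPU nr_running seen in the last window */
};

static unsigned int apply_task_need_sketch(const struct cluster_sketch *cluster,
					   unsigned int new_need)
{
	/* only unisolate more cores if there are tasks to run */
	if (cluster->nrrun > new_need)
		new_need = new_need + 1;

	/* if any single CPU was overcrowded last window, add one more CPU */
	if (cluster->max_nr > MAX_NR_THRESHOLD)
		new_need = new_need + 1;

	return new_need;
}

int main(void)
{
	struct cluster_sketch big = { .nrrun = 3, .max_nr = 6 };

	/* both rules fire: a base need of 2 CPUs becomes 4 */
	printf("need = %u\n", apply_task_need_sketch(&big, 2));
	return 0;
}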
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 0591975de3bf..37d24bb17c76 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -1602,7 +1602,7 @@ unsigned int nr_eligible_big_tasks(int cpu)
int nr_big = rq->hmp_stats.nr_big_tasks;
int nr = rq->nr_running;
- if (cpu_max_possible_capacity(cpu) != max_possible_capacity)
+ if (!is_max_capacity_cpu(cpu))
return nr_big;
return nr;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 276a2387f06f..2beda41af443 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1225,6 +1225,11 @@ static inline bool hmp_capable(void)
return max_possible_capacity != min_max_possible_capacity;
}
+static inline bool is_max_capacity_cpu(int cpu)
+{
+ return cpu_max_possible_capacity(cpu) == max_possible_capacity;
+}
+
/*
* 'load' is in reference to "best cpu" at its best frequency.
* Scale that in reference to a given cpu, accounting for how bad it is
@@ -1601,6 +1606,8 @@ static inline unsigned int nr_eligible_big_tasks(int cpu)
return 0;
}
+static inline bool is_max_capacity_cpu(int cpu) { return true; }
+
static inline int pct_task_load(struct task_struct *p) { return 0; }
static inline int cpu_capacity(int cpu)
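The hmp.c and sched.h hunks above only factor the open-coded capacity comparison into an is_max_capacity_cpu() helper (plus an inline stub that always returns true in the non-HMP build). Below is a minimal userspace sketch of the same predicate with stubbed capacity data; the 1024/512 capacities and the CPU numbering are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

static unsigned int max_possible_capacity = 1024;

/* stub: pretend CPUs 4-7 form the maximum-capacity (BIG) cluster */
static unsigned int cpu_max_possible_capacity(int cpu)
{
	return cpu >= 4 ? 1024 : 512;
}

static bool is_max_capacity_cpu(int cpu)
{
	return cpu_max_possible_capacity(cpu) == max_possible_capacity;
}

int main(void)
{
	/* nr_eligible_big_tasks() now counts every runnable task on such CPUs */
	printf("cpu0 max capacity? %d\n", is_max_capacity_cpu(0));
	printf("cpu5 max capacity? %d\n", is_max_capacity_cpu(5));
	return 0;
}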
diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c
index 29d8a26a78ed..ba5a326a9fd8 100644
--- a/kernel/sched/sched_avg.c
+++ b/kernel/sched/sched_avg.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, 2015-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, 2015-2017, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -26,11 +26,13 @@ static DEFINE_PER_CPU(u64, nr_prod_sum);
static DEFINE_PER_CPU(u64, last_time);
static DEFINE_PER_CPU(u64, nr_big_prod_sum);
static DEFINE_PER_CPU(u64, nr);
+static DEFINE_PER_CPU(u64, nr_max);
static DEFINE_PER_CPU(unsigned long, iowait_prod_sum);
static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock);
static s64 last_get_time;
+#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
/**
* sched_get_nr_running_avg
* @return: Average nr_running, iowait and nr_big_tasks value since last poll.
@@ -40,7 +42,8 @@ static s64 last_get_time;
* Obtains the average nr_running value since the last poll.
* This function may not be called concurrently with itself
*/
-void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
+void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
+ unsigned int *max_nr, unsigned int *big_max_nr)
{
int cpu;
u64 curr_time = sched_clock();
@@ -50,6 +53,8 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
*avg = 0;
*iowait_avg = 0;
*big_avg = 0;
+ *max_nr = 0;
+ *big_max_nr = 0;
if (!diff)
return;
@@ -78,17 +83,35 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg)
per_cpu(nr_big_prod_sum, cpu) = 0;
per_cpu(iowait_prod_sum, cpu) = 0;
+ if (*max_nr < per_cpu(nr_max, cpu))
+ *max_nr = per_cpu(nr_max, cpu);
+
+ if (is_max_capacity_cpu(cpu)) {
+ if (*big_max_nr < per_cpu(nr_max, cpu))
+ *big_max_nr = per_cpu(nr_max, cpu);
+ }
+
+ per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
diff = curr_time - last_get_time;
last_get_time = curr_time;
- *avg = (int)div64_u64(tmp_avg * 100, diff);
- *big_avg = (int)div64_u64(tmp_big_avg * 100, diff);
- *iowait_avg = (int)div64_u64(tmp_iowait * 100, diff);
-
- trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg);
+ /*
+ * Any task running on the BIG cluster and BIG tasks running on the
+ * little cluster contribute to big_avg. Small or medium tasks can
+ * also run on the BIG cluster when the co-location and scheduler
+ * boost features are activated. We don't want these tasks to
+ * downmigrate to the little cluster when BIG CPUs are available but
+ * isolated. Round up the average values so that core_ctl unisolates
+ * BIG CPUs aggressively.
+ */
+ *avg = (int)DIV64_U64_ROUNDUP(tmp_avg, diff);
+ *big_avg = (int)DIV64_U64_ROUNDUP(tmp_big_avg, diff);
+ *iowait_avg = (int)DIV64_U64_ROUNDUP(tmp_iowait, diff);
+
+ trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg,
+ *max_nr, *big_max_nr);
BUG_ON(*avg < 0 || *big_avg < 0 || *iowait_avg < 0);
pr_debug("%s - avg:%d big_avg:%d iowait_avg:%d\n",
@@ -121,6 +144,9 @@ void sched_update_nr_prod(int cpu, long delta, bool inc)
BUG_ON((s64)per_cpu(nr, cpu) < 0);
+ if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu))
+ per_cpu(nr_max, cpu) = per_cpu(nr, cpu);
+
per_cpu(nr_prod_sum, cpu) += nr_running * diff;
per_cpu(nr_big_prod_sum, cpu) += nr_eligible_big_tasks(cpu) * diff;
per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
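
Putting the sched_avg.c side together: every sched_update_nr_prod() call now records the per-CPU peak of nr_running, and sched_get_nr_running_avg() reports that peak and resets it to the current count for the next window. The following standalone sketch mimics that tracking for a single CPU, with illustrative names (update_nr, poll_window_peak) and no locking; it is a simplification, not the kernel code.

#include <stdio.h>

/* single-CPU, lock-free stand-ins for the per-CPU nr / nr_max counters */
static unsigned long nr;       /* current runnable count */
static unsigned long nr_max;   /* peak runnable count in the current window */

static void update_nr(long delta)
{
	nr += delta;
	if (nr > nr_max)
		nr_max = nr;   /* remember even a short-lived spike */
}

static unsigned long poll_window_peak(void)
{
	unsigned long peak = nr_max;

	nr_max = nr;           /* next window starts from the current level */
	return peak;
}

int main(void)
{
	update_nr(+3);
	update_nr(+2);         /* transient spike to 5 runnable tasks */
	update_nr(-4);
	printf("window peak = %lu, now = %lu\n", poll_window_peak(), nr);
	return 0;
}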