diff options
author | Joonwoo Park <joonwoop@codeaurora.org> | 2016-06-17 15:15:04 -0700 |
---|---|---|
committer | Kyle Yan <kyan@codeaurora.org> | 2016-06-21 15:11:21 -0700 |
commit | c876c09f5817c12b9162cf2389be730e94c35d75 (patch) | |
tree | ff50e0681ba6fec2acb01dddaafa7d941f61fda4 /kernel | |
parent | 47c31979a1a96ee9dba1ebc43d0f04a6e7f5ee19 (diff) |
sched: kill unnecessary divisions on fast path
The max_possible_efficiency and CPU's efficiency are fixed values which
are determined at cluster allocation time. Avoid division on the fast
path by using a precomputed scale factor.
Also update_cpu_busy_time() doesn't need to know how many full windows
have elapsed. Thus replace unneeded division with simple comparison.
Change-Id: I2be1aad3fb9b895e4f0917d05bd8eade985bbccf
Suggested-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/core.c | 26 | ||||
-rw-r--r-- | kernel/sched/debug.c | 1 | ||||
-rw-r--r-- | kernel/sched/sched.h | 1 |
3 files changed, 15 insertions, 13 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 63f08cf4f517..aaa4f5e258b1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1412,6 +1412,7 @@ static struct sched_cluster init_cluster = { .dstate = 0, .dstate_wakeup_energy = 0, .dstate_wakeup_latency = 0, + .exec_scale_factor = 1024, }; void update_all_clusters_stats(void) @@ -1429,6 +1430,10 @@ void update_all_clusters_stats(void) compute_max_possible_capacity(cluster); cluster->load_scale_factor = compute_load_scale_factor(cluster); + cluster->exec_scale_factor = + DIV_ROUND_UP(cluster->efficiency * 1024, + max_possible_efficiency); + if (mpc > highest_mpc) highest_mpc = mpc; @@ -1939,15 +1944,11 @@ update_window_start(struct rq *rq, u64 wallclock) static inline u64 scale_exec_time(u64 delta, struct rq *rq) { - int cpu = cpu_of(rq); - int sf; u32 freq; freq = cpu_cycles_to_freq(rq->cc.cycles, rq->cc.time); delta = DIV64_U64_ROUNDUP(delta * freq, max_possible_freq); - sf = DIV_ROUND_UP(cpu_efficiency(cpu) * 1024, max_possible_efficiency); - - delta *= sf; + delta *= rq->cluster->exec_scale_factor; delta >>= 10; return delta; @@ -2390,7 +2391,7 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { - int new_window, nr_full_windows = 0; + int new_window, full_window = 0; int p_is_curr_task = (p == rq->curr); u64 mark_start = p->ravg.mark_start; u64 window_start = rq->window_start; @@ -2407,8 +2408,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, new_window = mark_start < window_start; if (new_window) { - nr_full_windows = div64_u64((window_start - mark_start), - window_size); + full_window = (window_start - mark_start) >= window_size; if (p->ravg.active_windows < USHRT_MAX) p->ravg.active_windows++; } @@ -2443,7 +2443,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, u64 curr_sum = 
rq->curr_runnable_sum; u64 nt_curr_sum = rq->nt_curr_runnable_sum; - if (nr_full_windows) + if (full_window) curr_sum = nt_curr_sum = 0; rq->prev_runnable_sum = curr_sum; @@ -2455,7 +2455,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, } else { if (p_is_curr_task && new_window) { flip_counters = 1; - if (nr_full_windows) + if (full_window) prev_sum_reset = 1; } } @@ -2465,7 +2465,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (new_window && !is_idle_task(p) && !exiting_task(p)) { u32 curr_window = 0; - if (!nr_full_windows) + if (!full_window) curr_window = p->ravg.curr_window; p->ravg.prev_window = curr_window; @@ -2544,7 +2544,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, * Irqtime can't be accounted by a task that isn't the * currently running task. */ - if (!nr_full_windows) { + if (!full_window) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. */ delta = scale_exec_time(window_start - mark_start, rq); @@ -2588,7 +2588,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, * An example of this would be a task that starts execution * and then sleeps once a new window has begun. */ - if (!nr_full_windows) { + if (!full_window) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. 
*/ delta = scale_exec_time(window_start - mark_start, rq); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 469640a0a626..d1c0ef4bf07d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -326,6 +326,7 @@ do { \ P(cluster->efficiency); P(cluster->cur_freq); P(cluster->max_freq); + P(cluster->exec_scale_factor); #endif #ifdef CONFIG_SCHED_HMP P(hmp_stats.nr_big_tasks); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7db4d59d5ec3..45b76cfff1ec 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -382,6 +382,7 @@ struct sched_cluster { int capacity; int efficiency; /* Differentiate cpus with different IPC capability */ int load_scale_factor; + unsigned int exec_scale_factor; /* * max_freq = user maximum * max_mitigated_freq = thermal defined maximum |