author		Srinivas Rao L <lsrao@codeaurora.org>	2016-04-14 19:12:57 +0530
committer	Mahesh Sivasubramanian <msivasub@codeaurora.org>	2016-10-06 14:49:34 -0600
commit		09372fb316e83ad623f867c0bc7928d2d4f6dade
tree		6ff8829e012cce3475071645f509be4ac5516ebf /drivers
parent		201164f28bac7f0ab17f8bfd9ae5820bfbd1a59f
cpuidle: lpm-levels: Consider history during LPM selection
Consider the recent history (residencies) of the low power modes per core
while selecting the next low power mode to enter. If most of the history
says the pattern of residencies is repeating with minimal deviation, use
the average of these residencies to predict the next mode to enter. If the
pattern is not repeating, and more than 50 percent of the samples in the
history have exited a low power mode earlier than the minimum residency of
that mode, restrict that mode and all low power modes deeper than it.

In either of the above cases, trigger an hrtimer to wake up the cpu, with
the timeout set to predicted+delta, or to the max residency of the selected
mode if a deeper state could be selected after waking up, in case the
prediction goes wrong.

Change-Id: I902a06939e19ac51dfd8c2db6b727b203ebfda14
Signed-off-by: Srinivas Rao L <lsrao@codeaurora.org>
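In outline, the deviation check works as follows. This is a minimal
standalone sketch of the heuristic described above, runnable in userspace;
the function name, the tunable default, and the sample values are
illustrative, not taken from the patch:

#include <stdint.h>
#include <stdio.h>
#include <math.h>

#define MAXSAMPLES 5

/* Tunable mirroring the patch's ref_stddev default (us) */
static const uint32_t ref_stddev = 100;

/*
 * Return a predicted sleep time in us, or 0 when the recent
 * residencies deviate too much to trust an average.
 */
static uint64_t predict(const uint32_t resi[MAXSAMPLES])
{
	uint64_t thresh = UINT64_MAX;

again:
	;
	uint64_t avg = 0, max = 0, stddev = 0;
	int divisor = 0;

	for (int i = 0; i < MAXSAMPLES; i++) {
		if (resi[i] <= thresh) {
			avg += resi[i];
			divisor++;
			if (resi[i] > max)
				max = resi[i];
		}
	}
	avg /= divisor;

	for (int i = 0; i < MAXSAMPLES; i++) {
		if (resi[i] <= thresh) {
			int64_t diff = (int64_t)resi[i] - (int64_t)avg;
			stddev += diff * diff;
		}
	}
	stddev = (uint64_t)sqrt((double)(stddev / divisor));

	/* Low deviation: trust the average as the prediction. */
	if ((avg > stddev * 6 && divisor >= MAXSAMPLES - 1) ||
	    stddev <= ref_stddev)
		return avg;

	/* High deviation: drop the largest sample once and retry. */
	if (divisor > MAXSAMPLES - 1) {
		thresh = max - 1;
		goto again;
	}
	return 0;
}

int main(void)
{
	/* A repeating ~5 ms pattern with small jitter */
	uint32_t resi[MAXSAMPLES] = { 5000, 4900, 5100, 4950, 5050 };

	printf("predicted = %llu us\n",
	       (unsigned long long)predict(resi));
	return 0;
}

For the five samples above, the average is 5000 us with a standard
deviation of about 70 us, within the default ref_stddev of 100 us, so
5000 us is returned. In the patch itself this logic runs over a per-cpu
history ring buffer and additionally restricts modes with too many
premature exits, as the diff below shows.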
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/cpuidle/lpm-levels.c	293
1 file changed, 284 insertions(+), 9 deletions(-)
diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c
index 389e49ef8fca..ced95aa2b649 100644
--- a/drivers/cpuidle/lpm-levels.c
+++ b/drivers/cpuidle/lpm-levels.c
@@ -1,4 +1,6 @@
/* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
+ * Copyright (C) 2009 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -83,9 +85,37 @@ struct lpm_debug {
struct lpm_cluster *lpm_root_node;
+#define MAXSAMPLES 5
+
+static bool lpm_prediction;
+module_param_named(lpm_prediction,
+ lpm_prediction, bool, S_IRUGO | S_IWUSR | S_IWGRP);
+
+static uint32_t ref_stddev = 100;
+module_param_named(
+ ref_stddev, ref_stddev, uint, S_IRUGO | S_IWUSR | S_IWGRP
+);
+
+static uint32_t tmr_add = 100;
+module_param_named(
+ tmr_add, tmr_add, uint, S_IRUGO | S_IWUSR | S_IWGRP
+);
+
+struct lpm_history {
+	uint32_t resi[MAXSAMPLES];	/* recent residencies, in us */
+	int mode[MAXSAMPLES];		/* low power mode entered per sample */
+	int nsamp;			/* samples collected so far */
+	uint32_t hptr;			/* ring buffer write index */
+	uint32_t hinvalid;		/* set when the history timer fires */
+	uint32_t htmr_wkup;		/* last wakeup was the history timer */
+};
+
+static DEFINE_PER_CPU(struct lpm_history, hist);
+
static DEFINE_PER_CPU(struct lpm_cluster*, cpu_cluster);
static bool suspend_in_progress;
static struct hrtimer lpm_hrtimer;
+static struct hrtimer histtimer;
static struct lpm_debug *lpm_debug;
static phys_addr_t lpm_debug_phys;
static const int num_dbg_elements = 0x100;
@@ -327,10 +357,37 @@ static enum hrtimer_restart lpm_hrtimer_cb(struct hrtimer *h)
return HRTIMER_NORESTART;
}
+static void histtimer_cancel(void)
+{
+ if (!lpm_prediction)
+ return;
+
+ hrtimer_try_to_cancel(&histtimer);
+}
+
+static enum hrtimer_restart histtimer_fn(struct hrtimer *h)
+{
+ int cpu = raw_smp_processor_id();
+ struct lpm_history *history = &per_cpu(hist, cpu);
+
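+	/* Woken by the backup timer, not a real event: flag the sample */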
+ history->hinvalid = 1;
+ return HRTIMER_NORESTART;
+}
+
+static void histtimer_start(uint32_t time_us)
+{
+ uint64_t time_ns = time_us * NSEC_PER_USEC;
+ ktime_t hist_ktime = ns_to_ktime(time_ns);
+
+ histtimer.function = histtimer_fn;
+ hrtimer_start(&histtimer, hist_ktime, HRTIMER_MODE_REL_PINNED);
+}
+
static void msm_pm_set_timer(uint32_t modified_time_us)
{
u64 modified_time_ns = modified_time_us * NSEC_PER_USEC;
ktime_t modified_ktime = ns_to_ktime(modified_time_ns);
+
lpm_hrtimer.function = lpm_hrtimer_cb;
hrtimer_start(&lpm_hrtimer, modified_ktime, HRTIMER_MODE_REL_PINNED);
}
@@ -415,6 +472,143 @@ static int set_device_mode(struct lpm_cluster *cluster, int ndevice,
return -EINVAL;
}
+static uint64_t lpm_cpuidle_predict(struct cpuidle_device *dev,
+ struct lpm_cpu *cpu, int *idx_restrict,
+ uint32_t *idx_restrict_time)
+{
+ int i, j, divisor;
+ uint64_t max, avg, stddev;
+ int64_t thresh = LLONG_MAX;
+ struct lpm_history *history = &per_cpu(hist, dev->cpu);
+ uint32_t *min_residency = get_per_cpu_min_residency(dev->cpu);
+
+ if (!lpm_prediction)
+ return 0;
+
+	/*
+	 * Samples are marked invalid when the wakeup was due to the
+	 * history timer, so do not predict.
+	 */
+ if (history->hinvalid) {
+ history->hinvalid = 0;
+ history->htmr_wkup = 1;
+ return 0;
+ }
+
+ /*
+ * Predict only when all the samples are collected.
+ */
+ if (history->nsamp < MAXSAMPLES)
+ return 0;
+
+	/*
+	 * Check whether the samples deviate much; if not, use their
+	 * average as the predicted sleep time. Else, if any specific
+	 * mode has more premature exits, return the index of that
+	 * mode.
+	 */
+
+again:
+ max = avg = divisor = stddev = 0;
+ for (i = 0; i < MAXSAMPLES; i++) {
+ int64_t value = history->resi[i];
+
+ if (value <= thresh) {
+ avg += value;
+ divisor++;
+ if (value > max)
+ max = value;
+ }
+ }
+ do_div(avg, divisor);
+
+ for (i = 0; i < MAXSAMPLES; i++) {
+ int64_t value = history->resi[i];
+
+ if (value <= thresh) {
+ int64_t diff = value - avg;
+
+ stddev += diff * diff;
+ }
+ }
+ do_div(stddev, divisor);
+ stddev = int_sqrt(stddev);
+
+	/*
+	 * If the deviation is small enough, return the average; else
+	 * drop the single largest sample and retry.
+	 */
+ if (((avg > stddev * 6) && (divisor >= (MAXSAMPLES - 1)))
+ || stddev <= ref_stddev) {
+ return avg;
+ } else if (divisor > (MAXSAMPLES - 1)) {
+ thresh = max - 1;
+ goto again;
+ }
+
+	/*
+	 * Find the number of premature exits for each of the modes,
+	 * excluding clock gating mode; if they are more than fifty
+	 * percent, restrict that mode and the deeper ones.
+	 */
+ if (history->htmr_wkup != 1) {
+ for (j = 1; j < cpu->nlevels; j++) {
+ uint32_t failed = 0;
+ uint64_t total = 0;
+
+ for (i = 0; i < MAXSAMPLES; i++) {
+ if ((history->mode[i] == j) &&
+ (history->resi[i] < min_residency[j])) {
+ failed++;
+ total += history->resi[i];
+ }
+ }
+ if (failed > (MAXSAMPLES/2)) {
+ *idx_restrict = j;
+ do_div(total, failed);
+ *idx_restrict_time = total;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static inline void invalidate_predict_history(struct cpuidle_device *dev)
+{
+ struct lpm_history *history = &per_cpu(hist, dev->cpu);
+
+ if (!lpm_prediction)
+ return;
+
+ if (history->hinvalid) {
+ history->hinvalid = 0;
+ history->htmr_wkup = 1;
+ }
+}
+
+static void clear_predict_history(void)
+{
+ struct lpm_history *history;
+ int i;
+ unsigned int cpu;
+
+ if (!lpm_prediction)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ history = &per_cpu(hist, cpu);
+ for (i = 0; i < MAXSAMPLES; i++) {
+ history->resi[i] = 0;
+ history->mode[i] = -1;
+ history->hptr = 0;
+ history->nsamp = 0;
+ }
+ }
+}
+
+static void update_history(struct cpuidle_device *dev, int idx);
+
static int cpu_power_select(struct cpuidle_device *dev,
struct lpm_cpu *cpu)
{
@@ -425,9 +619,13 @@ static int cpu_power_select(struct cpuidle_device *dev,
(uint32_t)(ktime_to_us(tick_nohz_get_sleep_length()));
uint32_t modified_time_us = 0;
uint32_t next_event_us = 0;
- int i;
+ int i, idx_restrict;
uint32_t lvl_latency_us = 0;
- uint32_t *residency = get_per_cpu_max_residency(dev->cpu);
+ uint64_t predicted = 0;
+ uint32_t htime = 0, idx_restrict_time = 0;
+ uint32_t next_wakeup_us = sleep_us;
+ uint32_t *min_residency = get_per_cpu_min_residency(dev->cpu);
+ uint32_t *max_residency = get_per_cpu_max_residency(dev->cpu);
if (!cpu)
return -EINVAL;
@@ -435,12 +633,13 @@ static int cpu_power_select(struct cpuidle_device *dev,
if (sleep_disabled)
return 0;
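+	/* cpu->nlevels + 1 is a sentinel: no restriction from history yet */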
+ idx_restrict = cpu->nlevels + 1;
+
next_event_us = (uint32_t)(ktime_to_us(get_next_event_time(dev->cpu)));
for (i = 0; i < cpu->nlevels; i++) {
struct lpm_cpu_level *level = &cpu->levels[i];
struct power_params *pwr_params = &level->pwr;
- uint32_t next_wakeup_us = sleep_us;
enum msm_pm_sleep_mode mode = level->mode;
bool allow;
@@ -463,24 +662,64 @@ static int cpu_power_select(struct cpuidle_device *dev,
next_wakeup_us = next_event_us - lvl_latency_us;
}
+ if (!i) {
+			/*
+			 * If next_wakeup_us itself is not sufficient for
+			 * low power modes deeper than clock gating, do
+			 * not call prediction.
+			 */
+ if (next_wakeup_us > max_residency[i]) {
+ predicted = lpm_cpuidle_predict(dev, cpu,
+ &idx_restrict, &idx_restrict_time);
+ if (predicted < min_residency[i])
+ predicted = 0;
+ } else
+ invalidate_predict_history(dev);
+ }
+
+ if (i >= idx_restrict)
+ break;
+
best_level = i;
if (next_event_us && next_event_us < sleep_us &&
- (mode != MSM_PM_SLEEP_MODE_WAIT_FOR_INTERRUPT))
+ (mode != MSM_PM_SLEEP_MODE_WAIT_FOR_INTERRUPT))
modified_time_us
= next_event_us - lvl_latency_us;
else
modified_time_us = 0;
- if (next_wakeup_us <= residency[i])
+ if (predicted ? (predicted <= max_residency[i])
+ : (next_wakeup_us <= max_residency[i]))
break;
}
if (modified_time_us)
msm_pm_set_timer(modified_time_us);
+	/*
+	 * Start a timer so the cpu does not stay in a shallower mode
+	 * forever in case of misprediction: wake up at predicted+delta
+	 * (or at the restricted mode's average premature residency),
+	 * capped at the selected mode's max residency.
+	 */
+ if ((predicted || (idx_restrict != (cpu->nlevels + 1)))
+ && ((best_level >= 0)
+ && (best_level < (cpu->nlevels-1)))) {
+ htime = predicted + tmr_add;
+ if (htime == tmr_add)
+ htime = idx_restrict_time;
+ else if (htime > max_residency[best_level])
+ htime = max_residency[best_level];
+
+ if ((next_wakeup_us > htime) &&
+ ((next_wakeup_us - htime) > max_residency[best_level]))
+ histtimer_start(htime);
+ }
+
trace_cpu_power_select(best_level, sleep_us, latency_us, next_event_us);
+ trace_cpu_pred_select(idx_restrict_time ? 2 : (predicted ? 1 : 0),
+ predicted, htime);
+
return best_level;
}
@@ -641,6 +880,7 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx,
}
us = us + 1;
+ clear_predict_history();
do_div(us, USEC_PER_SEC/SCLK_HZ);
msm_mpm_enter_sleep(us, from_idle, cpumask);
}
@@ -975,6 +1215,39 @@ static int lpm_cpuidle_select(struct cpuidle_driver *drv,
return idx;
}
+static void update_history(struct cpuidle_device *dev, int idx)
+{
+ struct lpm_history *history = &per_cpu(hist, dev->cpu);
+ uint32_t tmr = 0;
+
+ if (!lpm_prediction)
+ return;
+
+ if (history->htmr_wkup) {
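+		/*
+		 * Last wakeup came from the history timer, so the idle
+		 * period was cut short: rewind to the previous slot and
+		 * fold this residency into that sample.
+		 */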
+ if (!history->hptr)
+ history->hptr = MAXSAMPLES-1;
+ else
+ history->hptr--;
+
+ history->resi[history->hptr] += dev->last_residency;
+ history->htmr_wkup = 0;
+ tmr = 1;
+ } else
+ history->resi[history->hptr] = dev->last_residency;
+
+ history->mode[history->hptr] = idx;
+
+ trace_cpu_pred_hist(history->mode[history->hptr],
+ history->resi[history->hptr], history->hptr, tmr);
+
+ if (history->nsamp < MAXSAMPLES)
+ history->nsamp++;
+
+ (history->hptr)++;
+ if (history->hptr >= MAXSAMPLES)
+ history->hptr = 0;
+}
+
static int lpm_cpuidle_enter(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int idx)
{
@@ -1009,12 +1282,13 @@ exit:
cluster_unprepare(cluster, cpumask, idx, true, end_time);
cpu_unprepare(cluster, idx, true);
sched_set_cpu_cstate(smp_processor_id(), 0, 0, 0);
-
- trace_cpu_idle_exit(idx, success);
end_time = ktime_to_ns(ktime_get()) - start_time;
- dev->last_residency = do_div(end_time, 1000);
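+	/*
+	 * do_div() divides in place and returns the remainder; the
+	 * quotient (residency in us) is read back from end_time.
+	 */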
+ do_div(end_time, 1000);
+ dev->last_residency = end_time;
+ update_history(dev, idx);
+ trace_cpu_idle_exit(idx, success);
local_irq_enable();
-
+ histtimer_cancel();
return idx;
}
@@ -1286,6 +1560,7 @@ static int lpm_probe(struct platform_device *pdev)
*/
suspend_set_ops(&lpm_suspend_ops);
hrtimer_init(&lpm_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&histtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
ret = remote_spin_lock_init(&scm_handoff_lock, SCM_HANDOFF_LOCK_ID);
if (ret) {