 Documentation/scheduler/sched-hmp.txt | 17 +++++++++++++++++
 include/linux/sched/sysctl.h          |  1 +
 kernel/sched/fair.c                   | 33 +++++++++++++++++++++++++--------
 kernel/sched/hmp.c                    |  7 +++++++
 kernel/sysctl.c                       |  9 +++++++++
 5 files changed, 59 insertions(+), 8 deletions(-)
diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt
index 22449aec5558..b400e053e55d 100644
--- a/Documentation/scheduler/sched-hmp.txt
+++ b/Documentation/scheduler/sched-hmp.txt
@@ -1220,6 +1220,23 @@ This tunable is a percentage. Configure the minimum demand of big sync waker
task. Scheduler places small wakee tasks woken up by big sync waker on the
waker's cluster.
+*** 7.19 sched_prefer_sync_wakee_to_waker
+
+Appears at: /proc/sys/kernel/sched_prefer_sync_wakee_to_waker
+
+Default value: 0
+
+The default sync wakee policy prefers an idle CPU in the waker's cluster
+over the waker CPU, even when the waker CPU is running only 1 task. By
+selecting an idle CPU, it eliminates the chance of the waker migrating to
+a different CPU after the wakee preempts it. This policy is also not
+susceptible to incorrect "sync" usage, i.e. the case where the waker does
+not go to sleep after waking up the wakee.
+
+However, on some targets the LPM (low power mode) exit latency associated
+with an idle CPU outweighs the above benefits. When this knob is turned
+on, the waker CPU is selected instead if it has only 1 runnable task.
+
=========================
8. HMP SCHEDULER TRACE POINTS
=========================
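As a quick aside (not part of the patch), the knob documented above can be
flipped at run time with "echo 1 > /proc/sys/kernel/sched_prefer_sync_wakee_to_waker".
A minimal, hypothetical user-space sketch of the same in C, assuming a kernel
built with CONFIG_SCHED_HMP so that the procfs entry exists:

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          /* 1 = prefer the waker CPU for sync wakees, 0 = default policy */
          int fd = open("/proc/sys/kernel/sched_prefer_sync_wakee_to_waker",
                        O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, "1\n", 2) != 2)
                  perror("write");
          close(fd);
          return 0;
  }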
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1f9c2c734b20..861f715a673d 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -64,6 +64,7 @@ extern unsigned int sysctl_sched_pred_alert_freq;
extern unsigned int sysctl_sched_freq_aggregate;
extern unsigned int sysctl_sched_enable_thread_grouping;
extern unsigned int sysctl_sched_freq_aggregate_threshold_pct;
+extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker;
#else /* CONFIG_SCHED_HMP */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 14b8977d1be4..4489bec5d68a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2590,6 +2590,7 @@ static u32 __compute_runnable_contrib(u64 n)
#define SBC_FLAG_COST_CSTATE_PREV_CPU_TIE_BREAKER 0x80
#define SBC_FLAG_CSTATE_LOAD 0x100
#define SBC_FLAG_BEST_SIBLING 0x200
+#define SBC_FLAG_WAKER_CPU 0x400
/* Cluster selection flag */
#define SBC_FLAG_COLOC_CLUSTER 0x10000
@@ -3060,6 +3061,15 @@ wake_to_waker_cluster(struct cpu_select_env *env)
task_load(env->p) < sched_small_wakee_task_load;
}
+static inline bool
+bias_to_waker_cpu(struct task_struct *p, int cpu)
+{
+ return sysctl_sched_prefer_sync_wakee_to_waker &&
+ cpu_rq(cpu)->nr_running == 1 &&
+ cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) &&
+ cpu_active(cpu) && !cpu_isolated(cpu);
+}
+
static inline int
cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
{
@@ -3080,6 +3090,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
struct cluster_cpu_stats stats;
struct related_thread_group *grp;
unsigned int sbc_flag = 0;
+ int cpu = raw_smp_processor_id();
struct cpu_select_env env = {
.p = p,
@@ -3111,14 +3122,20 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
else
env.rtg = grp;
} else {
- cluster = cpu_rq(smp_processor_id())->cluster;
- if (wake_to_waker_cluster(&env) &&
- cluster_allowed(p, cluster)) {
- env.need_waker_cluster = 1;
- bitmap_zero(env.candidate_list, NR_CPUS);
- __set_bit(cluster->id, env.candidate_list);
- env.sbc_best_cluster_flag = SBC_FLAG_WAKER_CLUSTER;
-
+ cluster = cpu_rq(cpu)->cluster;
+ if (wake_to_waker_cluster(&env)) {
+ if (bias_to_waker_cpu(p, cpu)) {
+ target = cpu;
+ sbc_flag = SBC_FLAG_WAKER_CLUSTER |
+ SBC_FLAG_WAKER_CPU;
+ goto out;
+ } else if (cluster_allowed(p, cluster)) {
+ env.need_waker_cluster = 1;
+ bitmap_zero(env.candidate_list, NR_CPUS);
+ __set_bit(cluster->id, env.candidate_list);
+ env.sbc_best_cluster_flag =
+ SBC_FLAG_WAKER_CLUSTER;
+ }
} else if (bias_to_prev_cpu(&env, &stats)) {
sbc_flag = SBC_FLAG_PREV_CPU;
goto out;
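To make the control flow above easier to follow outside the kernel tree, here
is a stand-alone sketch (a simplification, not kernel code: the scheduler
internals are reduced to plain parameters, and pick_sync_wakee_cpu() is a
hypothetical name) of the decision select_best_cpu() now makes for a sync
wakeup:

  #include <stdbool.h>

  /* Mirrors sysctl_sched_prefer_sync_wakee_to_waker. */
  static bool prefer_sync_wakee_to_waker;

  /*
   * Shortcut to the waker CPU when the knob is set, the waker is the only
   * runnable task on its CPU, and the wakee may run there; otherwise fall
   * back to searching the waker's cluster, as in the hunk above.
   */
  static int pick_sync_wakee_cpu(int waker_cpu, unsigned int nr_running,
                                 bool wakee_allowed, bool active, bool isolated,
                                 int (*search_waker_cluster)(void))
  {
          if (prefer_sync_wakee_to_waker && nr_running == 1 &&
              wakee_allowed && active && !isolated)
                  return waker_cpu;            /* SBC_FLAG_WAKER_CPU */

          return search_waker_cluster();       /* SBC_FLAG_WAKER_CLUSTER */
  }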
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 3bbf3ee46cf9..61e352aeec00 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -897,6 +897,13 @@ unsigned int __read_mostly sched_spill_load;
unsigned int __read_mostly sysctl_sched_spill_load_pct = 100;
/*
+ * Prefer the waker CPU for a sync wakee task, if that CPU has only 1
+ * runnable task. This eliminates the LPM exit latency associated with
+ * the idle CPUs in the waker cluster.
+ */
+unsigned int __read_mostly sysctl_sched_prefer_sync_wakee_to_waker;
+
+/*
* Tasks whose bandwidth consumption on a cpu is more than
* sched_upmigrate are considered "big" tasks. Big tasks will be
* considered for "up" migration, i.e migrating to a cpu with better
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index dad3324e7372..cdce7d0f5a0e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -422,6 +422,15 @@ static struct ctl_table kern_table[] = {
.extra2 = &one_hundred,
},
{
+ .procname = "sched_prefer_sync_wakee_to_waker",
+ .data = &sysctl_sched_prefer_sync_wakee_to_waker,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
.procname = "sched_enable_thread_grouping",
.data = &sysctl_sched_enable_thread_grouping,
.maxlen = sizeof(unsigned int),
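One closing note on the sysctl wiring: because the new entry uses
proc_dointvec_minmax with extra1 and extra2 pointing at zero and one, writes
outside [0, 1] are rejected. A hypothetical user-space check (again not part
of the patch) that confirms this:

  #include <errno.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          int fd = open("/proc/sys/kernel/sched_prefer_sync_wakee_to_waker",
                        O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          /* proc_dointvec_minmax limits the knob to [0, 1]; "2" must fail. */
          if (write(fd, "2\n", 2) < 0)
                  printf("out-of-range write rejected: %s\n", strerror(errno));
          close(fd);
          return 0;
  }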