diff options
-rw-r--r-- | Documentation/scheduler/sched-hmp.txt | 17 | ||||
-rw-r--r-- | include/linux/sched/sysctl.h | 1 | ||||
-rw-r--r-- | kernel/sched/fair.c | 33 | ||||
-rw-r--r-- | kernel/sched/hmp.c | 7 | ||||
-rw-r--r-- | kernel/sysctl.c | 9 |
5 files changed, 59 insertions, 8 deletions
diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt index 22449aec5558..b400e053e55d 100644 --- a/Documentation/scheduler/sched-hmp.txt +++ b/Documentation/scheduler/sched-hmp.txt @@ -1220,6 +1220,23 @@ This tunable is a percentage. Configure the minimum demand of big sync waker task. Scheduler places small wakee tasks woken up by big sync waker on the waker's cluster. +*** 7.19 sched_prefer_sync_wakee_to_waker + +Appears at: /proc/sys/kernel/sched_prefer_sync_wakee_to_waker + +Default value: 0 + +The default sync wakee policy prefers an idle CPU in the waker cluster over +the waker CPU running only 1 task. By selecting an idle CPU, it eliminates +the chance of the waker migrating to a different CPU after the wakee +preempts it. This policy is also not susceptible to incorrect "sync" usage, +i.e. the waker does not go to sleep after waking up +the wakee. + +However, the LPM exit latency associated with an idle CPU outweighs the above +benefits on some targets. When this knob is turned on, the waker CPU is +selected if it has only 1 runnable task. + ========================= 8. 
HMP SCHEDULER TRACE POINTS ========================= diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 1f9c2c734b20..861f715a673d 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -64,6 +64,7 @@ extern unsigned int sysctl_sched_pred_alert_freq; extern unsigned int sysctl_sched_freq_aggregate; extern unsigned int sysctl_sched_enable_thread_grouping; extern unsigned int sysctl_sched_freq_aggregate_threshold_pct; +extern unsigned int sysctl_sched_prefer_sync_wakee_to_waker; #else /* CONFIG_SCHED_HMP */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 14b8977d1be4..4489bec5d68a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2590,6 +2590,7 @@ static u32 __compute_runnable_contrib(u64 n) #define SBC_FLAG_COST_CSTATE_PREV_CPU_TIE_BREAKER 0x80 #define SBC_FLAG_CSTATE_LOAD 0x100 #define SBC_FLAG_BEST_SIBLING 0x200 +#define SBC_FLAG_WAKER_CPU 0x400 /* Cluster selection flag */ #define SBC_FLAG_COLOC_CLUSTER 0x10000 @@ -3060,6 +3061,15 @@ wake_to_waker_cluster(struct cpu_select_env *env) task_load(env->p) < sched_small_wakee_task_load; } +static inline bool +bias_to_waker_cpu(struct task_struct *p, int cpu) +{ + return sysctl_sched_prefer_sync_wakee_to_waker && + cpu_rq(cpu)->nr_running == 1 && + cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) && + cpu_active(cpu) && !cpu_isolated(cpu); +} + static inline int cluster_allowed(struct task_struct *p, struct sched_cluster *cluster) { @@ -3080,6 +3090,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, struct cluster_cpu_stats stats; struct related_thread_group *grp; unsigned int sbc_flag = 0; + int cpu = raw_smp_processor_id(); struct cpu_select_env env = { .p = p, @@ -3111,14 +3122,20 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, else env.rtg = grp; } else { - cluster = cpu_rq(smp_processor_id())->cluster; - if (wake_to_waker_cluster(&env) && - cluster_allowed(p, cluster)) { - 
env.need_waker_cluster = 1; - bitmap_zero(env.candidate_list, NR_CPUS); - __set_bit(cluster->id, env.candidate_list); - env.sbc_best_cluster_flag = SBC_FLAG_WAKER_CLUSTER; - + cluster = cpu_rq(cpu)->cluster; + if (wake_to_waker_cluster(&env)) { + if (bias_to_waker_cpu(p, cpu)) { + target = cpu; + sbc_flag = SBC_FLAG_WAKER_CLUSTER | + SBC_FLAG_WAKER_CPU; + goto out; + } else if (cluster_allowed(p, cluster)) { + env.need_waker_cluster = 1; + bitmap_zero(env.candidate_list, NR_CPUS); + __set_bit(cluster->id, env.candidate_list); + env.sbc_best_cluster_flag = + SBC_FLAG_WAKER_CLUSTER; + } } else if (bias_to_prev_cpu(&env, &stats)) { sbc_flag = SBC_FLAG_PREV_CPU; goto out; diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c index 3bbf3ee46cf9..61e352aeec00 100644 --- a/kernel/sched/hmp.c +++ b/kernel/sched/hmp.c @@ -897,6 +897,13 @@ unsigned int __read_mostly sched_spill_load; unsigned int __read_mostly sysctl_sched_spill_load_pct = 100; /* + * Prefer the waker CPU for sync wakee task, if the CPU has only 1 runnable + * task. This eliminates the LPM exit latency associated with the idle + * CPUs in the waker cluster. + */ +unsigned int __read_mostly sysctl_sched_prefer_sync_wakee_to_waker; + +/* * Tasks whose bandwidth consumption on a cpu is more than * sched_upmigrate are considered "big" tasks. Big tasks will be * considered for "up" migration, i.e migrating to a cpu with better diff --git a/kernel/sysctl.c b/kernel/sysctl.c index dad3324e7372..cdce7d0f5a0e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -422,6 +422,15 @@ static struct ctl_table kern_table[] = { .extra2 = &one_hundred, }, { + .procname = "sched_prefer_sync_wakee_to_waker", + .data = &sysctl_sched_prefer_sync_wakee_to_waker, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "sched_enable_thread_grouping", .data = &sysctl_sched_enable_thread_grouping, .maxlen = sizeof(unsigned int), |