Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c           |  24
-rw-r--r--  kernel/exit.c              |   8
-rw-r--r--  kernel/futex.c             |   1
-rw-r--r--  kernel/irq/chip.c          |   5
-rw-r--r--  kernel/irq/manage.c        |   2
-rw-r--r--  kernel/irq/resend.c        |  12
-rw-r--r--  kernel/kprobes.c           |   5
-rw-r--r--  kernel/posix-timers.c      |   9
-rw-r--r--  kernel/power/snapshot.c    |   3
-rw-r--r--  kernel/printk.c            |  15
-rw-r--r--  kernel/profile.c           |   4
-rw-r--r--  kernel/sched.c             | 402
-rw-r--r--  kernel/sched_debug.c       |  30
-rw-r--r--  kernel/sched_fair.c        | 225
-rw-r--r--  kernel/sched_idletask.c    |  10
-rw-r--r--  kernel/sched_rt.c          |  48
-rw-r--r--  kernel/signal.c            |   8
-rw-r--r--  kernel/sysctl.c            |   3
-rw-r--r--  kernel/time/clockevents.c  |  10
19 files changed, 406 insertions, 418 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a777d3761416..04f3ffb8d9d4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1992,19 +1992,19 @@ int __audit_signal_info(int sig, struct task_struct *t)
extern uid_t audit_sig_uid;
extern u32 audit_sig_sid;
- if (audit_pid && t->tgid == audit_pid &&
- (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) {
- audit_sig_pid = tsk->pid;
- if (ctx)
- audit_sig_uid = ctx->loginuid;
- else
- audit_sig_uid = tsk->uid;
- selinux_get_task_sid(tsk, &audit_sig_sid);
+ if (audit_pid && t->tgid == audit_pid) {
+ if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
+ audit_sig_pid = tsk->pid;
+ if (ctx)
+ audit_sig_uid = ctx->loginuid;
+ else
+ audit_sig_uid = tsk->uid;
+ selinux_get_task_sid(tsk, &audit_sig_sid);
+ }
+ if (!audit_signals || audit_dummy_context())
+ return 0;
}
- if (!audit_signals) /* audit_context checked in wrapper */
- return 0;
-
/* optimize the common case by putting first signal recipient directly
* in audit_context */
if (!ctx->target_pid) {
@@ -2023,7 +2023,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
axp->d.next = ctx->aux_pids;
ctx->aux_pids = (void *)axp;
}
- BUG_ON(axp->pid_count > AUDIT_AUX_PIDS);
+ BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS);
axp->target_pid[axp->pid_count] = t->tgid;
selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]);
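The BUG_ON change above tightens an off-by-one: pid_count is used as an array index right afterwards, so the check has to trip when the array is already full, not one element later. A minimal user-space sketch of the same bound check (the array size and struct are illustrative stand-ins, not the kernel's definitions):

#include <assert.h>

#define AUX_PIDS 16                     /* stand-in for AUDIT_AUX_PIDS */

struct aux_pids {
        int pid_count;
        int target_pid[AUX_PIDS];
};

static void record_target(struct aux_pids *axp, int pid)
{
        /* With '>' the check would still pass when pid_count == AUX_PIDS
         * and the store below would write one element past the end;
         * '>=' (i.e. asserting pid_count < AUX_PIDS) rejects exactly
         * that case. */
        assert(axp->pid_count < AUX_PIDS);
        axp->target_pid[axp->pid_count++] = pid;
}

int main(void)
{
        struct aux_pids a = { 0 };

        for (int i = 0; i < AUX_PIDS; i++)
                record_target(&a, 100 + i);
        return 0;                       /* a 17th call would trip the assert */
}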
diff --git a/kernel/exit.c b/kernel/exit.c
index 464c2b172f07..9578c1ae19ca 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -813,7 +813,7 @@ static void exit_notify(struct task_struct *tsk)
__kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
}
- /* Let father know we died
+ /* Let father know we died
*
* Thread signals are configurable, but you aren't going to use
* that to send signals to arbitary processes.
@@ -826,9 +826,7 @@ static void exit_notify(struct task_struct *tsk)
* If our self_exec id doesn't match our parent_exec_id then
* we have changed execution domain as these two values started
* the same after a fork.
- *
*/
-
if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
( tsk->parent_exec_id != t->self_exec_id ||
tsk->self_exec_id != tsk->parent_exec_id)
@@ -848,9 +846,7 @@ static void exit_notify(struct task_struct *tsk)
}
state = EXIT_ZOMBIE;
- if (tsk->exit_signal == -1 &&
- (likely(tsk->ptrace == 0) ||
- unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
+ if (tsk->exit_signal == -1 && likely(!tsk->ptrace))
state = EXIT_DEAD;
tsk->exit_state = state;
diff --git a/kernel/futex.c b/kernel/futex.c
index 3415e9ad1391..e8935b195e88 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1670,6 +1670,7 @@ pi_faulted:
attempt);
if (ret)
goto out;
+ uval = 0;
goto retry_unlocked;
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 615ce97c6cfd..f1a73f0b54e7 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -352,13 +352,10 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
* keep it masked and get out of here
*/
action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
- desc->status |= IRQ_PENDING;
+ if (unlikely(!action || (desc->status & IRQ_DISABLED)))
goto out_unlock;
- }
desc->status |= IRQ_INPROGRESS;
- desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 203a518b6f14..853aefbd184b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -462,7 +462,9 @@ void free_irq(unsigned int irq, void *dev_id)
* We do this after actually deregistering it, to make sure that
* a 'real' IRQ doesn't run in parallel with our fake
*/
+ local_irq_save(flags);
handler(irq, dev_id);
+ local_irq_restore(flags);
}
#endif
}
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index c38272746887..a8046791ba2d 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -63,15 +63,11 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
desc->chip->enable(irq);
/*
- * Temporary hack to figure out more about the problem, which
- * is causing the ancient network cards to die.
+ * We do not resend level type interrupts. Level type
+ * interrupts are resent by hardware when they are still
+ * active.
*/
- if (desc->handle_irq != handle_edge_irq) {
- WARN_ON_ONCE(1);
- return;
- }
-
- if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY;
if (!desc->chip || !desc->chip->retrigger ||
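The new condition only retriggers an interrupt that is pending, not level-triggered, and not already replayed: masking with all three bits and comparing against IRQ_PENDING alone expresses that in a single test. A small stand-alone sketch with illustrative bit values (not the kernel's real flag layout):

#include <stdio.h>

#define IRQ_PENDING     0x01            /* illustrative values only */
#define IRQ_LEVEL       0x02
#define IRQ_REPLAY      0x04

static int should_resend(unsigned int status)
{
        /* True only if PENDING is set while LEVEL and REPLAY are both
         * clear: any other bit in the mask makes the compare fail. */
        return (status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING;
}

int main(void)
{
        printf("%d\n", should_resend(IRQ_PENDING));                /* 1 */
        printf("%d\n", should_resend(IRQ_PENDING | IRQ_LEVEL));    /* 0 */
        printf("%d\n", should_resend(IRQ_PENDING | IRQ_REPLAY));   /* 0 */
        return 0;
}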
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3e9f513a728d..4b8a4493c541 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1063,6 +1063,11 @@ EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
+#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(jprobe_return);
+#endif
+
+#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
+#endif
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 55b3761edaa9..7a15afb73ed0 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -547,9 +547,9 @@ sys_timer_create(const clockid_t which_clock,
new_timer->it_process = process;
list_add(&new_timer->list,
&process->signal->posix_timers);
- spin_unlock_irqrestore(&process->sighand->siglock, flags);
if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
get_task_struct(process);
+ spin_unlock_irqrestore(&process->sighand->siglock, flags);
} else {
spin_unlock_irqrestore(&process->sighand->siglock, flags);
process = NULL;
@@ -605,13 +605,14 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
if (timr) {
spin_lock(&timr->it_lock);
- spin_unlock(&idr_lock);
if ((timr->it_id != timer_id) || !(timr->it_process) ||
timr->it_process->tgid != current->tgid) {
- unlock_timer(timr, *flags);
+ spin_unlock(&timr->it_lock);
+ spin_unlock_irqrestore(&idr_lock, *flags);
timr = NULL;
- }
+ } else
+ spin_unlock(&idr_lock);
} else
spin_unlock_irqrestore(&idr_lock, *flags);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index a3b7854b8f7c..a686590d88c1 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -709,7 +709,8 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
region->end_pfn << PAGE_SHIFT);
for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
- memory_bm_set_bit(bm, pfn);
+ if (pfn_valid(pfn))
+ memory_bm_set_bit(bm, pfn);
}
}
diff --git a/kernel/printk.c b/kernel/printk.c
index 051d27e36a6c..8451dfc31d25 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -732,7 +732,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
return 0;
}
-int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
+int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
{
struct console_cmdline *c;
int i;
@@ -1083,6 +1083,19 @@ int unregister_console(struct console *console)
}
EXPORT_SYMBOL(unregister_console);
+static int __init disable_boot_consoles(void)
+{
+ if (console_drivers != NULL) {
+ if (console_drivers->flags & CON_BOOT) {
+ printk(KERN_INFO "turn off boot console %s%d\n",
+ console_drivers->name, console_drivers->index);
+ return unregister_console(console_drivers);
+ }
+ }
+ return 0;
+}
+late_initcall(disable_boot_consoles);
+
/**
* tty_write_message - write a message to a certain tty, not just the console.
* @tty: the destination tty_struct
diff --git a/kernel/profile.c b/kernel/profile.c
index 5b20fe977bed..cb1e37d2dac3 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -199,11 +199,11 @@ EXPORT_SYMBOL_GPL(register_timer_hook);
EXPORT_SYMBOL_GPL(unregister_timer_hook);
EXPORT_SYMBOL_GPL(task_handoff_register);
EXPORT_SYMBOL_GPL(task_handoff_unregister);
+EXPORT_SYMBOL_GPL(profile_event_register);
+EXPORT_SYMBOL_GPL(profile_event_unregister);
#endif /* CONFIG_PROFILING */
-EXPORT_SYMBOL_GPL(profile_event_register);
-EXPORT_SYMBOL_GPL(profile_event_unregister);
#ifdef CONFIG_SMP
/*
diff --git a/kernel/sched.c b/kernel/sched.c
index 72bb9483d949..45e17b83b7f1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -263,6 +263,7 @@ struct rq {
unsigned int clock_warps, clock_overflows;
unsigned int clock_unstable_events;
+ u64 tick_timestamp;
atomic_t nr_iowait;
@@ -318,15 +319,19 @@ static inline int cpu_of(struct rq *rq)
}
/*
- * Per-runqueue clock, as finegrained as the platform can give us:
+ * Update the per-runqueue clock, as finegrained as the platform can give
+ * us, but without assuming monotonicity, etc.:
*/
-static unsigned long long __rq_clock(struct rq *rq)
+static void __update_rq_clock(struct rq *rq)
{
u64 prev_raw = rq->prev_clock_raw;
u64 now = sched_clock();
s64 delta = now - prev_raw;
u64 clock = rq->clock;
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+#endif
/*
* Protect against sched_clock() occasionally going backwards:
*/
@@ -337,8 +342,11 @@ static unsigned long long __rq_clock(struct rq *rq)
/*
* Catch too large forward jumps too:
*/
- if (unlikely(delta > 2*TICK_NSEC)) {
- clock++;
+ if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) {
+ if (clock < rq->tick_timestamp + TICK_NSEC)
+ clock = rq->tick_timestamp + TICK_NSEC;
+ else
+ clock++;
rq->clock_overflows++;
} else {
if (unlikely(delta > rq->clock_max_delta))
@@ -349,18 +357,12 @@ static unsigned long long __rq_clock(struct rq *rq)
rq->prev_clock_raw = now;
rq->clock = clock;
-
- return clock;
}
-static inline unsigned long long rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
{
- int this_cpu = smp_processor_id();
-
- if (this_cpu == cpu_of(rq))
- return __rq_clock(rq);
-
- return rq->clock;
+ if (likely(smp_processor_id() == cpu_of(rq)))
+ __update_rq_clock(rq);
}
/*
@@ -386,9 +388,12 @@ unsigned long long cpu_clock(int cpu)
{
unsigned long long now;
unsigned long flags;
+ struct rq *rq;
local_irq_save(flags);
- now = rq_clock(cpu_rq(cpu));
+ rq = cpu_rq(cpu);
+ update_rq_clock(rq);
+ now = rq->clock;
local_irq_restore(flags);
return now;
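__update_rq_clock() above keeps rq->clock monotonic and bounds how far a single update may run ahead of the last scheduler tick. A user-space sketch of that clamping, with the runqueue fields reduced to a small struct (the names mirror the patch, but this is only an approximation of the logic, not the kernel code):

#include <stdio.h>
#include <stdint.h>

#define TICK_NSEC 1000000ULL            /* illustrative 1ms tick */

struct clk {
        uint64_t prev_raw;              /* last raw clock sample */
        uint64_t clock;                 /* filtered, monotonic clock */
        uint64_t tick_timestamp;        /* clock value at the last tick */
};

static void update_clock(struct clk *c, uint64_t raw_now)
{
        int64_t delta = (int64_t)(raw_now - c->prev_raw);
        uint64_t clock = c->clock;

        if (delta < 0) {
                /* raw clock went backwards: advance by the minimum */
                clock++;
        } else if (clock + delta > c->tick_timestamp + TICK_NSEC) {
                /* too large a forward jump: cap at one tick past the
                 * last tick timestamp, falling back to +1 if the clock
                 * is already beyond that point */
                if (clock < c->tick_timestamp + TICK_NSEC)
                        clock = c->tick_timestamp + TICK_NSEC;
                else
                        clock++;
        } else {
                clock += delta;
        }

        c->prev_raw = raw_now;
        c->clock = clock;
}

int main(void)
{
        struct clk c = { 0, 0, 0 };

        update_clock(&c, 300000);       /* normal step */
        update_clock(&c, 100000);       /* raw clock went backwards */
        update_clock(&c, 900000000);    /* huge jump, clamped to one tick */
        printf("%llu\n", (unsigned long long)c.clock);
        return 0;
}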
@@ -637,6 +642,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
#define WMULT_SHIFT 32
+/*
+ * Shift right and round:
+ */
+#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+
static unsigned long
calc_delta_mine(unsigned long delta_exec, unsigned long weight,
struct load_weight *lw)
@@ -644,18 +654,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
u64 tmp;
if (unlikely(!lw->inv_weight))
- lw->inv_weight = WMULT_CONST / lw->weight;
+ lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
tmp = (u64)delta_exec * weight;
/*
* Check whether we'd overflow the 64-bit multiplication:
*/
- if (unlikely(tmp > WMULT_CONST)) {
- tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight)
- >> (WMULT_SHIFT/2);
- } else {
- tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
- }
+ if (unlikely(tmp > WMULT_CONST))
+ tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+ WMULT_SHIFT/2);
+ else
+ tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
}
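RSR(x, y) adds half of 2^y before shifting, so dividing by 2^y rounds to nearest instead of truncating; the new inv_weight formula applies the same idea to WMULT_CONST / weight. A quick numeric check (WMULT_CONST is not shown in this hunk, so 2^32 is used here as a stand-in; the weight is just the nice -5 value from the table below):

#include <stdio.h>
#include <stdint.h>

/* Shift right and round, mirroring the macro added in the patch
 * (written with ULL here to stay portable in a user-space test): */
#define RSR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

int main(void)
{
        uint64_t wmult_const = 1ULL << 32;      /* stand-in for WMULT_CONST */
        unsigned long weight = 3121;            /* nice -5 weight from the table */

        /* Plain truncation vs. round-to-nearest for 10 / 4: */
        printf("%llu %llu\n",
               (unsigned long long)(10ULL >> 2),
               (unsigned long long)RSR(10ULL, 2));      /* prints 2 3 */

        /* New inv_weight: round WMULT_CONST / weight rather than truncate.
         * Both results below print 1376151, matching the nice -5 entry
         * of prio_to_wmult. */
        uint64_t inv_trunc = wmult_const / weight;
        uint64_t inv_round = (wmult_const - weight / 2) / weight + 1;

        printf("%llu %llu\n",
               (unsigned long long)inv_trunc,
               (unsigned long long)inv_round);
        return 0;
}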
@@ -703,11 +712,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
* the relative distance between them is ~25%.)
*/
static const int prio_to_weight[40] = {
-/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
-/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280,
-/* 0 */ NICE_0_LOAD /* 1024 */,
-/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137,
-/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
};
/*
@@ -718,14 +730,14 @@ static const int prio_to_weight[40] = {
* into multiplications:
*/
static const u32 prio_to_wmult[40] = {
-/* -20 */ 48356, 60446, 75558, 94446, 118058,
-/* -15 */ 147573, 184467, 230589, 288233, 360285,
-/* -10 */ 450347, 562979, 703746, 879575, 1099582,
-/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443,
-/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518,
-/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126,
-/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717,
-/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
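The two tables are tied together: prio_to_wmult[i] is roughly 2^32 / prio_to_weight[i], which lets the scheduler replace divisions by a weight with a multiply and a 32-bit shift, and each nice step changes the weight by about 25%. A quick stand-alone check of both properties against the new values (the weight table is copied from the patch; the printed wmult values are the ideal 2^32/weight, which the real table rounds):

#include <stdio.h>
#include <stdint.h>

static const int prio_to_weight[40] = {
        /* -20 */ 88761, 71755, 56483, 46273, 36291,
        /* -15 */ 29154, 23254, 18705, 14949, 11916,
        /* -10 */  9548,  7620,  6100,  4904,  3906,
        /*  -5 */  3121,  2501,  1991,  1586,  1277,
        /*   0 */  1024,   820,   655,   526,   423,
        /*   5 */   335,   272,   215,   172,   137,
        /*  10 */   110,    87,    70,    56,    45,
        /*  15 */    36,    29,    23,    18,    15,
};

int main(void)
{
        for (int i = 0; i < 40; i++) {
                /* ideal inverse weight and ratio to the previous nice level */
                uint64_t wmult = (1ULL << 32) / prio_to_weight[i];
                double step = i ? (double)prio_to_weight[i - 1] /
                                  prio_to_weight[i] : 0.0;

                printf("nice %3d  weight %6d  ~wmult %10llu  step %.3f\n",
                       i - 20, prio_to_weight[i],
                       (unsigned long long)wmult, step);
        }
        return 0;       /* step hovers around 1.25 per nice level */
}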
@@ -745,8 +757,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
- int this_best_prio, int best_prio, int best_prio_seen,
- struct rq_iterator *iterator);
+ int *this_best_prio, struct rq_iterator *iterator);
#include "sched_stats.h"
#include "sched_rt.c"
@@ -782,14 +793,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
* This function is called /before/ updating rq->ls.load
* and when switching tasks.
*/
-static void update_curr_load(struct rq *rq, u64 now)
+static void update_curr_load(struct rq *rq)
{
struct load_stat *ls = &rq->ls;
u64 start;
start = ls->load_update_start;
- ls->load_update_start = now;
- ls->delta_stat += now - start;
+ ls->load_update_start = rq->clock;
+ ls->delta_stat += rq->clock - start;
/*
* Stagger updates to ls->delta_fair. Very frequent updates
* can be expensive.
@@ -798,30 +809,28 @@ static void update_curr_load(struct rq *rq, u64 now)
__update_curr_load(rq, ls);
}
-static inline void
-inc_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
{
- update_curr_load(rq, now);
+ update_curr_load(rq);
update_load_add(&rq->ls.load, p->se.load.weight);
}
-static inline void
-dec_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
{
- update_curr_load(rq, now);
+ update_curr_load(rq);
update_load_sub(&rq->ls.load, p->se.load.weight);
}
-static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
{
rq->nr_running++;
- inc_load(rq, p, now);
+ inc_load(rq, p);
}
-static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
{
rq->nr_running--;
- dec_load(rq, p, now);
+ dec_load(rq, p);
}
static void set_load_weight(struct task_struct *p)
@@ -848,18 +857,16 @@ static void set_load_weight(struct task_struct *p)
p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
}
-static void
-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
{
sched_info_queued(p);
- p->sched_class->enqueue_task(rq, p, wakeup, now);
+ p->sched_class->enqueue_task(rq, p, wakeup);
p->se.on_rq = 1;
}
-static void
-dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
{
- p->sched_class->dequeue_task(rq, p, sleep, now);
+ p->sched_class->dequeue_task(rq, p, sleep);
p->se.on_rq = 0;
}
@@ -914,13 +921,11 @@ static int effective_prio(struct task_struct *p)
*/
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
- u64 now = rq_clock(rq);
-
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
- enqueue_task(rq, p, wakeup, now);
- inc_nr_running(p, rq, now);
+ enqueue_task(rq, p, wakeup);
+ inc_nr_running(p, rq);
}
/*
@@ -928,13 +933,13 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
*/
static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
{
- u64 now = rq_clock(rq);
+ update_rq_clock(rq);
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
- enqueue_task(rq, p, 0, now);
- inc_nr_running(p, rq, now);
+ enqueue_task(rq, p, 0);
+ inc_nr_running(p, rq);
}
/*
@@ -942,13 +947,11 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
*/
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
{
- u64 now = rq_clock(rq);
-
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible++;
- dequeue_task(rq, p, sleep, now);
- dec_nr_running(p, rq, now);
+ dequeue_task(rq, p, sleep);
+ dec_nr_running(p, rq);
}
/**
@@ -1516,6 +1519,7 @@ out_set_cpu:
out_activate:
#endif /* CONFIG_SMP */
+ update_rq_clock(rq);
activate_task(rq, p, 1);
/*
* Sync wakeups (i.e. those types of wakeups where the waker
@@ -1647,12 +1651,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
unsigned long flags;
struct rq *rq;
int this_cpu;
- u64 now;
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_RUNNING);
this_cpu = smp_processor_id(); /* parent's CPU */
- now = rq_clock(rq);
+ update_rq_clock(rq);
p->prio = effective_prio(p);
@@ -1666,8 +1669,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
* Let the scheduling class do new task startup
* management (if any):
*/
- p->sched_class->task_new(rq, p, now);
- inc_nr_running(p, rq, now);
+ p->sched_class->task_new(rq, p);
+ inc_nr_running(p, rq);
}
check_preempt_curr(rq, p);
task_rq_unlock(rq, &flags);
@@ -1954,7 +1957,6 @@ static void update_cpu_load(struct rq *this_rq)
unsigned long total_load = this_rq->ls.load.weight;
unsigned long this_load = total_load;
struct load_stat *ls = &this_rq->ls;
- u64 now = __rq_clock(this_rq);
int i, scale;
this_rq->nr_load_updates++;
@@ -1962,7 +1964,7 @@ static void update_cpu_load(struct rq *this_rq)
goto do_avg;
/* Update delta_fair/delta_exec fields first */
- update_curr_load(this_rq, now);
+ update_curr_load(this_rq);
fair_delta64 = ls->delta_fair + 1;
ls->delta_fair = 0;
@@ -1970,8 +1972,8 @@ static void update_cpu_load(struct rq *this_rq)
exec_delta64 = ls->delta_exec + 1;
ls->delta_exec = 0;
- sample_interval64 = now - ls->load_update_last;
- ls->load_update_last = now;
+ sample_interval64 = this_rq->clock - ls->load_update_last;
+ ls->load_update_last = this_rq->clock;
if ((s64)sample_interval64 < (s64)TICK_NSEC)
sample_interval64 = TICK_NSEC;
@@ -2026,6 +2028,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
spin_lock(&rq1->lock);
}
}
+ update_rq_clock(rq1);
+ update_rq_clock(rq2);
}
/*
@@ -2166,8 +2170,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
- int this_best_prio, int best_prio, int best_prio_seen,
- struct rq_iterator *iterator)
+ int *this_best_prio, struct rq_iterator *iterator)
{
int pulled = 0, pinned = 0, skip_for_load;
struct task_struct *p;
@@ -2192,12 +2195,8 @@ next:
*/
skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
SCHED_LOAD_SCALE_FUZZ;
- if (skip_for_load && p->prio < this_best_prio)
- skip_for_load = !best_prio_seen && p->prio == best_prio;
- if (skip_for_load ||
+ if ((skip_for_load && p->prio >= *this_best_prio) ||
!can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
-
- best_prio_seen |= p->prio == best_prio;
p = iterator->next(iterator->arg);
goto next;
}
@@ -2211,8 +2210,8 @@ next:
* and the prescribed amount of weighted load.
*/
if (pulled < max_nr_move && rem_load_move > 0) {
- if (p->prio < this_best_prio)
- this_best_prio = p->prio;
+ if (p->prio < *this_best_prio)
+ *this_best_prio = p->prio;
p = iterator->next(iterator->arg);
goto next;
}
@@ -2231,32 +2230,52 @@ out:
}
/*
- * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
- * load from busiest to this_rq, as part of a balancing operation within
- * "domain". Returns the number of tasks moved.
+ * move_tasks tries to move up to max_load_move weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns 1 if successful and 0 otherwise.
*
* Called with both runqueues locked.
*/
static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
- unsigned long max_nr_move, unsigned long max_load_move,
+ unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned)
{
struct sched_class *class = sched_class_highest;
- unsigned long load_moved, total_nr_moved = 0, nr_moved;
- long rem_load_move = max_load_move;
+ unsigned long total_load_moved = 0;
+ int this_best_prio = this_rq->curr->prio;
do {
- nr_moved = class->load_balance(this_rq, this_cpu, busiest,
- max_nr_move, (unsigned long)rem_load_move,
- sd, idle, all_pinned, &load_moved);
- total_nr_moved += nr_moved;
- max_nr_move -= nr_moved;
- rem_load_move -= load_moved;
+ total_load_moved +=
+ class->load_balance(this_rq, this_cpu, busiest,
+ ULONG_MAX, max_load_move - total_load_moved,
+ sd, idle, all_pinned, &this_best_prio);
class = class->next;
- } while (class && max_nr_move && rem_load_move > 0);
+ } while (class && max_load_move > total_load_moved);
- return total_nr_moved;
+ return total_load_moved > 0;
+}
+
+/*
+ * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * part of active balancing operations within "domain".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ struct sched_domain *sd, enum cpu_idle_type idle)
+{
+ struct sched_class *class;
+ int this_best_prio = MAX_PRIO;
+
+ for (class = sched_class_highest; class; class = class->next)
+ if (class->load_balance(this_rq, this_cpu, busiest,
+ 1, ULONG_MAX, sd, idle, NULL,
+ &this_best_prio))
+ return 1;
+
+ return 0;
}
/*
@@ -2588,11 +2607,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
*/
#define MAX_PINNED_INTERVAL 512
-static inline unsigned long minus_1_or_zero(unsigned long n)
-{
- return n > 0 ? n - 1 : 0;
-}
-
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
@@ -2601,7 +2615,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
int *balance)
{
- int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+ int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
@@ -2642,18 +2656,17 @@ redo:
schedstat_add(sd, lb_imbalance[idle], imbalance);
- nr_moved = 0;
+ ld_moved = 0;
if (busiest->nr_running > 1) {
/*
* Attempt to move tasks. If find_busiest_group has found
* an imbalance but busiest->nr_running <= 1, the group is
- * still unbalanced. nr_moved simply stays zero, so it is
+ * still unbalanced. ld_moved simply stays zero, so it is
* correctly treated as an imbalance.
*/
local_irq_save(flags);
double_rq_lock(this_rq, busiest);
- nr_moved = move_tasks(this_rq, this_cpu, busiest,
- minus_1_or_zero(busiest->nr_running),
+ ld_moved = move_tasks(this_rq, this_cpu, busiest,
imbalance, sd, idle, &all_pinned);
double_rq_unlock(this_rq, busiest);
local_irq_restore(flags);
@@ -2661,7 +2674,7 @@ redo:
/*
* some other cpu did the load balance for us.
*/
- if (nr_moved && this_cpu != smp_processor_id())
+ if (ld_moved && this_cpu != smp_processor_id())
resched_cpu(this_cpu);
/* All tasks on this runqueue were pinned by CPU affinity */
@@ -2673,7 +2686,7 @@ redo:
}
}
- if (!nr_moved) {
+ if (!ld_moved) {
schedstat_inc(sd, lb_failed[idle]);
sd->nr_balance_failed++;
@@ -2722,10 +2735,10 @@ redo:
sd->balance_interval *= 2;
}
- if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+ if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
return -1;
- return nr_moved;
+ return ld_moved;
out_balanced:
schedstat_inc(sd, lb_balanced[idle]);
@@ -2757,7 +2770,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
struct sched_group *group;
struct rq *busiest = NULL;
unsigned long imbalance;
- int nr_moved = 0;
+ int ld_moved = 0;
int sd_idle = 0;
int all_pinned = 0;
cpumask_t cpus = CPU_MASK_ALL;
@@ -2792,12 +2805,13 @@ redo:
schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
- nr_moved = 0;
+ ld_moved = 0;
if (busiest->nr_running > 1) {
/* Attempt to move tasks */
double_lock_balance(this_rq, busiest);
- nr_moved = move_tasks(this_rq, this_cpu, busiest,
- minus_1_or_zero(busiest->nr_running),
+ /* this_rq->clock is already updated */
+ update_rq_clock(busiest);
+ ld_moved = move_tasks(this_rq, this_cpu, busiest,
imbalance, sd, CPU_NEWLY_IDLE,
&all_pinned);
spin_unlock(&busiest->lock);
@@ -2809,7 +2823,7 @@ redo:
}
}
- if (!nr_moved) {
+ if (!ld_moved) {
schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
@@ -2817,7 +2831,7 @@ redo:
} else
sd->nr_balance_failed = 0;
- return nr_moved;
+ return ld_moved;
out_balanced:
schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
@@ -2894,6 +2908,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
/* move a task from busiest_rq to target_rq */
double_lock_balance(busiest_rq, target_rq);
+ update_rq_clock(busiest_rq);
+ update_rq_clock(target_rq);
/* Search for an sd spanning us and the target CPU. */
for_each_domain(target_cpu, sd) {
@@ -2905,8 +2921,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
if (likely(sd)) {
schedstat_inc(sd, alb_cnt);
- if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
- ULONG_MAX, sd, CPU_IDLE, NULL))
+ if (move_one_task(target_rq, target_cpu, busiest_rq,
+ sd, CPU_IDLE))
schedstat_inc(sd, alb_pushed);
else
schedstat_inc(sd, alb_failed);
@@ -3090,7 +3106,7 @@ static void run_rebalance_domains(struct softirq_action *h)
if (need_resched())
break;
- rebalance_domains(balance_cpu, SCHED_IDLE);
+ rebalance_domains(balance_cpu, CPU_IDLE);
rq = cpu_rq(balance_cpu);
if (time_after(this_rq->next_balance, rq->next_balance))
@@ -3175,8 +3191,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
- int this_best_prio, int best_prio, int best_prio_seen,
- struct rq_iterator *iterator)
+ int *this_best_prio, struct rq_iterator *iterator)
{
*load_moved = 0;
@@ -3202,7 +3217,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
rq = task_rq_lock(p, &flags);
ns = p->se.sum_exec_runtime;
if (rq->curr == p) {
- delta_exec = rq_clock(rq) - p->se.exec_start;
+ update_rq_clock(rq);
+ delta_exec = rq->clock - p->se.exec_start;
if ((s64)delta_exec > 0)
ns += delta_exec;
}
@@ -3296,11 +3312,19 @@ void scheduler_tick(void)
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
+ u64 next_tick = rq->tick_timestamp + TICK_NSEC;
spin_lock(&rq->lock);
+ __update_rq_clock(rq);
+ /*
+ * Let rq->clock advance by at least TICK_NSEC:
+ */
+ if (unlikely(rq->clock < next_tick))
+ rq->clock = next_tick;
+ rq->tick_timestamp = rq->clock;
+ update_cpu_load(rq);
if (curr != rq->idle) /* FIXME: needed? */
curr->sched_class->task_tick(rq, curr);
- update_cpu_load(rq);
spin_unlock(&rq->lock);
#ifdef CONFIG_SMP
@@ -3382,7 +3406,7 @@ static inline void schedule_debug(struct task_struct *prev)
* Pick up the highest-prio task:
*/
static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
+pick_next_task(struct rq *rq, struct task_struct *prev)
{
struct sched_class *class;
struct task_struct *p;
@@ -3392,14 +3416,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
* the fair class we can call that function directly:
*/
if (likely(rq->nr_running == rq->cfs.nr_running)) {
- p = fair_sched_class.pick_next_task(rq, now);
+ p = fair_sched_class.pick_next_task(rq);
if (likely(p))
return p;
}
class = sched_class_highest;
for ( ; ; ) {
- p = class->pick_next_task(rq, now);
+ p = class->pick_next_task(rq);
if (p)
return p;
/*
@@ -3418,7 +3442,6 @@ asmlinkage void __sched schedule(void)
struct task_struct *prev, *next;
long *switch_count;
struct rq *rq;
- u64 now;
int cpu;
need_resched:
@@ -3436,6 +3459,7 @@ need_resched_nonpreemptible:
spin_lock_irq(&rq->lock);
clear_tsk_need_resched(prev);
+ __update_rq_clock(rq);
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
@@ -3450,9 +3474,8 @@ need_resched_nonpreemptible:
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
- now = __rq_clock(rq);
- prev->sched_class->put_prev_task(rq, prev, now);
- next = pick_next_task(rq, prev, now);
+ prev->sched_class->put_prev_task(rq, prev);
+ next = pick_next_task(rq, prev);
sched_info_switch(prev, next);
@@ -3895,17 +3918,16 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
unsigned long flags;
int oldprio, on_rq;
struct rq *rq;
- u64 now;
BUG_ON(prio < 0 || prio > MAX_PRIO);
rq = task_rq_lock(p, &flags);
- now = rq_clock(rq);
+ update_rq_clock(rq);
oldprio = p->prio;
on_rq = p->se.on_rq;
if (on_rq)
- dequeue_task(rq, p, 0, now);
+ dequeue_task(rq, p, 0);
if (rt_prio(prio))
p->sched_class = &rt_sched_class;
@@ -3915,7 +3937,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
p->prio = prio;
if (on_rq) {
- enqueue_task(rq, p, 0, now);
+ enqueue_task(rq, p, 0);
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
@@ -3938,7 +3960,6 @@ void set_user_nice(struct task_struct *p, long nice)
int old_prio, delta, on_rq;
unsigned long flags;
struct rq *rq;
- u64 now;
if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
return;
@@ -3947,7 +3968,7 @@ void set_user_nice(struct task_struct *p, long nice)
* the task might be in the middle of scheduling on another CPU.
*/
rq = task_rq_lock(p, &flags);
- now = rq_clock(rq);
+ update_rq_clock(rq);
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
@@ -3960,8 +3981,8 @@ void set_user_nice(struct task_struct *p, long nice)
}
on_rq = p->se.on_rq;
if (on_rq) {
- dequeue_task(rq, p, 0, now);
- dec_load(rq, p, now);
+ dequeue_task(rq, p, 0);
+ dec_load(rq, p);
}
p->static_prio = NICE_TO_PRIO(nice);
@@ -3971,8 +3992,8 @@ void set_user_nice(struct task_struct *p, long nice)
delta = p->prio - old_prio;
if (on_rq) {
- enqueue_task(rq, p, 0, now);
- inc_load(rq, p, now);
+ enqueue_task(rq, p, 0);
+ inc_load(rq, p);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
@@ -4208,6 +4229,7 @@ recheck:
spin_unlock_irqrestore(&p->pi_lock, flags);
goto recheck;
}
+ update_rq_clock(rq);
on_rq = p->se.on_rq;
if (on_rq)
deactivate_task(rq, p, 0);
@@ -4463,10 +4485,8 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
out_unlock:
read_unlock(&tasklist_lock);
mutex_unlock(&sched_hotcpu_mutex);
- if (retval)
- return retval;
- return 0;
+ return retval;
}
/**
@@ -4966,6 +4986,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
on_rq = p->se.on_rq;
if (on_rq)
deactivate_task(rq_src, p, 0);
+
set_task_cpu(p, dest_cpu);
if (on_rq) {
activate_task(rq_dest, p, 0);
@@ -5198,7 +5219,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
for ( ; ; ) {
if (!rq->nr_running)
break;
- next = pick_next_task(rq, rq->curr, rq_clock(rq));
+ update_rq_clock(rq);
+ next = pick_next_task(rq, rq->curr);
if (!next)
break;
migrate_dead(dead_cpu, next);
@@ -5210,12 +5232,19 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
static struct ctl_table sd_ctl_dir[] = {
- {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
+ {
+ .procname = "sched_domain",
+ .mode = 0755,
+ },
{0,},
};
static struct ctl_table sd_ctl_root[] = {
- {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
+ {
+ .procname = "kernel",
+ .mode = 0755,
+ .child = sd_ctl_dir,
+ },
{0,},
};
@@ -5231,11 +5260,10 @@ static struct ctl_table *sd_alloc_ctl_entry(int n)
}
static void
-set_table_entry(struct ctl_table *entry, int ctl_name,
+set_table_entry(struct ctl_table *entry,
const char *procname, void *data, int maxlen,
mode_t mode, proc_handler *proc_handler)
{
- entry->ctl_name = ctl_name;
entry->procname = procname;
entry->data = data;
entry->maxlen = maxlen;
@@ -5248,28 +5276,28 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
{
struct ctl_table *table = sd_alloc_ctl_entry(14);
- set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
+ set_table_entry(&table[0], "min_interval", &sd->min_interval,
sizeof(long), 0644, proc_doulongvec_minmax);
- set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
+ set_table_entry(&table[1], "max_interval", &sd->max_interval,
sizeof(long), 0644, proc_doulongvec_minmax);
- set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
+ set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
+ set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
+ set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
+ set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
+ set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
+ set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
+ set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[10], 11, "cache_nice_tries",
+ set_table_entry(&table[10], "cache_nice_tries",
&sd->cache_nice_tries,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[12], 13, "flags", &sd->flags,
+ set_table_entry(&table[12], "flags", &sd->flags,
sizeof(int), 0644, proc_dointvec_minmax);
return table;
@@ -5289,7 +5317,6 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
i = 0;
for_each_domain(cpu, sd) {
snprintf(buf, 32, "domain%d", i);
- entry->ctl_name = i + 1;
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0755;
entry->child = sd_alloc_ctl_domain_table(sd);
@@ -5310,7 +5337,6 @@ static void init_sched_domain_sysctl(void)
for (i = 0; i < cpu_num; i++, entry++) {
snprintf(buf, 32, "cpu%d", i);
- entry->ctl_name = i + 1;
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0755;
entry->child = sd_alloc_ctl_cpu_table(i);
@@ -5379,6 +5405,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
rq->migration_thread = NULL;
/* Idle task back to normal (off runqueue, low prio) */
rq = task_rq_lock(rq->idle, &flags);
+ update_rq_clock(rq);
deactivate_task(rq, rq->idle, 0);
rq->idle->static_prio = MAX_PRIO;
__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
@@ -6301,7 +6328,7 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
+static int arch_reinit_sched_domains(void)
{
int err;
@@ -6330,24 +6357,6 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
return ret ? ret : count;
}
-int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
-{
- int err = 0;
-
-#ifdef CONFIG_SCHED_SMT
- if (smt_capable())
- err = sysfs_create_file(&cls->kset.kobj,
- &attr_sched_smt_power_savings.attr);
-#endif
-#ifdef CONFIG_SCHED_MC
- if (!err && mc_capable())
- err = sysfs_create_file(&cls->kset.kobj,
- &attr_sched_mc_power_savings.attr);
-#endif
- return err;
-}
-#endif
-
#ifdef CONFIG_SCHED_MC
static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
{
@@ -6358,8 +6367,8 @@ static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
{
return sched_power_savings_store(buf, count, 0);
}
-SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
- sched_mc_power_savings_store);
+static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
+ sched_mc_power_savings_store);
#endif
#ifdef CONFIG_SCHED_SMT
@@ -6372,8 +6381,26 @@ static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
{
return sched_power_savings_store(buf, count, 1);
}
-SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
- sched_smt_power_savings_store);
+static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
+ sched_smt_power_savings_store);
+#endif
+
+int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
+{
+ int err = 0;
+
+#ifdef CONFIG_SCHED_SMT
+ if (smt_capable())
+ err = sysfs_create_file(&cls->kset.kobj,
+ &attr_sched_smt_power_savings.attr);
+#endif
+#ifdef CONFIG_SCHED_MC
+ if (!err && mc_capable())
+ err = sysfs_create_file(&cls->kset.kobj,
+ &attr_sched_mc_power_savings.attr);
+#endif
+ return err;
+}
#endif
/*
@@ -6616,12 +6643,13 @@ void normalize_rt_tasks(void)
goto out_unlock;
#endif
+ update_rq_clock(rq);
on_rq = p->se.on_rq;
if (on_rq)
- deactivate_task(task_rq(p), p, 0);
+ deactivate_task(rq, p, 0);
__setscheduler(rq, p, SCHED_NORMAL, 0);
if (on_rq) {
- activate_task(task_rq(p), p, 0);
+ activate_task(rq, p, 0);
resched_task(rq->curr);
}
#ifdef CONFIG_SMP
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1c61e5315ad2..87e524762b85 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -29,34 +29,34 @@
} while (0)
static void
-print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now)
+print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
if (rq->curr == p)
SEQ_printf(m, "R");
else
SEQ_printf(m, " ");
- SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d "
- "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
+ SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d ",
p->comm, p->pid,
(long long)p->se.fair_key,
(long long)(p->se.fair_key - rq->cfs.fair_clock),
(long long)p->se.wait_runtime,
(long long)(p->nvcsw + p->nivcsw),
- p->prio,
+ p->prio);
#ifdef CONFIG_SCHEDSTATS
+ SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
(long long)p->se.sum_exec_runtime,
(long long)p->se.sum_wait_runtime,
(long long)p->se.sum_sleep_runtime,
(long long)p->se.wait_runtime_overruns,
- (long long)p->se.wait_runtime_underruns
+ (long long)p->se.wait_runtime_underruns);
#else
- 0LL, 0LL, 0LL, 0LL, 0LL
+ SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
+ 0LL, 0LL, 0LL, 0LL, 0LL);
#endif
- );
}
-static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now)
+static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
struct task_struct *g, *p;
@@ -77,7 +77,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now)
if (!p->se.on_rq || task_cpu(p) != rq_cpu)
continue;
- print_task(m, rq, p, now);
+ print_task(m, rq, p);
} while_each_thread(g, p);
read_unlock_irq(&tasklist_lock);
@@ -106,9 +106,9 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
(long long)wait_runtime_rq_sum);
}
-void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
+void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
- SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq);
+ SEQ_printf(m, "\ncfs_rq\n");
#define P(x) \
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
@@ -124,7 +124,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
}
-static void print_cpu(struct seq_file *m, int cpu, u64 now)
+static void print_cpu(struct seq_file *m, int cpu)
{
struct rq *rq = &per_cpu(runqueues, cpu);
@@ -166,9 +166,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
P(cpu_load[4]);
#undef P
- print_cfs_stats(m, cpu, now);
+ print_cfs_stats(m, cpu);
- print_rq(m, rq, cpu, now);
+ print_rq(m, rq, cpu);
}
static int sched_debug_show(struct seq_file *m, void *v)
@@ -184,7 +184,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now);
for_each_online_cpu(cpu)
- print_cpu(m, cpu, now);
+ print_cpu(m, cpu);
SEQ_printf(m, "\n");
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6f579ff5a9bc..fedbb51bba96 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -75,7 +75,7 @@ enum {
unsigned int sysctl_sched_features __read_mostly =
SCHED_FEAT_FAIR_SLEEPERS *1 |
- SCHED_FEAT_SLEEPER_AVG *1 |
+ SCHED_FEAT_SLEEPER_AVG *0 |
SCHED_FEAT_SLEEPER_LOAD_AVG *1 |
SCHED_FEAT_PRECISE_CPU_LOAD *1 |
SCHED_FEAT_START_DEBIT *1 |
@@ -222,21 +222,25 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity)
{
u64 tmp;
+ if (likely(curr->load.weight == NICE_0_LOAD))
+ return granularity;
/*
- * Negative nice levels get the same granularity as nice-0:
+ * Positive nice levels get the same granularity as nice-0:
*/
- if (likely(curr->load.weight >= NICE_0_LOAD))
- return granularity;
+ if (likely(curr->load.weight < NICE_0_LOAD)) {
+ tmp = curr->load.weight * (u64)granularity;
+ return (long) (tmp >> NICE_0_SHIFT);
+ }
/*
- * Positive nice level tasks get linearly finer
+ * Negative nice level tasks get linearly finer
* granularity:
*/
- tmp = curr->load.weight * (u64)granularity;
+ tmp = curr->load.inv_weight * (u64)granularity;
/*
* It will always fit into 'long':
*/
- return (long) (tmp >> NICE_0_SHIFT);
+ return (long) (tmp >> WMULT_SHIFT);
}
static inline void
@@ -281,31 +285,28 @@ add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
* are not in our scheduling class.
*/
static inline void
-__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now)
+__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
- unsigned long delta, delta_exec, delta_fair;
- long delta_mine;
+ unsigned long delta, delta_exec, delta_fair, delta_mine;
struct load_weight *lw = &cfs_rq->load;
unsigned long load = lw->weight;
- if (unlikely(!load))
- return;
-
delta_exec = curr->delta_exec;
schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
curr->sum_exec_runtime += delta_exec;
cfs_rq->exec_clock += delta_exec;
+ if (unlikely(!load))
+ return;
+
delta_fair = calc_delta_fair(delta_exec, lw);
delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
- if (cfs_rq->sleeper_bonus > sysctl_sched_stat_granularity) {
- delta = calc_delta_mine(cfs_rq->sleeper_bonus,
- curr->load.weight, lw);
- if (unlikely(delta > cfs_rq->sleeper_bonus))
- delta = cfs_rq->sleeper_bonus;
-
+ if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) {
+ delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec);
+ delta = calc_delta_mine(delta, curr->load.weight, lw);
+ delta = min((u64)delta, cfs_rq->sleeper_bonus);
cfs_rq->sleeper_bonus -= delta;
delta_mine -= delta;
}
@@ -321,7 +322,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now)
add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec);
}
-static void update_curr(struct cfs_rq *cfs_rq, u64 now)
+static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq_curr(cfs_rq);
unsigned long delta_exec;
@@ -334,22 +335,22 @@ static void update_curr(struct cfs_rq *cfs_rq, u64 now)
* since the last time we changed load (this cannot
* overflow on 32 bits):
*/
- delta_exec = (unsigned long)(now - curr->exec_start);
+ delta_exec = (unsigned long)(rq_of(cfs_rq)->clock - curr->exec_start);
curr->delta_exec += delta_exec;
if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) {
- __update_curr(cfs_rq, curr, now);
+ __update_curr(cfs_rq, curr);
curr->delta_exec = 0;
}
- curr->exec_start = now;
+ curr->exec_start = rq_of(cfs_rq)->clock;
}
static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
se->wait_start_fair = cfs_rq->fair_clock;
- schedstat_set(se->wait_start, now);
+ schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
}
/*
@@ -377,8 +378,7 @@ calc_weighted(unsigned long delta, unsigned long weight, int shift)
/*
* Task is being enqueued - update stats:
*/
-static void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
s64 key;
@@ -387,7 +387,7 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* a dequeue/enqueue event is a NOP)
*/
if (se != cfs_rq_curr(cfs_rq))
- update_stats_wait_start(cfs_rq, se, now);
+ update_stats_wait_start(cfs_rq, se);
/*
* Update the key:
*/
@@ -407,7 +407,8 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
(WMULT_SHIFT - NICE_0_SHIFT);
} else {
tmp = se->wait_runtime;
- key -= (tmp * se->load.weight) >> NICE_0_SHIFT;
+ key -= (tmp * se->load.inv_weight) >>
+ (WMULT_SHIFT - NICE_0_SHIFT);
}
}
@@ -418,11 +419,12 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* Note: must be called with a freshly updated rq->fair_clock.
*/
static inline void
-__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long delta_fair = se->delta_fair_run;
- schedstat_set(se->wait_max, max(se->wait_max, now - se->wait_start));
+ schedstat_set(se->wait_max, max(se->wait_max,
+ rq_of(cfs_rq)->clock - se->wait_start));
if (unlikely(se->load.weight != NICE_0_LOAD))
delta_fair = calc_weighted(delta_fair, se->load.weight,
@@ -432,7 +434,7 @@ __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
}
static void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long delta_fair;
@@ -442,7 +444,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
se->delta_fair_run += delta_fair;
if (unlikely(abs(se->delta_fair_run) >=
sysctl_sched_stat_granularity)) {
- __update_stats_wait_end(cfs_rq, se, now);
+ __update_stats_wait_end(cfs_rq, se);
se->delta_fair_run = 0;
}
@@ -451,34 +453,34 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
}
static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- update_curr(cfs_rq, now);
+ update_curr(cfs_rq);
/*
* Mark the end of the wait period if dequeueing a
* waiting task:
*/
if (se != cfs_rq_curr(cfs_rq))
- update_stats_wait_end(cfs_rq, se, now);
+ update_stats_wait_end(cfs_rq, se);
}
/*
* We are picking a new current task - update its stats:
*/
static inline void
-update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* We are starting a new run period:
*/
- se->exec_start = now;
+ se->exec_start = rq_of(cfs_rq)->clock;
}
/*
* We are descheduling a task - update its stats:
*/
static inline void
-update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
se->exec_start = 0;
}
@@ -487,8 +489,7 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* Scheduling class queueing methods:
*/
-static void
-__enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long load = cfs_rq->load.weight, delta_fair;
long prev_runtime;
@@ -518,12 +519,13 @@ __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* Track the amount of bonus we've given to sleepers:
*/
cfs_rq->sleeper_bonus += delta_fair;
+ if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
+ cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit;
schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
}
-static void
-enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct task_struct *tsk = task_of(se);
unsigned long delta_fair;
@@ -538,7 +540,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
se->delta_fair_sleep += delta_fair;
if (unlikely(abs(se->delta_fair_sleep) >=
sysctl_sched_stat_granularity)) {
- __enqueue_sleeper(cfs_rq, se, now);
+ __enqueue_sleeper(cfs_rq, se);
se->delta_fair_sleep = 0;
}
@@ -546,7 +548,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
#ifdef CONFIG_SCHEDSTATS
if (se->sleep_start) {
- u64 delta = now - se->sleep_start;
+ u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
if ((s64)delta < 0)
delta = 0;
@@ -558,7 +560,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
se->sum_sleep_runtime += delta;
}
if (se->block_start) {
- u64 delta = now - se->block_start;
+ u64 delta = rq_of(cfs_rq)->clock - se->block_start;
if ((s64)delta < 0)
delta = 0;
@@ -573,26 +575,24 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
}
static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
- int wakeup, u64 now)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
{
/*
* Update the fair clock.
*/
- update_curr(cfs_rq, now);
+ update_curr(cfs_rq);
if (wakeup)
- enqueue_sleeper(cfs_rq, se, now);
+ enqueue_sleeper(cfs_rq, se);
- update_stats_enqueue(cfs_rq, se, now);
+ update_stats_enqueue(cfs_rq, se);
__enqueue_entity(cfs_rq, se);
}
static void
-dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
- int sleep, u64 now)
+dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
- update_stats_dequeue(cfs_rq, se, now);
+ update_stats_dequeue(cfs_rq, se);
if (sleep) {
se->sleep_start_fair = cfs_rq->fair_clock;
#ifdef CONFIG_SCHEDSTATS
@@ -600,9 +600,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
struct task_struct *tsk = task_of(se);
if (tsk->state & TASK_INTERRUPTIBLE)
- se->sleep_start = now;
+ se->sleep_start = rq_of(cfs_rq)->clock;
if (tsk->state & TASK_UNINTERRUPTIBLE)
- se->block_start = now;
+ se->block_start = rq_of(cfs_rq)->clock;
}
cfs_rq->wait_runtime -= se->wait_runtime;
#endif
@@ -629,7 +629,7 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
}
static inline void
-set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* Any task has to be enqueued before it get to execute on
@@ -638,49 +638,46 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* done a put_prev_task_fair() shortly before this, which
* updated rq->fair_clock - used by update_stats_wait_end())
*/
- update_stats_wait_end(cfs_rq, se, now);
- update_stats_curr_start(cfs_rq, se, now);
+ update_stats_wait_end(cfs_rq, se);
+ update_stats_curr_start(cfs_rq, se);
set_cfs_rq_curr(cfs_rq, se);
}
-static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq, u64 now)
+static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
struct sched_entity *se = __pick_next_entity(cfs_rq);
- set_next_entity(cfs_rq, se, now);
+ set_next_entity(cfs_rq, se);
return se;
}
-static void
-put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev, u64 now)
+static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
{
/*
* If still on the runqueue then deactivate_task()
* was not called and update_curr() has to be done:
*/
if (prev->on_rq)
- update_curr(cfs_rq, now);
+ update_curr(cfs_rq);
- update_stats_curr_end(cfs_rq, prev, now);
+ update_stats_curr_end(cfs_rq, prev);
if (prev->on_rq)
- update_stats_wait_start(cfs_rq, prev, now);
+ update_stats_wait_start(cfs_rq, prev);
set_cfs_rq_curr(cfs_rq, NULL);
}
static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
- struct rq *rq = rq_of(cfs_rq);
struct sched_entity *next;
- u64 now = __rq_clock(rq);
/*
* Dequeue and enqueue the task to update its
* position within the tree:
*/
- dequeue_entity(cfs_rq, curr, 0, now);
- enqueue_entity(cfs_rq, curr, 0, now);
+ dequeue_entity(cfs_rq, curr, 0);
+ enqueue_entity(cfs_rq, curr, 0);
/*
* Reschedule if another task tops the current one.
@@ -785,8 +782,7 @@ static inline int is_same_group(struct task_struct *curr, struct task_struct *p)
* increased. Here we update the fair scheduling stats and
* then put the task into the rbtree:
*/
-static void
-enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
@@ -795,7 +791,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
if (se->on_rq)
break;
cfs_rq = cfs_rq_of(se);
- enqueue_entity(cfs_rq, se, wakeup, now);
+ enqueue_entity(cfs_rq, se, wakeup);
}
}
@@ -804,15 +800,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
* decreased. We remove the task from the rbtree and
* update the fair scheduling stats:
*/
-static void
-dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- dequeue_entity(cfs_rq, se, sleep, now);
+ dequeue_entity(cfs_rq, se, sleep);
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight)
break;
@@ -825,14 +820,14 @@ dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now)
static void yield_task_fair(struct rq *rq, struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
- u64 now = __rq_clock(rq);
+ __update_rq_clock(rq);
/*
* Dequeue and enqueue the task to update its
* position within the tree:
*/
- dequeue_entity(cfs_rq, &p->se, 0, now);
- enqueue_entity(cfs_rq, &p->se, 0, now);
+ dequeue_entity(cfs_rq, &p->se, 0);
+ enqueue_entity(cfs_rq, &p->se, 0);
}
/*
@@ -845,7 +840,8 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
unsigned long gran;
if (unlikely(rt_prio(p->prio))) {
- update_curr(cfs_rq, rq_clock(rq));
+ update_rq_clock(rq);
+ update_curr(cfs_rq);
resched_task(curr);
return;
}
@@ -861,7 +857,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran);
}
-static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_fair(struct rq *rq)
{
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
@@ -870,7 +866,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
return NULL;
do {
- se = pick_next_entity(cfs_rq, now);
+ se = pick_next_entity(cfs_rq);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);
@@ -880,14 +876,14 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
/*
* Account for a descheduled task:
*/
-static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now)
+static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
{
struct sched_entity *se = &prev->se;
struct cfs_rq *cfs_rq;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- put_prev_entity(cfs_rq, se, now);
+ put_prev_entity(cfs_rq, se);
}
}
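
[editor's note: several class hooks above (enqueue_task_fair, dequeue_task_fair, put_prev_task_fair) apply their update to the task's own sched_entity and then to each enclosing group entity via for_each_sched_entity(). A minimal userspace sketch of that upward walk, with invented names and without the kernel's sched_entity layout, assuming group scheduling so the chain has parents:]

/* Toy version of walking an entity and its group parents: the loop
 * visits the task's own entity first, then each enclosing group.
 * Purely illustrative; names and layout are made up for this sketch.
 */
#include <stdio.h>

struct toy_entity {
	const char *name;
	struct toy_entity *parent;	/* NULL at the top level */
};

#define for_each_toy_entity(se) \
	for (; (se) != NULL; (se) = (se)->parent)

int main(void)
{
	struct toy_entity root_group = { "root-group", NULL };
	struct toy_entity sub_group  = { "sub-group", &root_group };
	struct toy_entity task_se    = { "task", &sub_group };
	struct toy_entity *se = &task_se;

	for_each_toy_entity(se)
		printf("update stats for %s\n", se->name);
	return 0;
}
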
@@ -930,6 +926,7 @@ static struct task_struct *load_balance_next_fair(void *arg)
return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
}
+#ifdef CONFIG_FAIR_GROUP_SCHED
static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr;
@@ -943,12 +940,13 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
return p->prio;
}
+#endif
-static int
+static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
- unsigned long max_nr_move, unsigned long max_load_move,
- struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *total_load_moved)
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, int *this_best_prio)
{
struct cfs_rq *busy_cfs_rq;
unsigned long load_moved, total_nr_moved = 0, nr_moved;
@@ -959,15 +957,14 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
cfs_rq_iterator.next = load_balance_next_fair;
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
struct cfs_rq *this_cfs_rq;
long imbalance;
unsigned long maxload;
- int this_best_prio, best_prio, best_prio_seen = 0;
this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
- imbalance = busy_cfs_rq->load.weight -
- this_cfs_rq->load.weight;
+ imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
if (imbalance <= 0)
continue;
@@ -976,27 +973,17 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
imbalance /= 2;
maxload = min(rem_load_move, imbalance);
- this_best_prio = cfs_rq_best_prio(this_cfs_rq);
- best_prio = cfs_rq_best_prio(busy_cfs_rq);
-
- /*
- * Enable handling of the case where there is more than one task
- * with the best priority. If the current running task is one
- * of those with prio==best_prio we know it won't be moved
- * and therefore it's safe to override the skip (based on load)
- * of any task we find with that prio.
- */
- if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se)
- best_prio_seen = 1;
-
+ *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
/* pass busy_cfs_rq argument into
* load_balance_[start|next]_fair iterators
*/
cfs_rq_iterator.arg = busy_cfs_rq;
nr_moved = balance_tasks(this_rq, this_cpu, busiest,
max_nr_move, maxload, sd, idle, all_pinned,
- &load_moved, this_best_prio, best_prio,
- best_prio_seen, &cfs_rq_iterator);
+ &load_moved, this_best_prio, &cfs_rq_iterator);
total_nr_moved += nr_moved;
max_nr_move -= nr_moved;
@@ -1006,9 +993,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
break;
}
- *total_load_moved = max_load_move - rem_load_move;
-
- return total_nr_moved;
+ return max_load_move - rem_load_move;
}
/*
@@ -1032,14 +1017,14 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
* monopolize the CPU. Note: the parent runqueue is locked,
* the child is not running yet.
*/
-static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now)
+static void task_new_fair(struct rq *rq, struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
struct sched_entity *se = &p->se;
sched_info_queued(p);
- update_stats_enqueue(cfs_rq, se, now);
+ update_stats_enqueue(cfs_rq, se);
/*
* Child runs first: we let it run before the parent
* until it reschedules once. We set up the key so that
@@ -1072,15 +1057,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now)
*/
static void set_curr_task_fair(struct rq *rq)
{
- struct task_struct *curr = rq->curr;
- struct sched_entity *se = &curr->se;
- u64 now = rq_clock(rq);
- struct cfs_rq *cfs_rq;
+	struct sched_entity *se = &rq->curr->se;
- for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
- set_next_entity(cfs_rq, se, now);
- }
+ for_each_sched_entity(se)
+ set_next_entity(cfs_rq_of(se), se);
}
#else
static void set_curr_task_fair(struct rq *rq)
@@ -1109,12 +1089,11 @@ struct sched_class fair_sched_class __read_mostly = {
};
#ifdef CONFIG_SCHED_DEBUG
-void print_cfs_stats(struct seq_file *m, int cpu, u64 now)
+static void print_cfs_stats(struct seq_file *m, int cpu)
{
- struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq;
- for_each_leaf_cfs_rq(rq, cfs_rq)
- print_cfs_rq(m, cpu, cfs_rq, now);
+ for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+ print_cfs_rq(m, cpu, cfs_rq);
}
#endif
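
[editor's note: the recurring change in the sched_fair.c hunks above is dropping the threaded 'u64 now' argument: callers refresh the per-runqueue clock once (update_rq_clock()/__update_rq_clock()) and callees read the cached value at the point of use. A minimal userspace sketch of that caching pattern, all names (toy_rq, toy_update_clock, ...) invented for illustration:]

/* Sketch of the "cached clock" pattern: sample the clock once per
 * scheduler event, store it in the runqueue-like structure, and let
 * callees read it instead of taking a 'now' parameter.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct toy_rq {
	uint64_t clock;			/* cached timestamp, ns */
};

static void toy_update_clock(struct toy_rq *rq)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	rq->clock = (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Callee no longer takes 'now'; it reads rq->clock directly. */
static void toy_update_curr(struct toy_rq *rq, uint64_t *exec_start,
			    uint64_t *sum_exec)
{
	uint64_t delta = rq->clock - *exec_start;

	*sum_exec += delta;
	*exec_start = rq->clock;
}

int main(void)
{
	struct toy_rq rq;
	uint64_t exec_start, sum_exec = 0;

	toy_update_clock(&rq);
	exec_start = rq.clock;

	toy_update_clock(&rq);		/* refresh once per "event" */
	toy_update_curr(&rq, &exec_start, &sum_exec);

	printf("accumulated runtime: %llu ns\n",
	       (unsigned long long)sum_exec);
	return 0;
}
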
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 41841e741c4a..3503fb2d9f96 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -13,7 +13,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
resched_task(rq->idle);
}
-static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_idle(struct rq *rq)
{
schedstat_inc(rq, sched_goidle);
@@ -25,7 +25,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
* message if some code attempts to do it:
*/
static void
-dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
{
spin_unlock_irq(&rq->lock);
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
@@ -33,15 +33,15 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
spin_lock_irq(&rq->lock);
}
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
-static int
+static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *total_load_moved)
+ int *all_pinned, int *this_best_prio)
{
return 0;
}
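
[editor's note: the load_balance_*() hooks change from returning a task count, with moved load passed back through an out-parameter, to returning the moved load itself, while best-prio bookkeeping moves into the caller through an int *this_best_prio in/out argument. A hedged userspace sketch of that calling convention, with made-up toy_* names:]

/* Toy model of the new convention: return the weight actually moved
 * and let the caller carry best-prio state across classes.
 * Illustrative only; this is not the kernel interface.
 */
#include <stdio.h>

struct toy_task {
	int prio;			/* lower value = higher priority */
	unsigned long weight;
};

static unsigned long
toy_load_balance(struct toy_task *busiest, int nr_busiest,
		 unsigned long max_load_move, int *this_best_prio)
{
	unsigned long moved = 0;
	int i;

	for (i = 0; i < nr_busiest && moved < max_load_move; i++) {
		moved += busiest[i].weight;
		if (busiest[i].prio < *this_best_prio)
			*this_best_prio = busiest[i].prio;
	}
	return moved;			/* load moved, not a task count */
}

int main(void)
{
	struct toy_task rq[] = { { 120, 1024 }, { 110, 2048 } };
	int best_prio = 140;
	unsigned long moved = toy_load_balance(rq, 2, 2500, &best_prio);

	printf("moved %lu load, best prio seen %d\n", moved, best_prio);
	return 0;
}
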
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 002fcf8d3f64..dcdcad632fd9 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -7,7 +7,7 @@
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
*/
-static inline void update_curr_rt(struct rq *rq, u64 now)
+static inline void update_curr_rt(struct rq *rq)
{
struct task_struct *curr = rq->curr;
u64 delta_exec;
@@ -15,18 +15,17 @@ static inline void update_curr_rt(struct rq *rq, u64 now)
if (!task_has_rt_policy(curr))
return;
- delta_exec = now - curr->se.exec_start;
+ delta_exec = rq->clock - curr->se.exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
curr->se.sum_exec_runtime += delta_exec;
- curr->se.exec_start = now;
+ curr->se.exec_start = rq->clock;
}
-static void
-enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
struct rt_prio_array *array = &rq->rt.active;
@@ -37,12 +36,11 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
/*
* Adding/removing a task to/from a priority array:
*/
-static void
-dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
struct rt_prio_array *array = &rq->rt.active;
- update_curr_rt(rq, now);
+ update_curr_rt(rq);
list_del(&p->run_list);
if (list_empty(array->queue + p->prio))
@@ -75,7 +73,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
resched_task(rq->curr);
}
-static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
{
struct rt_prio_array *array = &rq->rt.active;
struct task_struct *next;
@@ -89,14 +87,14 @@ static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
queue = array->queue + idx;
next = list_entry(queue->next, struct task_struct, run_list);
- next->se.exec_start = now;
+ next->se.exec_start = rq->clock;
return next;
}
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
- update_curr_rt(rq, now);
+ update_curr_rt(rq);
p->se.exec_start = 0;
}
@@ -172,28 +170,15 @@ static struct task_struct *load_balance_next_rt(void *arg)
return p;
}
-static int
+static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *load_moved)
+ int *all_pinned, int *this_best_prio)
{
- int this_best_prio, best_prio, best_prio_seen = 0;
int nr_moved;
struct rq_iterator rt_rq_iterator;
-
- best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
- this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
-
- /*
- * Enable handling of the case where there is more than one task
- * with the best priority. If the current running task is one
- * of those with prio==best_prio we know it won't be moved
- * and therefore it's safe to override the skip (based on load)
- * of any task we find with that prio.
- */
- if (busiest->curr->prio == best_prio)
- best_prio_seen = 1;
+ unsigned long load_moved;
rt_rq_iterator.start = load_balance_start_rt;
rt_rq_iterator.next = load_balance_next_rt;
@@ -203,11 +188,10 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
rt_rq_iterator.arg = busiest;
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
- max_load_move, sd, idle, all_pinned, load_moved,
- this_best_prio, best_prio, best_prio_seen,
- &rt_rq_iterator);
+ max_load_move, sd, idle, all_pinned, &load_moved,
+ this_best_prio, &rt_rq_iterator);
- return nr_moved;
+ return load_moved;
}
static void task_tick_rt(struct rq *rq, struct task_struct *p)
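
[editor's note: pick_next_task_rt() above finds the first set bit in the active priority bitmap and takes the head task of that priority's queue. A minimal userspace sketch of that O(1) lookup, using a plain unsigned int as the bitmap and strings as stand-in tasks; the kernel's bitmap and list handling are more involved:]

/* Simplified priority-array lookup: the lowest set bit marks the best
 * runnable priority; return the head of that priority's queue.
 */
#include <stdio.h>
#include <strings.h>		/* ffs() */

#define TOY_NR_PRIO 32

struct toy_array {
	unsigned int bitmap;		/* bit p set => queue[p] non-empty */
	const char *queue[TOY_NR_PRIO];	/* head task name per priority */
};

static const char *toy_pick_next(struct toy_array *a)
{
	int idx;

	if (!a->bitmap)
		return NULL;		/* nothing runnable */
	idx = ffs(a->bitmap) - 1;	/* first set bit = best priority */
	return a->queue[idx];
}

int main(void)
{
	struct toy_array a = { 0 };

	a.queue[5] = "rt-task-A";
	a.bitmap |= 1u << 5;
	a.queue[9] = "rt-task-B";
	a.bitmap |= 1u << 9;

	printf("next: %s\n", toy_pick_next(&a));	/* rt-task-A */
	return 0;
}
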
diff --git a/kernel/signal.c b/kernel/signal.c
index ef8156a6aad5..ad63109e413c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -378,7 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
- if (tsk == current)
+ if (likely(tsk == current))
signr = __dequeue_signal(&tsk->pending, mask, info);
if (!signr) {
signr = __dequeue_signal(&tsk->signal->shared_pending,
@@ -425,7 +425,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
}
- if ( signr &&
+ if (signr && likely(tsk == current) &&
((info->si_code & __SI_MASK) == __SI_TIMER) &&
info->si_sys_private){
/*
@@ -1561,10 +1561,6 @@ static inline int may_ptrace_stop(void)
(current->ptrace & PT_ATTACHED)))
return 0;
- if (unlikely(current->signal == current->parent->signal) &&
- unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))
- return 0;
-
/*
* Are we in the middle of do_coredump?
* If so and our tracer is also part of the coredump stopping
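
[editor's note: the dequeue_signal() hunks above add likely() annotations on the tsk == current checks. In the kernel these hints boil down to GCC's __builtin_expect(); a userspace rendition of the macros, with the surrounding function invented purely for illustration:]

/* Branch-prediction hints as used above: tell the compiler which way
 * the test usually goes so it can lay out the fast path first.
 */
#include <stdio.h>

#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

static int toy_dequeue(int caller_is_self)
{
	/* The common case is dequeuing our own pending signals. */
	if (likely(caller_is_self))
		return 1;		/* fast path */
	return 0;			/* rare path */
}

int main(void)
{
	printf("%d %d\n", toy_dequeue(1), toy_dequeue(0));
	return 0;
}
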
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79c891e6266c..9029690f4fae 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -27,7 +27,6 @@
#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/utsname.h>
-#include <linux/capability.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/init.h>
@@ -1023,6 +1022,7 @@ static ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &proc_doulongvec_minmax,
},
+#endif
#ifdef CONFIG_NUMA
{
.ctl_name = CTL_UNNUMBERED,
@@ -1034,7 +1034,6 @@ static ctl_table vm_table[] = {
.strategy = &sysctl_string,
},
#endif
-#endif
#if defined(CONFIG_X86_32) || \
(defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
{
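
[editor's note: the sysctl.c hunks above only relocate an #endif so the CONFIG_NUMA entry of vm_table is guarded by its own condition rather than nested inside the previous option's block. The bracketing pattern, sketched with invented TOY_* options and a toy table type:]

/* Each conditional table entry gets its own guard, so disabling one
 * option does not silently compile out its neighbour.
 */
#include <stdio.h>

#define TOY_OPT_A 1		/* toggle independently of TOY_OPT_B */
#define TOY_OPT_B 1

struct toy_ctl {
	const char *name;
	int mode;
};

static struct toy_ctl toy_table[] = {
#if TOY_OPT_A
	{ "option_a_knob", 0644 },
#endif
#if TOY_OPT_B
	{ "option_b_knob", 0644 },
#endif
	{ NULL, 0 },		/* terminator */
};

int main(void)
{
	struct toy_ctl *c;

	for (c = toy_table; c->name; c++)
		printf("%s %o\n", c->name, c->mode);
	return 0;
}
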
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 2ad1c37b8dfe..41dd3105ce7f 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -113,16 +113,6 @@ int clockevents_register_notifier(struct notifier_block *nb)
return ret;
}
-/**
- * clockevents_unregister_notifier - unregister a clock events change listener
- */
-void clockevents_unregister_notifier(struct notifier_block *nb)
-{
- spin_lock(&clockevents_lock);
- raw_notifier_chain_unregister(&clockevents_chain, nb);
- spin_unlock(&clockevents_lock);
-}
-
/*
* Notify about a clock event change. Called with clockevents_lock
* held.