Diffstat (limited to 'kernel')
 kernel/bpf/verifier.c     | 21
 kernel/cgroup.c           | 19
 kernel/irq/manage.c       |  4
 kernel/locking/osq_lock.c | 35
 kernel/time/alarmtimer.c  | 14
 5 files changed, 75 insertions(+), 18 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2cbfba78d3db..85de5094b936 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -313,7 +313,8 @@ static const char *const bpf_jmp_string[16] = {
[BPF_EXIT >> 4] = "exit",
};
-static void print_bpf_insn(struct bpf_insn *insn)
+static void print_bpf_insn(const struct verifier_env *env,
+ const struct bpf_insn *insn)
{
u8 class = BPF_CLASS(insn->code);
@@ -377,9 +378,19 @@ static void print_bpf_insn(struct bpf_insn *insn)
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->imm);
- } else if (BPF_MODE(insn->code) == BPF_IMM) {
- verbose("(%02x) r%d = 0x%x\n",
- insn->code, insn->dst_reg, insn->imm);
+ } else if (BPF_MODE(insn->code) == BPF_IMM &&
+ BPF_SIZE(insn->code) == BPF_DW) {
+ /* At this point, we already made sure that the second
+ * part of the ldimm64 insn is accessible.
+ */
+ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+ bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+
+ if (map_ptr && !env->allow_ptr_leaks)
+ imm = 0;
+
+ verbose("(%02x) r%d = 0x%llx\n", insn->code,
+ insn->dst_reg, (unsigned long long)imm);
} else {
verbose("BUG_ld_%02x\n", insn->code);
return;
@@ -1758,7 +1769,7 @@ static int do_check(struct verifier_env *env)
if (log_level) {
verbose("%d: ", insn_idx);
- print_bpf_insn(insn);
+ print_bpf_insn(env, insn);
}
if (class == BPF_ALU || class == BPF_ALU64) {
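
A BPF_LD | BPF_IMM | BPF_DW (ldimm64) instruction occupies two consecutive struct bpf_insn slots: the low 32 bits of the constant sit in the first insn's imm field and the high 32 bits in the second. A minimal sketch of how the new branch above reassembles the value (the helper name is illustrative, not part of the patch):

	/* Illustrative only: rebuild the 64-bit immediate of a ldimm64. */
	static u64 ldimm64_imm(const struct bpf_insn *insn)
	{
		/* insn[0].imm = low 32 bits, insn[1].imm = high 32 bits */
		return ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
	}

For map references (src_reg == BPF_PSEUDO_MAP_FD) the immediate is, by this point in verification, a kernel map pointer, which is why the patch prints 0 unless the caller is allowed to see pointer values (env->allow_ptr_leaks).
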
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25cf44889559..077bb52e2d47 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -717,10 +717,10 @@ static void css_set_move_task(struct task_struct *task,
if (to_cset) {
/*
- * We are synchronized through cgroup_threadgroup_rwsem
- * against PF_EXITING setting such that we can't race
- * against cgroup_exit() changing the css_set to
- * init_css_set and dropping the old one.
+ * We are synchronized through css_set_lock against
+ * PF_EXITING setting such that we can't race against
+ * cgroup_exit() disassociating the task from the
+ * css_set.
*/
WARN_ON_ONCE(task->flags & PF_EXITING);
@@ -5701,19 +5701,22 @@ void cgroup_exit(struct task_struct *tsk)
int i;
/*
- * Unlink from @tsk from its css_set. As migration path can't race
- * with us, we can check css_set and cg_list without synchronization.
+ * Avoid potential race with the migrate path.
+ */
+ spin_lock_irq(&css_set_lock);
+ /*
+	 * Unlink @tsk from its css_set.
*/
cset = task_css_set(tsk);
if (!list_empty(&tsk->cg_list)) {
- spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
- spin_unlock_irq(&css_set_lock);
} else {
get_css_set(cset);
}
+ spin_unlock_irq(&css_set_lock);
+
/* see cgroup_post_fork() for details */
for_each_subsys_which(ss, i, &have_exit_callback)
ss->exit(tsk);
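
With this change cgroup_exit() takes css_set_lock unconditionally, so both the tsk->cg_list check and the css_set_move_task() call happen under the lock that css_set_move_task() now documents as its synchronization point. Condensed, the resulting flow is (a sketch assembled from the hunk above, not a verbatim copy of the function):

	spin_lock_irq(&css_set_lock);
	cset = task_css_set(tsk);
	if (!list_empty(&tsk->cg_list))
		css_set_move_task(tsk, cset, NULL, false);
	else
		get_css_set(cset);	/* pin the css_set so it stays valid after exit */
	spin_unlock_irq(&css_set_lock);
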
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e5c70dcb7f8e..2c2effdb4437 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1305,8 +1305,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
ret = __irq_set_trigger(desc,
new->flags & IRQF_TRIGGER_MASK);
- if (ret)
+ if (ret) {
+ irq_release_resources(desc);
goto out_mask;
+ }
}
desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
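
The earlier irq_request_resources() call in __setup_irq() now has a matching release on this error path, so a failing __irq_set_trigger() no longer leaks the irqchip resources. In outline (a sketch of the surrounding error handling, not the full function):

	ret = irq_request_resources(desc);
	if (ret)
		goto out_mask;

	if (new->flags & IRQF_TRIGGER_MASK) {
		ret = __irq_set_trigger(desc, new->flags & IRQF_TRIGGER_MASK);
		if (ret) {
			/* undo the earlier resource request before bailing out */
			irq_release_resources(desc);
			goto out_mask;
		}
	}
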
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 05a37857ab55..1e6a51cc25c4 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -1,6 +1,7 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/osq_lock.h>
+#include <linux/sched/rt.h>
/*
* An MCS like lock especially tailored for optimistic spinning for sleeping
@@ -85,6 +86,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
struct optimistic_spin_node *prev, *next;
+ struct task_struct *task = current;
int curr = encode_cpu(smp_processor_id());
int old;
@@ -104,6 +106,32 @@ bool osq_lock(struct optimistic_spin_queue *lock)
prev = decode_cpu(old);
node->prev = prev;
+
+	/*
+	 * We need to avoid reordering of the link-update sequence of the osq.
+	 * Consider a queue in the state CPU6->CPU2, with CPU6 holding the
+	 * lock.  If CPU0 now tries to take osq_lock, it first updates the
+	 * tail to point at itself.  If CPU2 then starts to unqueue, it sees
+	 * the updated tail (CPU0) and sets its own node->next to NULL in
+	 * osq_wait_next().  If the two stores below are reordered, prev->next
+	 * (prev being CPU2's node) is published pointing at CPU0's node
+	 * before CPU0's back-link is written:
+	 *	node->prev = prev;
+	 *	WRITE_ONCE(prev->next, node);
+	 *
+	 * If CPU2's unqueue path then commits
+	 *	WRITE_ONCE(next->prev, prev);
+	 * before CPU0's node->prev = prev, CPU0's node->prev points at
+	 * CPU6's node; once CPU0's own node->prev = prev store lands, its
+	 * prev is back to CPU2's node.  CPU2's node->next stays NULL, so if
+	 * CPU0 enters the unqueue path it spins forever, because the
+	 * condition prev->next == node can never become true.
+	 */
+ smp_mb();
+
WRITE_ONCE(prev->next, node);
/*
@@ -118,8 +146,13 @@ bool osq_lock(struct optimistic_spin_queue *lock)
while (!READ_ONCE(node->locked)) {
/*
* If we need to reschedule bail... so we can block.
+ * If a task spins on the owner on one CPU after taking
+ * osq_lock, while an RT task spins on another CPU to
+ * acquire osq_lock, the RT task will starve the owner
+ * from completing whenever the owner needs to run on
+ * that second CPU: a live lock.
*/
- if (need_resched())
+ if (need_resched() || rt_task(task))
goto unqueue;
cpu_relax_lowlatency();
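
The barrier makes the back-link in node->prev visible before the node is published through prev->next, which is what the unqueue path relies on when it walks the list. Condensed, the queueing step after this patch is (the comments are illustrative):

	node->prev = prev;		/* (1) set our back-link ...           */
	smp_mb();			/* ... and make it visible before (2)  */
	WRITE_ONCE(prev->next, node);	/* (2) publish the node to our prev    */
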
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 2af5687b83c9..1a4de0022cc5 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -569,7 +569,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- start = ktime_add(start, base->gettime());
+ start = ktime_add_safe(start, base->gettime());
alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -655,7 +655,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
overrun++;
}
- alarm->node.expires = ktime_add(alarm->node.expires, interval);
+ alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
return overrun;
}
EXPORT_SYMBOL_GPL(alarm_forward);
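
Both hunks replace ktime_add() with ktime_add_safe(), which saturates at the maximum representable time instead of wrapping on signed 64-bit overflow, so a huge relative start time or interval can no longer produce an expiry in the past. Conceptually (a simplified sketch of the saturating add, not the kernel's exact implementation):

	/* Illustrative: add two ktime_t values, clamping on overflow. */
	static ktime_t ktime_add_clamped(ktime_t a, ktime_t b)
	{
		s64 res = ktime_to_ns(a) + ktime_to_ns(b);

		/* signed overflow shows up as a wrapped (smaller or negative) sum */
		if (res < 0 || res < ktime_to_ns(a) || res < ktime_to_ns(b))
			res = KTIME_MAX;

		return ns_to_ktime(res);
	}
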
@@ -843,13 +843,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
/* start the timer */
timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
+
+ /*
+ * Rate limit to the tick as a hot fix to prevent DOS. Will be
+ * mopped up later.
+ */
+ if (ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC)
+ timr->it.alarm.interval = ktime_set(0, TICK_NSEC);
+
exp = timespec_to_ktime(new_setting->it_value);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now;
now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
- exp = ktime_add(now, exp);
+ exp = ktime_add_safe(now, exp);
}
alarm_start(&timr->it.alarm.alarmtimer, exp);
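
The clamp above rate-limits periodic alarm timers to at most one expiry per tick. For example, with HZ=250 TICK_NSEC is roughly 4,000,000 ns, so a timer_settime() request like the following (values are illustrative) ends up with an effective period of about 4 ms instead of the requested 1 us:

	struct itimerspec req = {
		.it_value    = { .tv_sec = 1, .tv_nsec = 0 },		/* first expiry in 1 s   */
		.it_interval = { .tv_sec = 0, .tv_nsec = 1000 },	/* 1 us period requested */
	};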