diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/verifier.c | 21 | ||||
-rw-r--r-- | kernel/cgroup.c | 19 | ||||
-rw-r--r-- | kernel/irq/manage.c | 4 | ||||
-rw-r--r-- | kernel/locking/osq_lock.c | 35 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 14 |
5 files changed, 75 insertions, 18 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2cbfba78d3db..85de5094b936 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -313,7 +313,8 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; -static void print_bpf_insn(struct bpf_insn *insn) +static void print_bpf_insn(const struct verifier_env *env, + const struct bpf_insn *insn) { u8 class = BPF_CLASS(insn->code); @@ -377,9 +378,19 @@ static void print_bpf_insn(struct bpf_insn *insn) insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->src_reg, insn->imm); - } else if (BPF_MODE(insn->code) == BPF_IMM) { - verbose("(%02x) r%d = 0x%x\n", - insn->code, insn->dst_reg, insn->imm); + } else if (BPF_MODE(insn->code) == BPF_IMM && + BPF_SIZE(insn->code) == BPF_DW) { + /* At this point, we already made sure that the second + * part of the ldimm64 insn is accessible. + */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; + + if (map_ptr && !env->allow_ptr_leaks) + imm = 0; + + verbose("(%02x) r%d = 0x%llx\n", insn->code, + insn->dst_reg, (unsigned long long)imm); } else { verbose("BUG_ld_%02x\n", insn->code); return; @@ -1758,7 +1769,7 @@ static int do_check(struct verifier_env *env) if (log_level) { verbose("%d: ", insn_idx); - print_bpf_insn(insn); + print_bpf_insn(env, insn); } if (class == BPF_ALU || class == BPF_ALU64) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 25cf44889559..077bb52e2d47 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -717,10 +717,10 @@ static void css_set_move_task(struct task_struct *task, if (to_cset) { /* - * We are synchronized through cgroup_threadgroup_rwsem - * against PF_EXITING setting such that we can't race - * against cgroup_exit() changing the css_set to - * init_css_set and dropping the old one. + * We are synchronized through css_set_lock against + * PF_EXITING setting such that we can't race against + * cgroup_exit() disassociating the task from the + * css_set. */ WARN_ON_ONCE(task->flags & PF_EXITING); @@ -5701,19 +5701,22 @@ void cgroup_exit(struct task_struct *tsk) int i; /* - * Unlink from @tsk from its css_set. As migration path can't race - * with us, we can check css_set and cg_list without synchronization. + * Avoid potential race with the migrate path. + */ + spin_lock_irq(&css_set_lock); + /* + * Unlink from @tsk from its css_set. */ cset = task_css_set(tsk); if (!list_empty(&tsk->cg_list)) { - spin_lock_irq(&css_set_lock); css_set_move_task(tsk, cset, NULL, false); - spin_unlock_irq(&css_set_lock); } else { get_css_set(cset); } + spin_unlock_irq(&css_set_lock); + /* see cgroup_post_fork() for details */ for_each_subsys_which(ss, i, &have_exit_callback) ss->exit(tsk); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index e5c70dcb7f8e..2c2effdb4437 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1305,8 +1305,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) ret = __irq_set_trigger(desc, new->flags & IRQF_TRIGGER_MASK); - if (ret) + if (ret) { + irq_release_resources(desc); goto out_mask; + } } desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 05a37857ab55..1e6a51cc25c4 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -1,6 +1,7 @@ #include <linux/percpu.h> #include <linux/sched.h> #include <linux/osq_lock.h> +#include <linux/sched/rt.h> /* * An MCS like lock especially tailored for optimistic spinning for sleeping @@ -85,6 +86,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) { struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); struct optimistic_spin_node *prev, *next; + struct task_struct *task = current; int curr = encode_cpu(smp_processor_id()); int old; @@ -104,6 +106,32 @@ bool osq_lock(struct optimistic_spin_queue *lock) prev = decode_cpu(old); node->prev = prev; + + /* + * We need to avoid reordering of link updation sequence of osq. + * A case in which the status of optimistic spin queue is + * CPU6->CPU2 in which CPU6 has acquired the lock. At this point + * if CPU0 comes in to acquire osq_lock, it will update the tail + * count. After tail count update if CPU2 starts to unqueue itself + * from optimistic spin queue, it will find updated tail count with + * CPU0 and update CPU2 node->next to NULL in osq_wait_next(). If + * reordering of following stores happen then prev->next where prev + * being CPU2 would be updated to point to CPU0 node: + * node->prev = prev; + * WRITE_ONCE(prev->next, node); + * + * At this point if next instruction + * WRITE_ONCE(next->prev, prev); + * in CPU2 path is committed before the update of CPU0 node->prev = + * prev then CPU0 node->prev will point to CPU6 node. At this point + * if CPU0 path's node->prev = prev is committed resulting in change + * of CPU0 prev back to CPU2 node. CPU2 node->next is NULL, so if + * CPU0 gets into unqueue path of osq_lock it will keep spinning + * in infinite loop as condition prev->next == node will never be + * true. + */ + smp_mb(); + WRITE_ONCE(prev->next, node); /* @@ -118,8 +146,13 @@ bool osq_lock(struct optimistic_spin_queue *lock) while (!READ_ONCE(node->locked)) { /* * If we need to reschedule bail... so we can block. + * If a task spins on owner on a CPU after acquiring + * osq_lock while a RT task spins on another CPU to + * acquire osq_lock, it will starve the owner from + * completing if owner is to be scheduled on the same CPU. + * It will be a live lock. */ - if (need_resched()) + if (need_resched() || rt_task(task)) goto unqueue; cpu_relax_lowlatency(); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 2af5687b83c9..1a4de0022cc5 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -569,7 +569,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; - start = ktime_add(start, base->gettime()); + start = ktime_add_safe(start, base->gettime()); alarm_start(alarm, start); } EXPORT_SYMBOL_GPL(alarm_start_relative); @@ -655,7 +655,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) overrun++; } - alarm->node.expires = ktime_add(alarm->node.expires, interval); + alarm->node.expires = ktime_add_safe(alarm->node.expires, interval); return overrun; } EXPORT_SYMBOL_GPL(alarm_forward); @@ -843,13 +843,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, /* start the timer */ timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); + + /* + * Rate limit to the tick as a hot fix to prevent DOS. Will be + * mopped up later. + */ + if (ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC) + timr->it.alarm.interval = ktime_set(0, TICK_NSEC); + exp = timespec_to_ktime(new_setting->it_value); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now; now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); - exp = ktime_add(now, exp); + exp = ktime_add_safe(now, exp); } alarm_start(&timr->it.alarm.alarmtimer, exp); |