Diffstat (limited to 'kernel')
 kernel/bpf/verifier.c     | 21
 kernel/cgroup.c           | 19
 kernel/irq/manage.c       |  4
 kernel/locking/osq_lock.c | 35
 kernel/time/alarmtimer.c  | 14
 5 files changed, 75 insertions(+), 18 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2cbfba78d3db..85de5094b936 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -313,7 +313,8 @@ static const char *const bpf_jmp_string[16] = {
[BPF_EXIT >> 4] = "exit",
};
-static void print_bpf_insn(struct bpf_insn *insn)
+static void print_bpf_insn(const struct verifier_env *env,
+ const struct bpf_insn *insn)
{
u8 class = BPF_CLASS(insn->code);
@@ -377,9 +378,19 @@ static void print_bpf_insn(struct bpf_insn *insn)
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->imm);
- } else if (BPF_MODE(insn->code) == BPF_IMM) {
- verbose("(%02x) r%d = 0x%x\n",
- insn->code, insn->dst_reg, insn->imm);
+ } else if (BPF_MODE(insn->code) == BPF_IMM &&
+ BPF_SIZE(insn->code) == BPF_DW) {
+ /* At this point, we already made sure that the second
+ * part of the ldimm64 insn is accessible.
+ */
+ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+ bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+
+ if (map_ptr && !env->allow_ptr_leaks)
+ imm = 0;
+
+ verbose("(%02x) r%d = 0x%llx\n", insn->code,
+ insn->dst_reg, (unsigned long long)imm);
} else {
verbose("BUG_ld_%02x\n", insn->code);
return;
@@ -1758,7 +1769,7 @@ static int do_check(struct verifier_env *env)
if (log_level) {
verbose("%d: ", insn_idx);
- print_bpf_insn(insn);
+ print_bpf_insn(env, insn);
}
if (class == BPF_ALU || class == BPF_ALU64) {
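
A BPF_LD | BPF_IMM | BPF_DW (ldimm64) instruction occupies two consecutive struct bpf_insn slots: the low 32 bits of the constant sit in the first insn's imm field and the high 32 bits in the second. A minimal sketch of how the new branch above reassembles the value (the helper name is illustrative, not part of the patch):

	/* Illustrative only: rebuild the 64-bit immediate of a ldimm64. */
	static u64 ldimm64_imm(const struct bpf_insn *insn)
	{
		/* insn[0].imm = low 32 bits, insn[1].imm = high 32 bits */
		return ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
	}

For map references (src_reg == BPF_PSEUDO_MAP_FD) the immediate is, by this point in verification, a kernel map pointer, which is why the patch prints 0 unless the caller is allowed to see pointer values (env->allow_ptr_leaks).
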
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25cf44889559..077bb52e2d47 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -717,10 +717,10 @@ static void css_set_move_task(struct task_struct *task,
if (to_cset) {
/*
- * We are synchronized through cgroup_threadgroup_rwsem
- * against PF_EXITING setting such that we can't race
- * against cgroup_exit() changing the css_set to
- * init_css_set and dropping the old one.
+ * We are synchronized through css_set_lock against
+ * PF_EXITING setting such that we can't race against
+ * cgroup_exit() disassociating the task from the
+ * css_set.
*/
WARN_ON_ONCE(task->flags & PF_EXITING);
@@ -5701,19 +5701,22 @@ void cgroup_exit(struct task_struct *tsk)
int i;
/*
- * Unlink from @tsk from its css_set. As migration path can't race
- * with us, we can check css_set and cg_list without synchronization.
+ * Avoid potential race with the migrate path.
+ */
+ spin_lock_irq(&css_set_lock);
+ /*
+	 * Unlink @tsk from its css_set.
*/
cset = task_css_set(tsk);
if (!list_empty(&tsk->cg_list)) {
- spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
- spin_unlock_irq(&css_set_lock);
} else {
get_css_set(cset);
}
+ spin_unlock_irq(&css_set_lock);
+
/* see cgroup_post_fork() for details */
for_each_subsys_which(ss, i, &have_exit_callback)
ss->exit(tsk);
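
With this change cgroup_exit() takes css_set_lock unconditionally, so both the tsk->cg_list check and the css_set_move_task() call happen under the lock that css_set_move_task() now documents as its synchronization point. Condensed, the resulting flow is (a sketch assembled from the hunk above, not a verbatim copy of the function):

	spin_lock_irq(&css_set_lock);
	cset = task_css_set(tsk);
	if (!list_empty(&tsk->cg_list))
		css_set_move_task(tsk, cset, NULL, false);
	else
		get_css_set(cset);	/* pin the css_set so it stays valid after exit */
	spin_unlock_irq(&css_set_lock);
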
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e5c70dcb7f8e..2c2effdb4437 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1305,8 +1305,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
ret = __irq_set_trigger(desc,
new->flags & IRQF_TRIGGER_MASK);
- if (ret)
+ if (ret) {
+ irq_release_resources(desc);
goto out_mask;
+ }
}
desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
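
The earlier irq_request_resources() call in __setup_irq() now has a matching release on this error path, so a failing __irq_set_trigger() no longer leaks the irqchip resources. In outline (a sketch of the surrounding error handling, not the full function):

	ret = irq_request_resources(desc);
	if (ret)
		goto out_mask;

	if (new->flags & IRQF_TRIGGER_MASK) {
		ret = __irq_set_trigger(desc, new->flags & IRQF_TRIGGER_MASK);
		if (ret) {
			/* undo the earlier resource request before bailing out */
			irq_release_resources(desc);
			goto out_mask;
		}
	}
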
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 05a37857ab55..1e6a51cc25c4 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -1,6 +1,7 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/osq_lock.h>
+#include <linux/sched/rt.h>
/*
* An MCS like lock especially tailored for optimistic spinning for sleeping
@@ -85,6 +86,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
struct optimistic_spin_node *prev, *next;
+ struct task_struct *task = current;
int curr = encode_cpu(smp_processor_id());
int old;
@@ -104,6 +106,32 @@ bool osq_lock(struct optimistic_spin_queue *lock)
prev = decode_cpu(old);
node->prev = prev;
+
+	/*
+	 * We need to avoid reordering of the link-update sequence of the osq.
+	 * Consider a queue in the state CPU6->CPU2, with CPU6 holding the
+	 * lock.  If CPU0 now tries to take osq_lock, it first updates the
+	 * tail to point at itself.  If CPU2 then starts to unqueue, it sees
+	 * the updated tail (CPU0) and sets its own node->next to NULL in
+	 * osq_wait_next().  If the two stores below are reordered, prev->next
+	 * (prev being CPU2's node) is published pointing at CPU0's node
+	 * before CPU0's back-link is written:
+	 *	node->prev = prev;
+	 *	WRITE_ONCE(prev->next, node);
+	 *
+	 * If CPU2's unqueue path then commits
+	 *	WRITE_ONCE(next->prev, prev);
+	 * before CPU0's node->prev = prev, CPU0's node->prev points at
+	 * CPU6's node; once CPU0's own node->prev = prev store lands, its
+	 * prev is back to CPU2's node.  CPU2's node->next stays NULL, so if
+	 * CPU0 enters the unqueue path it spins forever, because the
+	 * condition prev->next == node can never become true.
+	 */
+ smp_mb();
+
WRITE_ONCE(prev->next, node);
/*
@@ -118,8 +146,13 @@ bool osq_lock(struct optimistic_spin_queue *lock)
while (!READ_ONCE(node->locked)) {
/*
* If we need to reschedule bail... so we can block.
+ * If a task spins on the owner on one CPU after taking
+ * osq_lock, while an RT task spins on another CPU to
+ * acquire osq_lock, the RT task will starve the owner
+ * from completing whenever the owner needs to run on
+ * that second CPU: a live lock.
*/
- if (need_resched())
+ if (need_resched() || rt_task(task))
goto unqueue;
cpu_relax_lowlatency();
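
The barrier makes the back-link in node->prev visible before the node is published through prev->next, which is what the unqueue path relies on when it walks the list. Condensed, the queueing step after this patch is (the comments are illustrative):

	node->prev = prev;		/* (1) set our back-link ...           */
	smp_mb();			/* ... and make it visible before (2)  */
	WRITE_ONCE(prev->next, node);	/* (2) publish the node to our prev    */
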
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 2af5687b83c9..1a4de0022cc5 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -569,7 +569,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- start = ktime_add(start, base->gettime());
+ start = ktime_add_safe(start, base->gettime());
alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -655,7 +655,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
overrun++;
}
- alarm->node.expires = ktime_add(alarm->node.expires, interval);
+ alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
return overrun;
}
EXPORT_SYMBOL_GPL(alarm_forward);
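
Both hunks replace ktime_add() with ktime_add_safe(), which saturates at the maximum representable time instead of wrapping on signed 64-bit overflow, so a huge relative start time or interval can no longer produce an expiry in the past. Conceptually (a simplified sketch of the saturating add, not the kernel's exact implementation):

	/* Illustrative: add two ktime_t values, clamping on overflow. */
	static ktime_t ktime_add_clamped(ktime_t a, ktime_t b)
	{
		s64 res = ktime_to_ns(a) + ktime_to_ns(b);

		/* signed overflow shows up as a wrapped (smaller or negative) sum */
		if (res < 0 || res < ktime_to_ns(a) || res < ktime_to_ns(b))
			res = KTIME_MAX;

		return ns_to_ktime(res);
	}
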
@@ -843,13 +843,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
/* start the timer */
timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
+
+ /*
+ * Rate limit to the tick as a hot fix to prevent DOS. Will be
+ * mopped up later.
+ */
+ if (ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC)
+ timr->it.alarm.interval = ktime_set(0, TICK_NSEC);
+
exp = timespec_to_ktime(new_setting->it_value);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now;
now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
- exp = ktime_add(now, exp);
+ exp = ktime_add_safe(now, exp);
}
alarm_start(&timr->it.alarm.alarmtimer, exp);
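
The clamp above rate-limits periodic alarm timers to at most one expiry per tick. For example, with HZ=250 TICK_NSEC is roughly 4,000,000 ns, so a timer_settime() request like the following (values are illustrative) ends up with an effective period of about 4 ms instead of the requested 1 us:

	struct itimerspec req = {
		.it_value    = { .tv_sec = 1, .tv_nsec = 0 },		/* first expiry in 1 s   */
		.it_interval = { .tv_sec = 0, .tv_nsec = 1000 },	/* 1 us period requested */
	};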