summary | refs | log | tree | commit | diff
path: root/kernel/futex.c
diff options
context:
space:
mode:
author: Michael Bestas <mkbestas@lineageos.org> 2021-04-19 18:56:01 +0300
committer: Michael Bestas <mkbestas@lineageos.org> 2021-04-19 18:56:01 +0300
commit: f3d4e7ef44f14e433312bc0646ea996d8c8756bf (patch)
tree: 78f6dc67808364d9ad3eae5e4c323a668bb6fadc /kernel/futex.c
parent: db1bf2b55b337174e62637e6c23b45f166bdc14e (diff)
parent: f5978a07daf67b25d101caa42ab3b18f0edf3dde (diff)
Merge branch 'android-4.4-p' of https://android.googlesource.com/kernel/common into lineage-18.1-caf-msm8998
This brings LA.UM.9.2.r1-02700-SDMxx0.0 up to date with https://android.googlesource.com/kernel/common/ android-4.4-p at commit:
  f5978a07daf67 Merge 4.4.267 into android-4.4-p

Conflicts:
  arch/alpha/include/asm/Kbuild
  drivers/mmc/core/mmc.c
  drivers/usb/gadget/configfs.c

Change-Id: I978d923e97c18f284edbd32c0c19ac70002f7d83
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- kernel/futex.c 209
1 files changed, 177 insertions, 32 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index cb96565ad7c0..df724e36df56 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1016,6 +1016,39 @@ static void exit_pi_state_list(struct task_struct *curr)
* [10] There is no transient state which leaves owner and user space
* TID out of sync. Except one error case where the kernel is denied
* write access to the user address, see fixup_pi_state_owner().
+ *
+ *
+ * Serialization and lifetime rules:
+ *
+ * hb->lock:
+ *
+ * hb -> futex_q, relation
+ * futex_q -> pi_state, relation
+ *
+ * (cannot be raw because hb can contain arbitrary amount
+ * of futex_q's)
+ *
+ * pi_mutex->wait_lock:
+ *
+ * {uval, pi_state}
+ *
+ * (and pi_mutex 'obviously')
+ *
+ * p->pi_lock:
+ *
+ * p->pi_state_list -> pi_state->list, relation
+ *
+ * pi_state->refcount:
+ *
+ * pi_state lifetime
+ *
+ *
+ * Lock order:
+ *
+ * hb->lock
+ * pi_mutex->wait_lock
+ * p->pi_lock
+ *
*/
/*
@@ -1023,10 +1056,12 @@ static void exit_pi_state_list(struct task_struct *curr)
* the pi_state against the user space value. If correct, attach to
* it.
*/
-static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
+ struct futex_pi_state *pi_state,
struct futex_pi_state **ps)
{
pid_t pid = uval & FUTEX_TID_MASK;
+ int ret, uval2;
/*
* Userspace might have messed up non-PI and PI futexes [3]
@@ -1034,9 +1069,34 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
if (unlikely(!pi_state))
return -EINVAL;
+ /*
+ * We get here with hb->lock held, and having found a
+ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
+ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
+ * which in turn means that futex_lock_pi() still has a reference on
+ * our pi_state.
+ */
WARN_ON(!atomic_read(&pi_state->refcount));
/*
+ * Now that we have a pi_state, we can acquire wait_lock
+ * and do the state validation.
+ */
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+
+ /*
+ * Since {uval, pi_state} is serialized by wait_lock, and our current
+ * uval was read without holding it, it can have changed. Verify it
+ * still is what we expect it to be, otherwise retry the entire
+ * operation.
+ */
+ if (get_futex_value_locked(&uval2, uaddr))
+ goto out_efault;
+
+ if (uval != uval2)
+ goto out_eagain;
+
+ /*
* Handle the owner died case:
*/
if (uval & FUTEX_OWNER_DIED) {
@@ -1051,11 +1111,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* is not 0. Inconsistent state. [5]
*/
if (pid)
- return -EINVAL;
+ goto out_einval;
/*
* Take a ref on the state and return success. [4]
*/
- goto out_state;
+ goto out_attach;
}
/*
@@ -1067,14 +1127,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* Take a ref on the state and return success. [6]
*/
if (!pid)
- goto out_state;
+ goto out_attach;
} else {
/*
* If the owner died bit is not set, then the pi_state
* must have an owner. [7]
*/
if (!pi_state->owner)
- return -EINVAL;
+ goto out_einval;
}
/*
@@ -1083,11 +1143,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* user space TID. [9/10]
*/
if (pid != task_pid_vnr(pi_state->owner))
- return -EINVAL;
-out_state:
+ goto out_einval;
+
+out_attach:
atomic_inc(&pi_state->refcount);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
*ps = pi_state;
return 0;
+
+out_einval:
+ ret = -EINVAL;
+ goto out_error;
+
+out_eagain:
+ ret = -EAGAIN;
+ goto out_error;
+
+out_efault:
+ ret = -EFAULT;
+ goto out_error;
+
+out_error:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ return ret;
}
/**
@@ -1120,11 +1198,67 @@ static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
put_task_struct(exiting);
}
+static int handle_exit_race(u32 __user *uaddr, u32 uval,
+ struct task_struct *tsk)
+{
+ u32 uval2;
+
+ /*
+ * Pick the error code to report when the owner named by @uval could
+ * not be attached to. @tsk is that owner task, or NULL when no task
+ * was found for the TID. Returns -EBUSY, -EFAULT, -EAGAIN or -ESRCH.
+ *
+ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+ * caller that the alleged owner is busy.
+ */
+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
+ return -EBUSY;
+
+ /*
+ * Reread the user space value to handle the following situation:
+ *
+ *   CPU0                              CPU1
+ *
+ *   sys_exit()                        sys_futex()
+ *    do_exit()                         futex_lock_pi()
+ *                                       futex_lock_pi_atomic()
+ *     exit_signals(tsk)                  No waiters:
+ *      tsk->flags |= PF_EXITING;         *uaddr == 0x00000PID
+ *    mm_release(tsk)                     Set waiter bit
+ *     exit_robust_list(tsk) {            *uaddr = 0x80000PID;
+ *       Set owner died                   attach_to_pi_owner() {
+ *       *uaddr = 0xC0000000;              tsk = get_task(PID);
+ *     }                                   if (!(tsk->flags & PF_EXITING)) {
+ *    ...                                    attach();
+ *    tsk->futex_state =                   } else {
+ *      FUTEX_STATE_DEAD;                    if (tsk->futex_state !=
+ *                                               FUTEX_STATE_DEAD)
+ *                                             return -EAGAIN;
+ *                                           return -ESRCH; <--- FAIL
+ *                                         }
+ *
+ * Returning ESRCH unconditionally is wrong here because the
+ * user space value has been changed by the exiting task.
+ *
+ * The same logic applies to the case where the exiting task is
+ * already gone.
+ */
+ if (get_futex_value_locked(&uval2, uaddr))
+ return -EFAULT;
+
+ /* The futex word no longer matches @uval: let the caller retry. */
+ if (uval2 != uval)
+ return -EAGAIN;
+
+ /*
+ * The user space value is unchanged: the exiting task did not have
+ * a robust list, the robust list was corrupted or the user space
+ * value in *uaddr is simply bogus. Give up and tell user space.
+ */
+ return -ESRCH;
+}
+
/*
* Lookup the task for the TID provided from user space and attach to
* it after doing proper sanity checks.
*/
-static int attach_to_pi_owner(u32 uval, union futex_key *key,
+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
struct futex_pi_state **ps,
struct task_struct **exiting)
{
@@ -1135,12 +1269,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
/*
* We are the first waiter - try to look up the real owner and attach
* the new pi_state to it, but bail out when TID = 0 [1]
+ *
+ * The !pid check is paranoid. None of the call sites should end up
+ * with pid == 0, but better safe than sorry. Let the caller retry
*/
if (!pid)
- return -ESRCH;
+ return -EAGAIN;
p = futex_find_get_task(pid);
if (!p)
- return -ESRCH;
+ return handle_exit_race(uaddr, uval, NULL);
if (unlikely(p->flags & PF_KTHREAD)) {
put_task_struct(p);
@@ -1159,7 +1296,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
* FUTEX_STATE_DEAD, we know that the task has finished
* the cleanup:
*/
- int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
+ int ret = handle_exit_race(uaddr, uval, p);
raw_spin_unlock_irq(&p->pi_lock);
/*
@@ -1180,6 +1317,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
/*
* No existing pi state. First waiter. [2]
+ *
+ * This creates pi_state, we have hb->lock held, this means nothing can
+ * observe this state, wait_lock is irrelevant.
*/
pi_state = alloc_pi_state();
@@ -1204,7 +1344,8 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
return 0;
}
-static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+static int lookup_pi_state(u32 __user *uaddr, u32 uval,
+ struct futex_hash_bucket *hb,
union futex_key *key, struct futex_pi_state **ps,
struct task_struct **exiting)
{
@@ -1215,13 +1356,13 @@ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
* attach to the pi_state when the validation succeeds.
*/
if (match)
- return attach_to_pi_state(uval, match->pi_state, ps);
+ return attach_to_pi_state(uaddr, uval, match->pi_state, ps);
/*
* We are the first waiter - try to look up the owner based on
* @uval and attach to it.
*/
- return attach_to_pi_owner(uval, key, ps, exiting);
+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1234,7 +1375,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
return -EFAULT;
- /*If user space value changed, let the caller retry */
+ /* If user space value changed, let the caller retry */
return curval != uval ? -EAGAIN : 0;
}
@@ -1298,7 +1439,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
*/
match = futex_top_waiter(hb, key);
if (match)
- return attach_to_pi_state(uval, match->pi_state, ps);
+ return attach_to_pi_state(uaddr, uval, match->pi_state, ps);
/*
* No waiter and user TID is 0. We are here because the
@@ -1337,7 +1478,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* attach to the owner. If that fails, no harm done, we only
* set the FUTEX_WAITERS bit in the user space variable.
*/
- return attach_to_pi_owner(uval, key, ps, exiting);
+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}
/**
@@ -1438,6 +1579,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
ret = -EFAULT;
+
} else if (curval != uval) {
/*
* If a unconditional UNLOCK_PI operation (user space did not
@@ -1971,7 +2113,7 @@ retry_private:
* rereading and handing potential crap to
* lookup_pi_state.
*/
- ret = lookup_pi_state(ret, hb2, &key2,
+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
&pi_state, &exiting);
}
@@ -2249,7 +2391,6 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
int err = 0;
oldowner = pi_state->owner;
-
/*
* We are here because either:
*
@@ -2268,11 +2409,10 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* because we can fault here. Imagine swapped out pages or a fork
* that marked all the anonymous memory readonly for cow.
*
- * Modifying pi_state _before_ the user space value would
- * leave the pi_state in an inconsistent state when we fault
- * here, because we need to drop the hash bucket lock to
- * handle the fault. This might be observed in the PID check
- * in lookup_pi_state.
+ * Modifying pi_state _before_ the user space value would leave the
+ * pi_state in an inconsistent state when we fault here, because we
+ * need to drop the locks to handle the fault. This might be observed
+ * in the PID check in lookup_pi_state.
*/
retry:
if (!argowner) {
@@ -2333,21 +2473,26 @@ retry:
return argowner == current;
/*
- * To handle the page fault we need to drop the hash bucket
- * lock here. That gives the other task (either the highest priority
- * waiter itself or the task which stole the rtmutex) the
- * chance to try the fixup of the pi_state. So once we are
- * back from handling the fault we need to check the pi_state
- * after reacquiring the hash bucket lock and before trying to
- * do another fixup. When the fixup has been done already we
- * simply return.
+ * To handle the page fault we need to drop the locks here. That gives
+ * the other task (either the highest priority waiter itself or the
+ * task which stole the rtmutex) the chance to try the fixup of the
+ * pi_state. So once we are back from handling the fault we need to
+ * check the pi_state after reacquiring the locks and before trying to
+ * do another fixup. When the fixup has been done already we simply
+ * return.
+ *
+ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
+ * drop hb->lock since the caller owns the hb -> futex_q relation.
+ * Dropping the pi_mutex->wait_lock requires the state revalidate.
*/
handle_fault:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(q->lock_ptr);
err = fault_in_user_writeable(uaddr);
spin_lock(q->lock_ptr);
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
/*
* Check if someone else fixed it for us: