summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kernel/futex.c98
1 files changed, 91 insertions, 7 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 1fce19fc824c..a12aa6785361 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -470,6 +470,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
struct page *page, *page_head;
+ struct address_space *mapping;
int err, ro = 0;
/*
@@ -555,7 +556,19 @@ again:
}
#endif
- lock_page(page_head);
+ /*
+ * The treatment of mapping from this point on is critical. The page
+ * lock protects many things but in this context the page lock
+ * stabilizes mapping, prevents inode freeing in the shared
+ * file-backed region case and guards against movement to swap cache.
+ *
+ * Strictly speaking the page lock is not needed in all cases being
+ * considered here and page lock forces unnecessarily serialization
+ * From this point on, mapping will be re-verified if necessary and
+ * page lock will be acquired only if it is unavoidable
+ */
+
+ mapping = READ_ONCE(page_head->mapping);
/*
* If page_head->mapping is NULL, then it cannot be a PageAnon
@@ -572,18 +585,31 @@ again:
* shmem_writepage move it from filecache to swapcache beneath us:
* an unlikely race, but we do need to retry for page_head->mapping.
*/
- if (!page_head->mapping) {
- int shmem_swizzled = PageSwapCache(page_head);
+ if (unlikely(!mapping)) {
+ int shmem_swizzled;
+
+ /*
+ * Page lock is required to identify which special case above
+ * applies. If this is really a shmem page then the page lock
+ * will prevent unexpected transitions.
+ */
+ lock_page(page);
+ shmem_swizzled = PageSwapCache(page) || page->mapping;
unlock_page(page_head);
put_page(page_head);
+
if (shmem_swizzled)
goto again;
+
return -EFAULT;
}
/*
* Private mappings are handled in a simple way.
*
+ * If the futex key is stored on an anonymous page, then the associated
+ * object is the mm which is implicitly pinned by the calling process.
+ *
* NOTE: When userspace waits on a MAP_SHARED mapping, even if
* it's a read-only handle, it's expected that futexes attach to
* the object not the particular process.
@@ -601,16 +627,74 @@ again:
key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm;
key->private.address = address;
+
+ get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
} else {
+ struct inode *inode;
+
+ /*
+ * The associated futex object in this case is the inode and
+ * the page->mapping must be traversed. Ordinarily this should
+ * be stabilised under page lock but it's not strictly
+ * necessary in this case as we just want to pin the inode, not
+ * update the radix tree or anything like that.
+ *
+ * The RCU read lock is taken as the inode is finally freed
+ * under RCU. If the mapping still matches expectations then the
+ * mapping->host can be safely accessed as being a valid inode.
+ */
+ rcu_read_lock();
+
+ if (READ_ONCE(page_head->mapping) != mapping) {
+ rcu_read_unlock();
+ put_page(page_head);
+
+ goto again;
+ }
+
+ inode = READ_ONCE(mapping->host);
+ if (!inode) {
+ rcu_read_unlock();
+ put_page(page_head);
+
+ goto again;
+ }
+
+ /*
+ * Take a reference unless it is about to be freed. Previously
+ * this reference was taken by ihold under the page lock
+ * pinning the inode in place so i_lock was unnecessary. The
+ * only way for this check to fail is if the inode was
+ * truncated in parallel so warn for now if this happens.
+ *
+ * We are not calling into get_futex_key_refs() in file-backed
+ * cases, therefore a successful atomic_inc return below will
+ * guarantee that get_futex_key() will still imply smp_mb(); (B).
+ */
+ if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+ rcu_read_unlock();
+ put_page(page_head);
+
+ goto again;
+ }
+
+ /* Should be impossible but lets be paranoid for now */
+ if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+ err = -EFAULT;
+ rcu_read_unlock();
+ iput(inode);
+
+ goto out;
+ }
+
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = page_head->mapping->host;
+ key->shared.inode = inode;
key->shared.pgoff = basepage_index(page);
+ rcu_read_unlock();
}
- get_futex_key_refs(key); /* implies MB (B) */
-
out:
- unlock_page(page_head);
put_page(page_head);
return err;
}