futex: Fix inode life-time issue

commit 8019ad13ef7f64be44d4f892af9c840179009254 upstream.

As reported by Jann, ihold() does not in fact guarantee inode
persistence. And instead of making it so, replace the usage of inode
pointers with a per boot, machine wide, unique inode identifier.

This sequence number is global, but shared (file backed) futexes are
rare enough that this should not become a performance issue.

Reported-by: Jann Horn <jannh@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Peter Zijlstra 2020-03-04 11:28:31 +01:00 committed by prashantpaddune
parent cc32625948
commit 9fb0d76952
4 changed files with 65 additions and 16 deletions

View File

@ -135,6 +135,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;

View File

@ -701,6 +701,7 @@ struct inode {
struct rcu_head i_rcu;
};
u64 i_version;
atomic64_t i_sequence; /* see futex */
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;

View File

@ -34,23 +34,26 @@ handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
union futex_key {
struct {
u64 i_seq;
unsigned long pgoff;
struct inode *inode;
int offset;
unsigned int offset;
} shared;
struct {
union {
struct mm_struct *mm;
u64 __tmp;
};
unsigned long address;
struct mm_struct *mm;
int offset;
unsigned int offset;
} private;
struct {
u64 ptr;
unsigned long word;
void *ptr;
int offset;
unsigned int offset;
} both;
};
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);

View File

@ -407,7 +407,7 @@ static void get_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
ihold(key->shared.inode); /* implies MB (B) */
smp_mb(); /* explicit smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies MB (B) */
@ -438,7 +438,6 @@ static void drop_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
iput(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
@ -446,6 +445,46 @@ static void drop_futex_key_refs(union futex_key *key)
}
}
/*
* Generate a machine wide unique identifier for this inode.
*
* This relies on u64 not wrapping in the life-time of the machine; which with
* 1ns resolution means almost 585 years.
*
* This further relies on the fact that a well formed program will not unmap
* the file while it has a (shared) futex waiting on it. This mapping will have
* a file reference which pins the mount and inode.
*
* If for some reason an inode gets evicted and read back in again, it will get
* a new sequence number and will _NOT_ match, even though it is the exact same
* file.
*
* It is important that match_futex() will never have a false-positive, esp.
* for PI futexes that can mess up the state. The above argues that false-negatives
* are only possible for malformed programs.
*/
static u64 get_inode_sequence_number(struct inode *inode)
{
static atomic64_t i_seq;
u64 old;
/* Does the inode already have a sequence number? */
old = atomic64_read(&inode->i_sequence);
if (likely(old))
return old;
for (;;) {
u64 new = atomic64_add_return(1, &i_seq);
if (WARN_ON_ONCE(!new))
continue;
old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
if (old)
return old;
return new;
}
}
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
@ -458,9 +497,15 @@ static void drop_futex_key_refs(union futex_key *key)
*
* The key words are stored in *key on success.
*
* For shared mappings, it's (page->index, file_inode(vma->vm_file),
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
* For shared mappings (when @fshared), the key is:
* ( inode->i_sequence, page->index, offset_within_page )
* [ also see get_inode_sequence_number() ]
*
* For private mappings (or when !@fshared), the key is:
* ( current->mm, address, 0 )
*
* This allows (cross process, where applicable) identification of the futex
* without keeping the page pinned for the duration of the FUTEX_WAIT.
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
@ -628,8 +673,6 @@ again:
key->private.mm = mm;
key->private.address = address;
get_futex_key_refs(key); /* implies smp_mb(); (B) */
} else {
struct inode *inode;
@ -687,13 +730,14 @@ again:
goto out;
}
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = inode;
key->shared.i_seq = get_inode_sequence_number(inode);
key->shared.pgoff = basepage_index(page);
rcu_read_unlock();
}
get_futex_key_refs(key); /* implies smp_mb(); (B) */
out:
put_page(page_head);
return err;