Merge 4.19.172 into android-4.19-stable

Changes in 4.19.172
	gpio: mvebu: fix pwm .get_state period calculation
	Revert "mm/slub: fix a memory leak in sysfs_slab_add()"
	futex: Move futex exit handling into futex code
	futex: Replace PF_EXITPIDONE with a state
	exit/exec: Seperate mm_release()
	futex: Split futex_mm_release() for exit/exec
	futex: Set task::futex_state to DEAD right after handling futex exit
	futex: Mark the begin of futex exit explicitly
	futex: Sanitize exit state handling
	futex: Provide state handling for exec() as well
	futex: Add mutex around futex exit
	futex: Provide distinct return value when owner is exiting
	futex: Prevent exit livelock
	futex: Ensure the correct return value from futex_lock_pi()
	futex: Replace pointless printk in fixup_owner()
	futex: Provide and use pi_state_update_owner()
	rtmutex: Remove unused argument from rt_mutex_proxy_unlock()
	futex: Use pi_state_update_owner() in put_pi_state()
	futex: Simplify fixup_pi_state_owner()
	futex: Handle faults correctly for PI futexes
	HID: wacom: Correct NULL dereference on AES pen proximity
	tracing: Fix race in trace_open and buffer resize call
	tools: Factor HOSTCC, HOSTLD, HOSTAR definitions
	dm integrity: conditionally disable "recalculate" feature
	writeback: Drop I_DIRTY_TIME_EXPIRE
	fs: fix lazytime expiration handling in __writeback_single_inode()
	Linux 4.19.172

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I9b5391e9e955a105ab9c144fa6258dcbea234211
Author: Greg Kroah-Hartman
Date:   2021-01-30 15:05:40 +01:00

 24 files changed, 453 insertions(+), 288 deletions(-)

--- a/Documentation/device-mapper/dm-integrity.txt
+++ b/Documentation/device-mapper/dm-integrity.txt

@@ -146,6 +146,13 @@ block_size:number
 	Supported values are 512, 1024, 2048 and 4096 bytes. If not
 	specified the default block size is 512 bytes.
 
+legacy_recalculate
+	Allow recalculating of volumes with HMAC keys. This is disabled by
+	default for security reasons - an attacker could modify the volume,
+	set recalc_sector to zero, and the kernel would not detect the
+	modification.
+
+
 The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
 be changed when reloading the target (load an inactive table and swap the
 tables with suspend and resume). The other arguments should not be changed

--- a/Makefile
+++ b/Makefile

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
 PATCHLEVEL = 19
-SUBLEVEL = 171
+SUBLEVEL = 172
 EXTRAVERSION =
 NAME = "People's Front"

--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c

@@ -650,9 +650,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
 
 	spin_lock_irqsave(&mvpwm->lock, flags);
 
-	val = (unsigned long long)
-		readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
-	val *= NSEC_PER_SEC;
+	u = readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
+	val = (unsigned long long) u * NSEC_PER_SEC;
 	do_div(val, mvpwm->clk_rate);
 	if (val > UINT_MAX)
 		state->duty_cycle = UINT_MAX;
@@ -661,21 +660,17 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
 	else
 		state->duty_cycle = 1;
 
-	val = (unsigned long long)
-		readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
+	val = (unsigned long long) u; /* on duration */
+	/* period = on + off duration */
+	val += readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
 	val *= NSEC_PER_SEC;
 	do_div(val, mvpwm->clk_rate);
-	if (val < state->duty_cycle) {
+	if (val > UINT_MAX)
+		state->period = UINT_MAX;
+	else if (val)
+		state->period = val;
+	else
 		state->period = 1;
-	} else {
-		val -= state->duty_cycle;
-		if (val > UINT_MAX)
-			state->period = UINT_MAX;
-		else if (val)
-			state->period = val;
-		else
-			state->period = 1;
-	}
 
 	regmap_read(mvchip->regs, GPIO_BLINK_EN_OFF + mvchip->offset, &u);
 	if (u)
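
The fix reduces the period calculation to: sum the on and off counter values, scale to nanoseconds, then clamp. The removed branch instead derived the period from the off-duration minus the duty cycle, under-reporting it. A standalone sketch of the corrected arithmetic (illustrative C, not the driver itself; the register reads and do_div() are replaced by plain parameters and a 64-bit divide):

#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Model of the corrected .get_state math: on + off counter ticks are
 * scaled by NSEC_PER_SEC / clk_rate, clamped to UINT32_MAX, floored at 1. */
static uint32_t blink_period_ns(uint32_t on_ticks, uint32_t off_ticks,
				uint32_t clk_rate)
{
	unsigned long long val = (unsigned long long)on_ticks + off_ticks;

	val *= NSEC_PER_SEC;
	val /= clk_rate;	/* the driver uses do_div(val, clk_rate) */
	if (val > UINT32_MAX)
		return UINT32_MAX;
	return val ? (uint32_t)val : 1;
}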

--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c

@@ -150,9 +150,9 @@ static int wacom_wac_pen_serial_enforce(struct hid_device *hdev,
 	}
 
 	if (flush)
-		wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo);
+		wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo);
 	else if (insert)
-		wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo,
+		wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo,
 				       raw_data, report_size);
 
 	return insert && !flush;
@@ -1251,7 +1251,7 @@ static void wacom_devm_kfifo_release(struct device *dev, void *res)
 static int wacom_devm_kfifo_alloc(struct wacom *wacom)
 {
 	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
-	struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo;
+	struct kfifo_rec_ptr_2 *pen_fifo;
 	int error;
 
 	pen_fifo = devres_alloc(wacom_devm_kfifo_release,
@@ -1268,6 +1268,7 @@ static int wacom_devm_kfifo_alloc(struct wacom *wacom)
 	}
 
 	devres_add(&wacom->hdev->dev, pen_fifo);
+	wacom_wac->pen_fifo = pen_fifo;
 
 	return 0;
 }

--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h

@@ -344,7 +344,7 @@ struct wacom_wac {
 	struct input_dev *pen_input;
 	struct input_dev *touch_input;
 	struct input_dev *pad_input;
-	struct kfifo_rec_ptr_2 pen_fifo;
+	struct kfifo_rec_ptr_2 *pen_fifo;
 	int pid;
 	int num_contacts_left;
 	u8 bt_features;

--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c

@@ -240,6 +240,7 @@ struct dm_integrity_c {
 
 	bool journal_uptodate;
 	bool just_formatted;
+	bool legacy_recalculate;
 
 	struct alg_spec internal_hash_alg;
 	struct alg_spec journal_crypt_alg;
@@ -345,6 +346,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic)
 	return READ_ONCE(ic->failed);
 }
 
+static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
+{
+	if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) &&
+	    !ic->legacy_recalculate)
+		return true;
+	return false;
+}
+
 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
 					  unsigned j, unsigned char seq)
 {
@@ -2503,6 +2512,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
 		arg_count += !!ic->internal_hash_alg.alg_string;
 		arg_count += !!ic->journal_crypt_alg.alg_string;
 		arg_count += !!ic->journal_mac_alg.alg_string;
+		arg_count += ic->legacy_recalculate;
 		DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
 		       ic->tag_size, ic->mode, arg_count);
 		if (ic->meta_dev)
@@ -2516,6 +2526,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
 		DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
 		DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
 		DMEMIT(" commit_time:%u", ic->autocommit_msec);
+		if (ic->legacy_recalculate)
+			DMEMIT(" legacy_recalculate");
 
 #define EMIT_ALG(a, n)							\
 		do {							\
@@ -3118,7 +3130,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	unsigned extra_args;
 	struct dm_arg_set as;
 	static const struct dm_arg _args[] = {
-		{0, 15, "Invalid number of feature args"},
+		{0, 12, "Invalid number of feature args"},
 	};
 	unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
 	bool recalculate;
@@ -3248,6 +3260,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			goto bad;
 		} else if (!strcmp(opt_string, "recalculate")) {
 			recalculate = true;
+		} else if (!strcmp(opt_string, "legacy_recalculate")) {
+			ic->legacy_recalculate = true;
 		} else {
 			r = -EINVAL;
 			ti->error = "Invalid argument";
@@ -3523,6 +3537,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		}
 	}
 
+	if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+	    le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
+	    dm_integrity_disable_recalculate(ic)) {
+		ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
+		r = -EOPNOTSUPP;
+		goto bad;
+	}
+
 	ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
 			1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL);
 	if (IS_ERR(ic->bufio)) {
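
Condensed model of the new gate in dm_integrity_ctr(): an interrupted recalculation on a volume whose internal hash or journal MAC is keyed is refused unless "legacy_recalculate" was given. Illustrative C with invented field names mirroring the checks above, not the driver code:

#include <stdbool.h>
#include <stdint.h>

struct integrity_model {
	bool recalculating;		/* SB_FLAG_RECALCULATING set */
	uint64_t recalc_sector;		/* recalculation progress */
	uint64_t provided_data_sectors;
	bool hmac_keyed;		/* internal_hash_alg.key || journal_mac_alg.key */
	bool legacy_recalculate;	/* the new opt-in flag */
};

/* Mirrors dm_integrity_disable_recalculate() plus the constructor check:
 * reject only when a recalculation is actually pending and unfinished. */
static bool recalc_rejected(const struct integrity_model *ic)
{
	return ic->recalculating &&
	       ic->recalc_sector < ic->provided_data_sectors &&
	       ic->hmac_keyed && !ic->legacy_recalculate;
}

Without the gate, an attacker who can rewrite the device could zero recalc_sector and have the kernel recompute valid MACs over tampered data, which is exactly the scenario the documentation text above describes.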

--- a/fs/exec.c
+++ b/fs/exec.c

@@ -1011,7 +1011,7 @@ static int exec_mmap(struct mm_struct *mm)
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
 	old_mm = current->mm;
-	mm_release(tsk, old_mm);
+	exec_mm_release(tsk, old_mm);
 
 	if (old_mm) {
 		sync_mm_rss(old_mm);

--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -1392,22 +1392,26 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		ret = err;
 	}
 
+	/*
+	 * If the inode has dirty timestamps and we need to write them, call
+	 * mark_inode_dirty_sync() to notify the filesystem about it and to
+	 * change I_DIRTY_TIME into I_DIRTY_SYNC.
+	 */
+	if ((inode->i_state & I_DIRTY_TIME) &&
+	    (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
+	     time_after(jiffies, inode->dirtied_time_when +
+			dirtytime_expire_interval * HZ))) {
+		trace_writeback_lazytime(inode);
+		mark_inode_dirty_sync(inode);
+	}
+
 	/*
 	 * Some filesystems may redirty the inode during the writeback
 	 * due to delalloc, clear dirty metadata flags right before
 	 * write_inode()
 	 */
 	spin_lock(&inode->i_lock);
-
 	dirty = inode->i_state & I_DIRTY;
-	if ((inode->i_state & I_DIRTY_TIME) &&
-	    ((dirty & I_DIRTY_INODE) ||
-	     wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
-	     time_after(jiffies, inode->dirtied_time_when +
-			dirtytime_expire_interval * HZ))) {
-		dirty |= I_DIRTY_TIME;
-		trace_writeback_lazytime(inode);
-	}
 	inode->i_state &= ~dirty;
 
 	/*
@@ -1428,8 +1432,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_unlock(&inode->i_lock);
 
-	if (dirty & I_DIRTY_TIME)
-		mark_inode_dirty_sync(inode);
 
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & ~I_DIRTY_PAGES) {
 		int err = write_inode(inode, wbc);
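
The behavioural point of the two hunks: expired timestamps are now funneled through mark_inode_dirty_sync() before the dirty flags are sampled under i_lock, converting I_DIRTY_TIME into I_DIRTY_SYNC so write_inode() actually sees it, instead of marking the inode dirty again only after the flags were already latched. A standalone model of the expiry predicate (illustrative C; jiffies/HZ replaced by seconds, interval hard-coded to the kernel's 12-hour default):

#include <stdbool.h>
#include <time.h>

#define DIRTYTIME_EXPIRE_SECS (12 * 60 * 60)	/* dirtytime_expire_interval default */

struct inode_model {
	bool dirty_time;	/* I_DIRTY_TIME */
	time_t dirtied_when;	/* inode->dirtied_time_when */
};

static bool timestamps_need_writeout(const struct inode_model *inode,
				     bool sync_all, bool for_sync, time_t now)
{
	return inode->dirty_time &&
	       (sync_all || for_sync ||
		now > inode->dirtied_when + DIRTYTIME_EXPIRE_SECS);
}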

--- a/include/linux/compat.h
+++ b/include/linux/compat.h

@@ -445,8 +445,6 @@ struct compat_kexec_segment;
 struct compat_mq_attr;
 struct compat_msgbuf;
 
-extern void compat_exit_robust_list(struct task_struct *curr);
-
 #define BITS_PER_COMPAT_LONG    (8*sizeof(compat_long_t))
 
 #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)

--- a/include/linux/futex.h
+++ b/include/linux/futex.h

@@ -2,7 +2,9 @@
 #ifndef _LINUX_FUTEX_H
 #define _LINUX_FUTEX_H
 
+#include <linux/sched.h>
 #include <linux/ktime.h>
+
 #include <uapi/linux/futex.h>
 
 struct inode;
@@ -51,15 +53,35 @@ union futex_key {
 #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
 
 #ifdef CONFIG_FUTEX
-extern void exit_robust_list(struct task_struct *curr);
+enum {
+	FUTEX_STATE_OK,
+	FUTEX_STATE_EXITING,
+	FUTEX_STATE_DEAD,
+};
+
+static inline void futex_init_task(struct task_struct *tsk)
+{
+	tsk->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+	tsk->compat_robust_list = NULL;
+#endif
+	INIT_LIST_HEAD(&tsk->pi_state_list);
+	tsk->pi_state_cache = NULL;
+	tsk->futex_state = FUTEX_STATE_OK;
+	mutex_init(&tsk->futex_exit_mutex);
+}
+
+void futex_exit_recursive(struct task_struct *tsk);
+void futex_exit_release(struct task_struct *tsk);
+void futex_exec_release(struct task_struct *tsk);
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 #else
-static inline void exit_robust_list(struct task_struct *curr)
-{
-}
+static inline void futex_init_task(struct task_struct *tsk) { }
+static inline void futex_exit_recursive(struct task_struct *tsk) { }
+static inline void futex_exit_release(struct task_struct *tsk) { }
+static inline void futex_exec_release(struct task_struct *tsk) { }
 static inline long do_futex(u32 __user *uaddr, int op, u32 val,
 			    ktime_t *timeout, u32 __user *uaddr2,
 			    u32 val2, u32 val3)
@@ -68,12 +90,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
 }
 #endif
 
-#ifdef CONFIG_FUTEX_PI
-extern void exit_pi_state_list(struct task_struct *curr);
-#else
-static inline void exit_pi_state_list(struct task_struct *curr)
-{
-}
-#endif
-
 #endif

--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -1080,6 +1080,8 @@ struct task_struct {
 #endif
 	struct list_head		pi_state_list;
 	struct futex_pi_state		*pi_state_cache;
+	struct mutex			futex_exit_mutex;
+	unsigned int			futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
@@ -1479,7 +1481,6 @@ extern struct pid *cad_pid;
  */
 #define PF_IDLE			0x00000002	/* I am an IDLE thread */
 #define PF_EXITING		0x00000004	/* Getting shut down */
-#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
 #define PF_VCPU			0x00000010	/* I'm a virtual CPU */
 #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */

--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h

@@ -119,8 +119,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
  * succeeds.
  */
 extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
-/* Remove the current tasks stale references to the old mm_struct */
-extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exit() */
+extern void exit_mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exec() */
+extern void exec_mm_release(struct task_struct *, struct mm_struct *);
 
 #ifdef CONFIG_MEMCG
 extern void mm_update_next_owner(struct mm_struct *mm);

--- a/kernel/exit.c
+++ b/kernel/exit.c

@@ -498,7 +498,7 @@ static void exit_mm(void)
 	struct mm_struct *mm = current->mm;
 	struct core_state *core_state;
 
-	mm_release(current, mm);
+	exit_mm_release(current, mm);
 	if (!mm)
 		return;
 	sync_mm_rss(mm);
@@ -819,32 +819,12 @@ void __noreturn do_exit(long code)
 	 */
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		pr_alert("Fixing recursive fault but reboot is needed!\n");
-		/*
-		 * We can do this unlocked here. The futex code uses
-		 * this flag just to verify whether the pi state
-		 * cleanup has been done or not. In the worst case it
-		 * loops once more. We pretend that the cleanup was
-		 * done as there is no way to return. Either the
-		 * OWNER_DIED bit is set by now or we push the blocked
-		 * task into the wait for ever nirwana as well.
-		 */
-		tsk->flags |= PF_EXITPIDONE;
+		futex_exit_recursive(tsk);
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
 
 	exit_signals(tsk);	/* sets PF_EXITING */
-	/*
-	 * Ensure that all new tsk->pi_lock acquisitions must observe
-	 * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
-	 */
-	smp_mb();
-	/*
-	 * Ensure that we must observe the pi_state in exit_mm() ->
-	 * mm_release() -> exit_pi_state_list().
-	 */
-	raw_spin_lock_irq(&tsk->pi_lock);
-	raw_spin_unlock_irq(&tsk->pi_lock);
 
 	/* sync mm's RSS info before statistics gathering */
 	if (tsk->mm)
@@ -919,12 +899,6 @@ void __noreturn do_exit(long code)
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held();
-	/*
-	 * We can do this unlocked here. The futex code uses this flag
-	 * just to verify whether the pi state cleanup has been done
-	 * or not. In the worst case it loops once more.
-	 */
-	tsk->flags |= PF_EXITPIDONE;
 
 	if (tsk->io_context)
 		exit_io_context(tsk);

--- a/kernel/fork.c
+++ b/kernel/fork.c

@@ -1232,24 +1232,8 @@ static int wait_for_vfork_done(struct task_struct *child,
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
-void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-	/* Get rid of any futexes when releasing the mm */
-#ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list)) {
-		exit_robust_list(tsk);
-		tsk->robust_list = NULL;
-	}
-#ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list)) {
-		compat_exit_robust_list(tsk);
-		tsk->compat_robust_list = NULL;
-	}
-#endif
-	if (unlikely(!list_empty(&tsk->pi_state_list)))
-		exit_pi_state_list(tsk);
-#endif
-
 	uprobe_free_utask(tsk);
 
 	/* Get rid of any cached register state */
@@ -1282,6 +1266,18 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		complete_vfork_done(tsk);
 }
 
+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+	futex_exit_release(tsk);
+	mm_release(tsk, mm);
+}
+
+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+	futex_exec_release(tsk);
+	mm_release(tsk, mm);
+}
+
 /*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.
@@ -2066,14 +2062,8 @@ static __latent_entropy struct task_struct *copy_process(
 #ifdef CONFIG_BLOCK
 	p->plug = NULL;
 #endif
-#ifdef CONFIG_FUTEX
-	p->robust_list = NULL;
-#ifdef CONFIG_COMPAT
-	p->compat_robust_list = NULL;
-#endif
-	INIT_LIST_HEAD(&p->pi_state_list);
-	p->pi_state_cache = NULL;
-#endif
+	futex_init_task(p);
+
 	/*
 	 * sigaltstack should be cleared when sharing the same VM
 	 */

--- a/kernel/futex.c
+++ b/kernel/futex.c

@@ -341,6 +341,12 @@ static inline bool should_fail_futex(bool fshared)
 }
 #endif /* CONFIG_FAIL_FUTEX */
 
+#ifdef CONFIG_COMPAT
+static void compat_exit_robust_list(struct task_struct *curr);
+#else
+static inline void compat_exit_robust_list(struct task_struct *curr) { }
+#endif
+
 static inline void futex_get_mm(union futex_key *key)
 {
 	mmgrab(key->private.mm);
@@ -833,6 +839,29 @@ static struct futex_pi_state *alloc_pi_state(void)
 	return pi_state;
 }
 
+static void pi_state_update_owner(struct futex_pi_state *pi_state,
+				  struct task_struct *new_owner)
+{
+	struct task_struct *old_owner = pi_state->owner;
+
+	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
+
+	if (old_owner) {
+		raw_spin_lock(&old_owner->pi_lock);
+		WARN_ON(list_empty(&pi_state->list));
+		list_del_init(&pi_state->list);
+		raw_spin_unlock(&old_owner->pi_lock);
+	}
+
+	if (new_owner) {
+		raw_spin_lock(&new_owner->pi_lock);
+		WARN_ON(!list_empty(&pi_state->list));
+		list_add(&pi_state->list, &new_owner->pi_state_list);
+		pi_state->owner = new_owner;
+		raw_spin_unlock(&new_owner->pi_lock);
+	}
+}
+
 static void get_pi_state(struct futex_pi_state *pi_state)
 {
 	WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
@@ -855,17 +884,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
 	 * and has cleaned up the pi_state already
 	 */
 	if (pi_state->owner) {
-		struct task_struct *owner;
 		unsigned long flags;
 
 		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
-		owner = pi_state->owner;
-		if (owner) {
-			raw_spin_lock(&owner->pi_lock);
-			list_del_init(&pi_state->list);
-			raw_spin_unlock(&owner->pi_lock);
-		}
-		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+		pi_state_update_owner(pi_state, NULL);
+		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
 		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
 	}
 
@@ -890,7 +913,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
-void exit_pi_state_list(struct task_struct *curr)
+static void exit_pi_state_list(struct task_struct *curr)
 {
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
@@ -960,7 +983,8 @@ void exit_pi_state_list(struct task_struct *curr)
 	}
 	raw_spin_unlock_irq(&curr->pi_lock);
 }
-
+#else
+static inline void exit_pi_state_list(struct task_struct *curr) { }
 #endif
 
 /*
@@ -1010,7 +1034,8 @@ void exit_pi_state_list(struct task_struct *curr)
 *	FUTEX_OWNER_DIED bit. See [4]
 *
 * [10] There is no transient state which leaves owner and user space
- *	TID out of sync.
+ *	TID out of sync. Except one error case where the kernel is denied
+ *	write access to the user address, see fixup_pi_state_owner().
 *
 *
 * Serialization and lifetime rules:
@@ -1169,16 +1194,47 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
 	return ret;
 }
 
+/**
+ * wait_for_owner_exiting - Block until the owner has exited
+ * @exiting:	Pointer to the exiting task
+ *
+ * Caller must hold a refcount on @exiting.
+ */
+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
+{
+	if (ret != -EBUSY) {
+		WARN_ON_ONCE(exiting);
+		return;
+	}
+
+	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
+		return;
+
+	mutex_lock(&exiting->futex_exit_mutex);
+	/*
+	 * No point in doing state checking here. If the waiter got here
+	 * while the task was in exec()->exec_futex_release() then it can
+	 * have any FUTEX_STATE_* value when the waiter has acquired the
+	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
+	 * already. Highly unlikely and not a problem. Just one more round
+	 * through the futex maze.
+	 */
+	mutex_unlock(&exiting->futex_exit_mutex);
+
+	put_task_struct(exiting);
+}
+
 static int handle_exit_race(u32 __user *uaddr, u32 uval,
 			    struct task_struct *tsk)
 {
 	u32 uval2;
 
 	/*
-	 * If PF_EXITPIDONE is not yet set, then try again.
+	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+	 * caller that the alleged owner is busy.
 	 */
-	if (tsk && !(tsk->flags & PF_EXITPIDONE))
-		return -EAGAIN;
+	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
+		return -EBUSY;
 
 	/*
 	 * Reread the user space value to handle the following situation:
@@ -1196,8 +1252,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
 	 *	*uaddr = 0xC0000000;	   tsk = get_task(PID);
 	 *	}			   if (!tsk->flags & PF_EXITING) {
 	 *	...			     attach();
-	 *	tsk->flags |= PF_EXITPIDONE;  } else {
-	 *				       if (!(tsk->flags & PF_EXITPIDONE))
-	 *				         return -EAGAIN;
+	 *	tsk->futex_state =	   } else {
+	 *	  FUTEX_STATE_DEAD;	     if (tsk->futex_state !=
+	 *					FUTEX_STATE_DEAD)
+	 *				       return -EAGAIN;
 	 *	return -ESRCH; <--- FAIL
 	 *	}
@@ -1228,7 +1285,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
 * it after doing proper sanity checks.
 */
 static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-			      struct futex_pi_state **ps)
+			      struct futex_pi_state **ps,
+			      struct task_struct **exiting)
 {
 	pid_t pid = uval & FUTEX_TID_MASK;
 	struct futex_pi_state *pi_state;
@@ -1253,22 +1311,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
 	}
 
 	/*
-	 * We need to look at the task state flags to figure out,
-	 * whether the task is exiting. To protect against the do_exit
-	 * change of the task flags, we do this protected by
-	 * p->pi_lock:
+	 * We need to look at the task state to figure out, whether the
+	 * task is exiting. To protect against the change of the task state
+	 * in futex_exit_release(), we do this protected by p->pi_lock:
 	 */
 	raw_spin_lock_irq(&p->pi_lock);
-	if (unlikely(p->flags & PF_EXITING)) {
+	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
 		/*
-		 * The task is on the way out. When PF_EXITPIDONE is
-		 * set, we know that the task has finished the
-		 * cleanup:
+		 * The task is on the way out. When the futex state is
+		 * FUTEX_STATE_DEAD, we know that the task has finished
+		 * the cleanup:
 		 */
 		int ret = handle_exit_race(uaddr, uval, p);
 
 		raw_spin_unlock_irq(&p->pi_lock);
-		put_task_struct(p);
+		/*
+		 * If the owner task is between FUTEX_STATE_EXITING and
+		 * FUTEX_STATE_DEAD then store the task pointer and keep
+		 * the reference on the task struct. The calling code will
+		 * drop all locks, wait for the task to reach
+		 * FUTEX_STATE_DEAD and then drop the refcount. This is
+		 * required to prevent a live lock when the current task
+		 * preempted the exiting task between the two states.
+		 */
+		if (ret == -EBUSY)
+			*exiting = p;
+		else
+			put_task_struct(p);
 		return ret;
 	}
@@ -1307,7 +1376,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
 
 static int lookup_pi_state(u32 __user *uaddr, u32 uval,
 			   struct futex_hash_bucket *hb,
-			   union futex_key *key, struct futex_pi_state **ps)
+			   union futex_key *key, struct futex_pi_state **ps,
+			   struct task_struct **exiting)
 {
 	struct futex_q *top_waiter = futex_top_waiter(hb, key);
 
@@ -1322,7 +1392,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
 	 * We are the first waiter - try to look up the owner based on
 	 * @uval and attach to it.
 	 */
-	return attach_to_pi_owner(uaddr, uval, key, ps);
+	return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
 }
 
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1350,6 +1420,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
+ * @exiting:		Pointer to store the task pointer of the owner task
+ *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
@@ -1358,11 +1430,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 *  - <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
+ *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
 */
 static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 				union futex_key *key,
 				struct futex_pi_state **ps,
-				struct task_struct *task, int set_waiters)
+				struct task_struct *task,
+				struct task_struct **exiting,
+				int set_waiters)
 {
 	u32 uval, newval, vpid = task_pid_vnr(task);
 	struct futex_q *top_waiter;
@@ -1432,7 +1510,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 	 * attach to the owner. If that fails, no harm done, we only
 	 * set the FUTEX_WAITERS bit in the user space variable.
 	 */
-	return attach_to_pi_owner(uaddr, newval, key, ps);
+	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
 }
 
 /**
@@ -1537,26 +1615,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state,
 			ret = -EINVAL;
 	}
 
-	if (ret)
-		goto out_unlock;
-
-	/*
-	 * This is a point of no return; once we modify the uval there is no
-	 * going back and subsequent operations must not fail.
-	 */
-
-	raw_spin_lock(&pi_state->owner->pi_lock);
-	WARN_ON(list_empty(&pi_state->list));
-	list_del_init(&pi_state->list);
-	raw_spin_unlock(&pi_state->owner->pi_lock);
-
-	raw_spin_lock(&new_owner->pi_lock);
-	WARN_ON(!list_empty(&pi_state->list));
-	list_add(&pi_state->list, &new_owner->pi_state_list);
-	pi_state->owner = new_owner;
-	raw_spin_unlock(&new_owner->pi_lock);
-
-	postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+	if (!ret) {
+		/*
+		 * This is a point of no return; once we modified the uval
+		 * there is no going back and subsequent operations must
+		 * not fail.
+		 */
+		pi_state_update_owner(pi_state, new_owner);
+		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+	}
 
 out_unlock:
 	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
@@ -1853,6 +1920,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
+ * @exiting:		Pointer to store the task pointer of the owner task
+ *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.
@@ -1860,16 +1929,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
+ *
 * Return:
 *  -  0 - failed to acquire the lock atomically;
 *  - >0 - acquired the lock, return value is vpid of the top_waiter
 *  - <0 - error
 */
-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
-				      struct futex_hash_bucket *hb1,
-				      struct futex_hash_bucket *hb2,
-				      union futex_key *key1, union futex_key *key2,
-				      struct futex_pi_state **ps, int set_waiters)
+static int
+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+			   struct futex_hash_bucket *hb2, union futex_key *key1,
+			   union futex_key *key2, struct futex_pi_state **ps,
+			   struct task_struct **exiting, int set_waiters)
 {
 	struct futex_q *top_waiter = NULL;
 	u32 curval;
@@ -1906,7 +1979,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 	 */
 	vpid = task_pid_vnr(top_waiter->task);
 	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-				   set_waiters);
+				   exiting, set_waiters);
 	if (ret == 1) {
 		requeue_pi_wake_futex(top_waiter, key2, hb2);
 		return vpid;
@@ -2035,6 +2108,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 	}
 
 	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+		struct task_struct *exiting = NULL;
+
 		/*
 		 * Attempt to acquire uaddr2 and wake the top waiter. If we
 		 * intend to requeue waiters, force setting the FUTEX_WAITERS
@@ -2042,7 +2117,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 		 * faults rather in the requeue loop below.
 		 */
 		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-						 &key2, &pi_state, nr_requeue);
+						 &key2, &pi_state,
+						 &exiting, nr_requeue);
 
 		/*
 		 * At this point the top_waiter has either taken uaddr2 or is
@@ -2069,7 +2145,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 			 * If that call succeeds then we have pi_state and an
 			 * initial refcount on it.
 			 */
-			ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
+			ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
+					      &pi_state, &exiting);
 		}
 
 		switch (ret) {
@@ -2087,17 +2164,24 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 			if (!ret)
 				goto retry;
 			goto out;
+		case -EBUSY:
 		case -EAGAIN:
 			/*
 			 * Two reasons for this:
-			 * - Owner is exiting and we just wait for the
+			 * - EBUSY: Owner is exiting and we just wait for the
 			 *   exit to complete.
-			 * - The user space value changed.
+			 * - EAGAIN: The user space value changed.
 			 */
 			double_unlock_hb(hb1, hb2);
 			hb_waiters_dec(hb2);
 			put_futex_key(&key2);
 			put_futex_key(&key1);
+			/*
+			 * Handle the case where the owner is in the middle of
+			 * exiting. Wait for the exit to complete otherwise
+			 * this task might loop forever, aka. live lock.
+			 */
+			wait_for_owner_exiting(ret, exiting);
 			cond_resched();
 			goto retry;
 		default:
@@ -2362,18 +2446,13 @@ static void unqueue_me_pi(struct futex_q *q)
 	spin_unlock(q->lock_ptr);
 }
 
-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-				struct task_struct *argowner)
+static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+				  struct task_struct *argowner)
 {
+	u32 uval, uninitialized_var(curval), newval, newtid;
 	struct futex_pi_state *pi_state = q->pi_state;
-	u32 uval, uninitialized_var(curval), newval;
 	struct task_struct *oldowner, *newowner;
-	u32 newtid;
-	int ret, err = 0;
-
-	lockdep_assert_held(q->lock_ptr);
-
-	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+	int err = 0;
 
 	oldowner = pi_state->owner;
 
@@ -2407,14 +2486,12 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 		 * We raced against a concurrent self; things are
 		 * already fixed up. Nothing to do.
 		 */
-		ret = 0;
-		goto out_unlock;
+		return 0;
 	}
 
 	if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
-		/* We got the lock after all, nothing to fix. */
-		ret = 0;
-		goto out_unlock;
+		/* We got the lock. pi_state is correct. Tell caller. */
+		return 1;
 	}
 
 	/*
@@ -2441,8 +2518,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 		 * We raced against a concurrent self; things are
 		 * already fixed up. Nothing to do.
 		 */
-		ret = 0;
-		goto out_unlock;
+		return 1;
 	}
 	newowner = argowner;
 }
@@ -2472,22 +2548,9 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	 * We fixed up user space. Now we need to fix the pi_state
 	 * itself.
 	 */
-	if (pi_state->owner != NULL) {
-		raw_spin_lock(&pi_state->owner->pi_lock);
-		WARN_ON(list_empty(&pi_state->list));
-		list_del_init(&pi_state->list);
-		raw_spin_unlock(&pi_state->owner->pi_lock);
-	}
+	pi_state_update_owner(pi_state, newowner);
 
-	pi_state->owner = newowner;
-
-	raw_spin_lock(&newowner->pi_lock);
-	WARN_ON(!list_empty(&pi_state->list));
-	list_add(&pi_state->list, &newowner->pi_state_list);
-	raw_spin_unlock(&newowner->pi_lock);
-	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-
-	return 0;
+	return argowner == current;
 
 	/*
 	 * In order to reschedule or handle a page fault, we need to drop the
@@ -2508,17 +2571,16 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 
 	switch (err) {
 	case -EFAULT:
-		ret = fault_in_user_writeable(uaddr);
+		err = fault_in_user_writeable(uaddr);
 		break;
 
 	case -EAGAIN:
 		cond_resched();
-		ret = 0;
+		err = 0;
 		break;
 
 	default:
 		WARN_ON_ONCE(1);
-		ret = err;
 		break;
 	}
 
@@ -2528,17 +2590,44 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	/*
 	 * Check if someone else fixed it for us:
 	 */
-	if (pi_state->owner != oldowner) {
-		ret = 0;
-		goto out_unlock;
-	}
+	if (pi_state->owner != oldowner)
+		return argowner == current;
 
-	if (ret)
-		goto out_unlock;
+	/* Retry if err was -EAGAIN or the fault in succeeded */
+	if (!err)
+		goto retry;
 
-	goto retry;
+	/*
+	 * fault_in_user_writeable() failed so user state is immutable. At
+	 * best we can make the kernel state consistent but user state will
+	 * be most likely hosed and any subsequent unlock operation will be
+	 * rejected due to PI futex rule [10].
+	 *
+	 * Ensure that the rtmutex owner is also the pi_state owner despite
+	 * the user space value claiming something different. There is no
+	 * point in unlocking the rtmutex if current is the owner as it
+	 * would need to wait until the next waiter has taken the rtmutex
+	 * to guarantee consistent state. Keep it simple. Userspace asked
+	 * for this wreckaged state.
+	 *
+	 * The rtmutex has an owner - either current or some other
+	 * task. See the EAGAIN loop above.
+	 */
+	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
 
-out_unlock:
+	return err;
+}
+
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+				struct task_struct *argowner)
+{
+	struct futex_pi_state *pi_state = q->pi_state;
+	int ret;
+
+	lockdep_assert_held(q->lock_ptr);
+
+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+	ret = __fixup_pi_state_owner(uaddr, q, argowner);
 	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 	return ret;
 }
@@ -2562,8 +2651,6 @@ static long futex_wait_restart(struct restart_block *restart);
 */
 static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
-	int ret = 0;
-
 	if (locked) {
 		/*
 		 * Got the lock. We might not be the anticipated owner if we
@@ -2574,8 +2661,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 		 * stable state, anything else needs more attention.
 		 */
 		if (q->pi_state->owner != current)
-			ret = fixup_pi_state_owner(uaddr, q, current);
-		goto out;
+			return fixup_pi_state_owner(uaddr, q, current);
+		return 1;
 	}
 
 	/*
@@ -2586,24 +2673,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 	 * Another speculative read; pi_state->owner == current is unstable
 	 * but needs our attention.
 	 */
-	if (q->pi_state->owner == current) {
-		ret = fixup_pi_state_owner(uaddr, q, NULL);
-		goto out;
-	}
+	if (q->pi_state->owner == current)
+		return fixup_pi_state_owner(uaddr, q, NULL);
 
 	/*
 	 * Paranoia check. If we did not take the lock, then we should not be
-	 * the owner of the rt_mutex.
+	 * the owner of the rt_mutex. Warn and establish consistent state.
 	 */
-	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
-		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
-				"pi-state %p\n", ret,
-				q->pi_state->pi_mutex.owner,
-				q->pi_state->owner);
-	}
+	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
+		return fixup_pi_state_owner(uaddr, q, current);
 
-out:
-	return ret ? ret : locked;
+	return 0;
 }
 
 /**
@@ -2824,7 +2904,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 			 ktime_t *time, int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
-	struct futex_pi_state *pi_state = NULL;
+	struct task_struct *exiting = NULL;
 	struct rt_mutex_waiter rt_waiter;
 	struct futex_hash_bucket *hb;
 	struct futex_q q = futex_q_init;
@@ -2852,7 +2932,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 retry_private:
 	hb = queue_lock(&q);
 
-	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+				   &exiting, 0);
 	if (unlikely(ret)) {
 		/*
 		 * Atomic work succeeded and we got the lock,
@@ -2865,15 +2946,22 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 			goto out_unlock_put_key;
 		case -EFAULT:
 			goto uaddr_faulted;
+		case -EBUSY:
 		case -EAGAIN:
 			/*
 			 * Two reasons for this:
-			 * - Task is exiting and we just wait for the
+			 * - EBUSY: Task is exiting and we just wait for the
 			 *   exit to complete.
-			 * - The user space value changed.
+			 * - EAGAIN: The user space value changed.
 			 */
 			queue_unlock(hb);
 			put_futex_key(&q.key);
+			/*
+			 * Handle the case where the owner is in the middle of
+			 * exiting. Wait for the exit to complete otherwise
+			 * this task might loop forever, aka. live lock.
+			 */
+			wait_for_owner_exiting(ret, exiting);
 			cond_resched();
 			goto retry;
 		default:
@@ -2958,23 +3046,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 	if (res)
 		ret = (res < 0) ? res : 0;
 
-	/*
-	 * If fixup_owner() faulted and was unable to handle the fault, unlock
-	 * it and return the fault to userspace.
-	 */
-	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
-		pi_state = q.pi_state;
-		get_pi_state(pi_state);
-	}
-
 	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
 
-	if (pi_state) {
-		rt_mutex_futex_unlock(&pi_state->pi_mutex);
-		put_pi_state(pi_state);
-	}
-
 	goto out_put_key;
 
 out_unlock_put_key:
@@ -3240,7 +3314,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 				 u32 __user *uaddr2)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
-	struct futex_pi_state *pi_state = NULL;
 	struct rt_mutex_waiter rt_waiter;
 	struct futex_hash_bucket *hb;
 	union futex_key key2 = FUTEX_KEY_INIT;
@@ -3325,16 +3398,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		if (q.pi_state && (q.pi_state->owner != current)) {
 			spin_lock(q.lock_ptr);
 			ret = fixup_pi_state_owner(uaddr2, &q, current);
-			if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
-				pi_state = q.pi_state;
-				get_pi_state(pi_state);
-			}
 			/*
 			 * Drop the reference to the pi state which
 			 * the requeue_pi() code acquired for us.
 			 */
 			put_pi_state(q.pi_state);
 			spin_unlock(q.lock_ptr);
+			/*
+			 * Adjust the return value. It's either -EFAULT or
+			 * success (1) but the caller expects 0 for success.
+			 */
+			ret = ret < 0 ? ret : 0;
 		}
 	} else {
 		struct rt_mutex *pi_mutex;
@@ -3365,25 +3439,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		if (res)
 			ret = (res < 0) ? res : 0;
 
-		/*
-		 * If fixup_pi_state_owner() faulted and was unable to handle
-		 * the fault, unlock the rt_mutex and return the fault to
-		 * userspace.
-		 */
-		if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
-			pi_state = q.pi_state;
-			get_pi_state(pi_state);
-		}
-
 		/* Unqueue and drop the lock. */
 		unqueue_me_pi(&q);
 	}
 
-	if (pi_state) {
-		rt_mutex_futex_unlock(&pi_state->pi_mutex);
-		put_pi_state(pi_state);
-	}
-
 	if (ret == -EINTR) {
 		/*
 		 * We've already been requeued, but cannot restart by calling
@@ -3625,7 +3684,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
 *
 * We silently return on any sign of list-walking problem.
 */
-void exit_robust_list(struct task_struct *curr)
+static void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
@@ -3690,6 +3749,114 @@ void exit_robust_list(struct task_struct *curr)
 	}
 }
 
+static void futex_cleanup(struct task_struct *tsk)
+{
+	if (unlikely(tsk->robust_list)) {
+		exit_robust_list(tsk);
+		tsk->robust_list = NULL;
+	}
+
+#ifdef CONFIG_COMPAT
+	if (unlikely(tsk->compat_robust_list)) {
+		compat_exit_robust_list(tsk);
+		tsk->compat_robust_list = NULL;
+	}
+#endif
+
+	if (unlikely(!list_empty(&tsk->pi_state_list)))
+		exit_pi_state_list(tsk);
+}
+
+/**
+ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk:	task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This is called from the recursive fault handling path in do_exit().
+ *
+ * This is best effort. Either the futex exit code has run already or
+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
+ * take it over. If not, the problem is pushed back to user space. If the
+ * futex exit code did not run yet, then an already queued waiter might
+ * block forever, but there is nothing which can be done about that.
+ */
+void futex_exit_recursive(struct task_struct *tsk)
+{
+	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
+	if (tsk->futex_state == FUTEX_STATE_EXITING)
+		mutex_unlock(&tsk->futex_exit_mutex);
+	tsk->futex_state = FUTEX_STATE_DEAD;
+}
+
+static void futex_cleanup_begin(struct task_struct *tsk)
+{
+	/*
+	 * Prevent various race issues against a concurrent incoming waiter
+	 * including live locks by forcing the waiter to block on
+	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
+	 * attach_to_pi_owner().
+	 */
+	mutex_lock(&tsk->futex_exit_mutex);
+
+	/*
+	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+	 *
+	 * This ensures that all subsequent checks of tsk->futex_state in
+	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
+	 * tsk->pi_lock held.
+	 *
+	 * It guarantees also that a pi_state which was queued right before
+	 * the state change under tsk->pi_lock by a concurrent waiter must
+	 * be observed in exit_pi_state_list().
+	 */
+	raw_spin_lock_irq(&tsk->pi_lock);
+	tsk->futex_state = FUTEX_STATE_EXITING;
+	raw_spin_unlock_irq(&tsk->pi_lock);
+}
+
+static void futex_cleanup_end(struct task_struct *tsk, int state)
+{
+	/*
+	 * Lockless store. The only side effect is that an observer might
+	 * take another loop until it becomes visible.
+	 */
+	tsk->futex_state = state;
+	/*
+	 * Drop the exit protection. This unblocks waiters which observed
+	 * FUTEX_STATE_EXITING to reevaluate the state.
+	 */
+	mutex_unlock(&tsk->futex_exit_mutex);
+}
+
+void futex_exec_release(struct task_struct *tsk)
+{
+	/*
+	 * The state handling is done for consistency, but in the case of
+	 * exec() there is no way to prevent futher damage as the PID stays
+	 * the same. But for the unlikely and arguably buggy case that a
+	 * futex is held on exec(), this provides at least as much state
+	 * consistency protection which is possible.
+	 */
+	futex_cleanup_begin(tsk);
+	futex_cleanup(tsk);
+	/*
+	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
+	 * exec a new binary.
+	 */
+	futex_cleanup_end(tsk, FUTEX_STATE_OK);
+}
+
+void futex_exit_release(struct task_struct *tsk)
+{
+	futex_cleanup_begin(tsk);
+	futex_cleanup(tsk);
+	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
+}
+
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		u32 __user *uaddr2, u32 val2, u32 val3)
 {
@@ -3817,7 +3984,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
 *
 * We silently return on any sign of list-walking problem.
 */
-void compat_exit_robust_list(struct task_struct *curr)
+static void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
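
The core of the series is this handshake: an exiting owner publishes FUTEX_STATE_EXITING while holding futex_exit_mutex, and a waiter that gets -EBUSY blocks on that mutex in wait_for_owner_exiting() instead of spinning, which is what prevented the old PF_EXITPIDONE livelock. A user-space pthreads analogue of the pattern (illustrative sketch only, invented names, not kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum { STATE_OK, STATE_EXITING, STATE_DEAD };

static _Atomic int owner_state = STATE_OK;
static pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;

static void *owner(void *arg)
{
	pthread_mutex_lock(&exit_mutex);	/* futex_cleanup_begin() */
	atomic_store(&owner_state, STATE_EXITING);
	/* ... robust list / pi_state cleanup would happen here ... */
	atomic_store(&owner_state, STATE_DEAD);	/* futex_cleanup_end() */
	pthread_mutex_unlock(&exit_mutex);
	return NULL;
}

static void *waiter(void *arg)
{
	while (atomic_load(&owner_state) != STATE_DEAD) {
		/* Instead of busy-looping (the old livelock), block on the
		 * mutex until the owner has finished its cleanup, then
		 * re-read the state - wait_for_owner_exiting() in the diff. */
		pthread_mutex_lock(&exit_mutex);
		pthread_mutex_unlock(&exit_mutex);
	}
	puts("owner cleanup observed");
	return NULL;
}

int main(void)
{
	pthread_t o, w;

	pthread_create(&o, NULL, owner, NULL);
	pthread_create(&w, NULL, waiter, NULL);
	pthread_join(o, NULL);
	pthread_join(w, NULL);
	return 0;
}

In the kernel the waiter additionally samples the state under the owner's pi_lock, so a waiter that sees FUTEX_STATE_EXITING is guaranteed the owner already holds the mutex; the sketch above omits that ordering detail.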

--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c

@@ -1719,8 +1719,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 * possible because it belongs to the pi_state which is about to be freed
 * and it is not longer visible to other tasks.
 */
-void rt_mutex_proxy_unlock(struct rt_mutex *lock,
-			   struct task_struct *proxy_owner)
+void rt_mutex_proxy_unlock(struct rt_mutex *lock)
 {
 	debug_rt_mutex_proxy_unlock(lock);
 	rt_mutex_set_owner(lock, NULL);

--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h

@@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
 extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
 extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 				       struct task_struct *proxy_owner);
-extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
-				  struct task_struct *proxy_owner);
+extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
 extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
 extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 				       struct rt_mutex_waiter *waiter,

--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c

@@ -4393,6 +4393,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
+	/* prevent another thread from changing buffer sizes */
+	mutex_lock(&buffer->mutex);
 	atomic_inc(&buffer->resize_disabled);
 	atomic_inc(&cpu_buffer->record_disabled);
 
@@ -4416,6 +4418,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 
 	atomic_dec(&cpu_buffer->record_disabled);
 	atomic_dec(&buffer->resize_disabled);
+
+	mutex_unlock(&buffer->mutex);
 }
 
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
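
The race being fixed: trace_open can reset a per-CPU buffer while a resize is swapping buffer pages, so reset now takes the same buffer->mutex that the resize path holds. A minimal user-space model of that locking discipline (illustrative pthreads sketch, invented names, error handling elided):

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct ring_model {
	pthread_mutex_t mutex;	/* plays the role of buffer->mutex */
	size_t size;
	char *pages;
};

static void ring_resize(struct ring_model *r, size_t new_size)
{
	pthread_mutex_lock(&r->mutex);
	r->pages = realloc(r->pages, new_size);	/* sketch: assume success */
	r->size = new_size;
	pthread_mutex_unlock(&r->mutex);
}

static void ring_reset(struct ring_model *r)
{
	pthread_mutex_lock(&r->mutex);	/* the lock the patch adds */
	memset(r->pages, 0, r->size);	/* never sees a half-swapped buffer */
	pthread_mutex_unlock(&r->mutex);
}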

--- a/mm/slub.c
+++ b/mm/slub.c

@@ -5815,10 +5815,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 
 	s->kobj.kset = kset;
 	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
-	if (err) {
-		kobject_put(&s->kobj);
+	if (err)
 		goto out;
-	}
 
 	err = sysfs_create_group(&s->kobj, &slab_attr_group);
 	if (err)

--- a/tools/build/Makefile
+++ b/tools/build/Makefile

@@ -15,10 +15,6 @@ endef
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
-HOSTCC ?= gcc
-HOSTLD ?= ld
-HOSTAR ?= ar
-
 export HOSTCC HOSTLD HOSTAR
 
 ifeq ($(V),1)

--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile

@@ -7,15 +7,6 @@ ARCH := x86
 endif
 
 # always use the host compiler
-ifneq ($(LLVM),)
-HOSTAR	?= llvm-ar
-HOSTCC	?= clang
-HOSTLD	?= ld.lld
-else
-HOSTAR	?= ar
-HOSTCC	?= gcc
-HOSTLD	?= ld
-endif
 AR	 = $(HOSTAR)
 CC	 = $(HOSTCC)
 LD	 = $(HOSTLD)

--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf

@@ -148,10 +148,6 @@ endef
 
 LD += $(EXTRA_LDFLAGS)
 
-HOSTCC ?= gcc
-HOSTLD ?= ld
-HOSTAR ?= ar
-
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config

--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile

@@ -57,7 +57,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
 LD = $(CC)
-HOSTCC = gcc
 
 # check if compiler option is supported
 cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}

--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include

@@ -60,6 +60,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
 $(call allow-override,CXX,$(CROSS_COMPILE)g++)
 $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
 
+ifneq ($(LLVM),)
+HOSTAR	?= llvm-ar
+HOSTCC	?= clang
+HOSTLD	?= ld.lld
+else
+HOSTAR	?= ar
+HOSTCC	?= gcc
+HOSTLD	?= ld
+endif
+
 ifeq ($(CC_NO_CLANG), 1)
 EXTRA_WARNINGS += -Wstrict-aliasing=3
 endif