Merge 4.19.172 into android-4.19-stable
Changes in 4.19.172
	gpio: mvebu: fix pwm .get_state period calculation
	Revert "mm/slub: fix a memory leak in sysfs_slab_add()"
	futex: Move futex exit handling into futex code
	futex: Replace PF_EXITPIDONE with a state
	exit/exec: Seperate mm_release()
	futex: Split futex_mm_release() for exit/exec
	futex: Set task::futex_state to DEAD right after handling futex exit
	futex: Mark the begin of futex exit explicitly
	futex: Sanitize exit state handling
	futex: Provide state handling for exec() as well
	futex: Add mutex around futex exit
	futex: Provide distinct return value when owner is exiting
	futex: Prevent exit livelock
	futex: Ensure the correct return value from futex_lock_pi()
	futex: Replace pointless printk in fixup_owner()
	futex: Provide and use pi_state_update_owner()
	rtmutex: Remove unused argument from rt_mutex_proxy_unlock()
	futex: Use pi_state_update_owner() in put_pi_state()
	futex: Simplify fixup_pi_state_owner()
	futex: Handle faults correctly for PI futexes
	HID: wacom: Correct NULL dereference on AES pen proximity
	tracing: Fix race in trace_open and buffer resize call
	tools: Factor HOSTCC, HOSTLD, HOSTAR definitions
	dm integrity: conditionally disable "recalculate" feature
	writeback: Drop I_DIRTY_TIME_EXPIRE
	fs: fix lazytime expiration handling in __writeback_single_inode()
	Linux 4.19.172

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I9b5391e9e955a105ab9c144fa6258dcbea234211
@@ -146,6 +146,13 @@ block_size:number
	Supported values are 512, 1024, 2048 and 4096 bytes. If not
	specified the default block size is 512 bytes.

legacy_recalculate
	Allow recalculating of volumes with HMAC keys. This is disabled by
	default for security reasons - an attacker could modify the volume,
	set recalc_sector to zero, and the kernel would not detect the
	modification.


The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
be changed when reloading the target (load an inactive table and swap the
tables with suspend and resume). The other arguments should not be changed

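For illustration only (editorial note, not part of this series): a hypothetical table line that opts back into recalculation for an HMAC-keyed device might look roughly like the sketch below. The device path, sector count, tag size and key are placeholders to adapt; check the full dm-integrity documentation for the exact table layout.

	# Hypothetical usage sketch - placeholders, not taken from this patch.
	# 1 GiB integrity target in journal mode (J) with an HMAC'd internal hash,
	# explicitly re-enabling recalculation via the new optional argument.
	dmsetup create ic --table \
		"0 2097152 integrity /dev/sdb1 0 32 J 2 internal_hash:hmac(sha256):<hex-key> legacy_recalculate"
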
Makefile
@@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
VERSION = 4
|
||||
PATCHLEVEL = 19
|
||||
SUBLEVEL = 171
|
||||
SUBLEVEL = 172
|
||||
EXTRAVERSION =
|
||||
NAME = "People's Front"
|
||||
|
||||
|
||||
@@ -650,9 +650,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,

	spin_lock_irqsave(&mvpwm->lock, flags);

	val = (unsigned long long)
		readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
	val *= NSEC_PER_SEC;
	u = readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
	val = (unsigned long long) u * NSEC_PER_SEC;
	do_div(val, mvpwm->clk_rate);
	if (val > UINT_MAX)
		state->duty_cycle = UINT_MAX;

@@ -661,21 +660,17 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
	else
		state->duty_cycle = 1;

	val = (unsigned long long)
		readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
	val = (unsigned long long) u; /* on duration */
	/* period = on + off duration */
	val += readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
	val *= NSEC_PER_SEC;
	do_div(val, mvpwm->clk_rate);
	if (val < state->duty_cycle) {
	if (val > UINT_MAX)
		state->period = UINT_MAX;
	else if (val)
		state->period = val;
	else
		state->period = 1;
	} else {
		val -= state->duty_cycle;
		if (val > UINT_MAX)
			state->period = UINT_MAX;
		else if (val)
			state->period = val;
		else
			state->period = 1;
	}

	regmap_read(mvchip->regs, GPIO_BLINK_EN_OFF + mvchip->offset, &u);
	if (u)

@@ -150,9 +150,9 @@ static int wacom_wac_pen_serial_enforce(struct hid_device *hdev,
	}

	if (flush)
		wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo);
		wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo);
	else if (insert)
		wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo,
		wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo,
				       raw_data, report_size);

	return insert && !flush;

@@ -1251,7 +1251,7 @@ static void wacom_devm_kfifo_release(struct device *dev, void *res)
static int wacom_devm_kfifo_alloc(struct wacom *wacom)
{
	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
	struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo;
	struct kfifo_rec_ptr_2 *pen_fifo;
	int error;

	pen_fifo = devres_alloc(wacom_devm_kfifo_release,

@@ -1268,6 +1268,7 @@ static int wacom_devm_kfifo_alloc(struct wacom *wacom)
	}

	devres_add(&wacom->hdev->dev, pen_fifo);
	wacom_wac->pen_fifo = pen_fifo;

	return 0;
}

@@ -344,7 +344,7 @@ struct wacom_wac {
	struct input_dev *pen_input;
	struct input_dev *touch_input;
	struct input_dev *pad_input;
	struct kfifo_rec_ptr_2 pen_fifo;
	struct kfifo_rec_ptr_2 *pen_fifo;
	int pid;
	int num_contacts_left;
	u8 bt_features;

@@ -240,6 +240,7 @@ struct dm_integrity_c {

	bool journal_uptodate;
	bool just_formatted;
	bool legacy_recalculate;

	struct alg_spec internal_hash_alg;
	struct alg_spec journal_crypt_alg;

@@ -345,6 +346,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic)
	return READ_ONCE(ic->failed);
}

static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
{
	if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) &&
	    !ic->legacy_recalculate)
		return true;
	return false;
}

static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
					  unsigned j, unsigned char seq)
{

@@ -2503,6 +2512,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
		arg_count += !!ic->internal_hash_alg.alg_string;
		arg_count += !!ic->journal_crypt_alg.alg_string;
		arg_count += !!ic->journal_mac_alg.alg_string;
		arg_count += ic->legacy_recalculate;
		DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
		       ic->tag_size, ic->mode, arg_count);
		if (ic->meta_dev)

@@ -2516,6 +2526,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
		DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
		DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
		DMEMIT(" commit_time:%u", ic->autocommit_msec);
		if (ic->legacy_recalculate)
			DMEMIT(" legacy_recalculate");

#define EMIT_ALG(a, n)		\
		do {		\

@@ -3118,7 +3130,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
	unsigned extra_args;
	struct dm_arg_set as;
	static const struct dm_arg _args[] = {
		{0, 15, "Invalid number of feature args"},
		{0, 12, "Invalid number of feature args"},
	};
	unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
	bool recalculate;

@@ -3248,6 +3260,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
			goto bad;
		} else if (!strcmp(opt_string, "recalculate")) {
			recalculate = true;
		} else if (!strcmp(opt_string, "legacy_recalculate")) {
			ic->legacy_recalculate = true;
		} else {
			r = -EINVAL;
			ti->error = "Invalid argument";

@@ -3523,6 +3537,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
		}
	}

	if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
	    le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
	    dm_integrity_disable_recalculate(ic)) {
		ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
		r = -EOPNOTSUPP;
		goto bad;
	}

	ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
			1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL);
	if (IS_ERR(ic->bufio)) {

@@ -1011,7 +1011,7 @@ static int exec_mmap(struct mm_struct *mm)
	/* Notify parent that we're no longer interested in the old VM */
	tsk = current;
	old_mm = current->mm;
	mm_release(tsk, old_mm);
	exec_mm_release(tsk, old_mm);

	if (old_mm) {
		sync_mm_rss(old_mm);

@@ -1392,22 +1392,26 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
			ret = err;
	}

	/*
	 * If the inode has dirty timestamps and we need to write them, call
	 * mark_inode_dirty_sync() to notify the filesystem about it and to
	 * change I_DIRTY_TIME into I_DIRTY_SYNC.
	 */
	if ((inode->i_state & I_DIRTY_TIME) &&
	    (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
	     time_after(jiffies, inode->dirtied_time_when +
			dirtytime_expire_interval * HZ))) {
		trace_writeback_lazytime(inode);
		mark_inode_dirty_sync(inode);
	}

	/*
	 * Some filesystems may redirty the inode during the writeback
	 * due to delalloc, clear dirty metadata flags right before
	 * write_inode()
	 */
	spin_lock(&inode->i_lock);

	dirty = inode->i_state & I_DIRTY;
	if ((inode->i_state & I_DIRTY_TIME) &&
	    ((dirty & I_DIRTY_INODE) ||
	     wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
	     time_after(jiffies, inode->dirtied_time_when +
			dirtytime_expire_interval * HZ))) {
		dirty |= I_DIRTY_TIME;
		trace_writeback_lazytime(inode);
	}
	inode->i_state &= ~dirty;

	/*

@@ -1428,8 +1432,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)

	spin_unlock(&inode->i_lock);

	if (dirty & I_DIRTY_TIME)
		mark_inode_dirty_sync(inode);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & ~I_DIRTY_PAGES) {
		int err = write_inode(inode, wbc);

@@ -445,8 +445,6 @@ struct compat_kexec_segment;
struct compat_mq_attr;
struct compat_msgbuf;

extern void compat_exit_robust_list(struct task_struct *curr);

#define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))

#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)

@@ -2,7 +2,9 @@
#ifndef _LINUX_FUTEX_H
#define _LINUX_FUTEX_H

#include <linux/sched.h>
#include <linux/ktime.h>

#include <uapi/linux/futex.h>

struct inode;

@@ -51,15 +53,35 @@ union futex_key {
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }

#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
enum {
	FUTEX_STATE_OK,
	FUTEX_STATE_EXITING,
	FUTEX_STATE_DEAD,
};

static inline void futex_init_task(struct task_struct *tsk)
{
	tsk->robust_list = NULL;
#ifdef CONFIG_COMPAT
	tsk->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&tsk->pi_state_list);
	tsk->pi_state_cache = NULL;
	tsk->futex_state = FUTEX_STATE_OK;
	mutex_init(&tsk->futex_exit_mutex);
}

void futex_exit_recursive(struct task_struct *tsk);
void futex_exit_release(struct task_struct *tsk);
void futex_exec_release(struct task_struct *tsk);

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3);
#else
static inline void exit_robust_list(struct task_struct *curr)
{
}

static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
static inline void futex_exit_release(struct task_struct *tsk) { }
static inline void futex_exec_release(struct task_struct *tsk) { }
static inline long do_futex(u32 __user *uaddr, int op, u32 val,
			    ktime_t *timeout, u32 __user *uaddr2,
			    u32 val2, u32 val3)

@@ -68,12 +90,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
}
#endif

#ifdef CONFIG_FUTEX_PI
extern void exit_pi_state_list(struct task_struct *curr);
#else
static inline void exit_pi_state_list(struct task_struct *curr)
{
}
#endif

#endif

@@ -1080,6 +1080,8 @@ struct task_struct {
#endif
	struct list_head pi_state_list;
	struct futex_pi_state *pi_state_cache;
	struct mutex futex_exit_mutex;
	unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];

@@ -1479,7 +1481,6 @@ extern struct pid *cad_pid;
 */
#define PF_IDLE			0x00000002	/* I am an IDLE thread */
#define PF_EXITING		0x00000004	/* Getting shut down */
#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
#define PF_VCPU			0x00000010	/* I'm a virtual CPU */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */

@@ -119,8 +119,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
 * succeeds.
 */
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current tasks stale references to the old mm_struct */
extern void mm_release(struct task_struct *, struct mm_struct *);
/* Remove the current tasks stale references to the old mm_struct on exit() */
extern void exit_mm_release(struct task_struct *, struct mm_struct *);
/* Remove the current tasks stale references to the old mm_struct on exec() */
extern void exec_mm_release(struct task_struct *, struct mm_struct *);

#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);

@@ -498,7 +498,7 @@ static void exit_mm(void)
	struct mm_struct *mm = current->mm;
	struct core_state *core_state;

	mm_release(current, mm);
	exit_mm_release(current, mm);
	if (!mm)
		return;
	sync_mm_rss(mm);

@@ -819,32 +819,12 @@ void __noreturn do_exit(long code)
	 */
	if (unlikely(tsk->flags & PF_EXITING)) {
		pr_alert("Fixing recursive fault but reboot is needed!\n");
		/*
		 * We can do this unlocked here. The futex code uses
		 * this flag just to verify whether the pi state
		 * cleanup has been done or not. In the worst case it
		 * loops once more. We pretend that the cleanup was
		 * done as there is no way to return. Either the
		 * OWNER_DIED bit is set by now or we push the blocked
		 * task into the wait for ever nirwana as well.
		 */
		tsk->flags |= PF_EXITPIDONE;
		futex_exit_recursive(tsk);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}

	exit_signals(tsk);  /* sets PF_EXITING */
	/*
	 * Ensure that all new tsk->pi_lock acquisitions must observe
	 * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
	 */
	smp_mb();
	/*
	 * Ensure that we must observe the pi_state in exit_mm() ->
	 * mm_release() -> exit_pi_state_list().
	 */
	raw_spin_lock_irq(&tsk->pi_lock);
	raw_spin_unlock_irq(&tsk->pi_lock);

	/* sync mm's RSS info before statistics gathering */
	if (tsk->mm)

@@ -919,12 +899,6 @@ void __noreturn do_exit(long code)
	 * Make sure we are holding no locks:
	 */
	debug_check_no_locks_held();
	/*
	 * We can do this unlocked here. The futex code uses this flag
	 * just to verify whether the pi state cleanup has been done
	 * or not. In the worst case it loops once more.
	 */
	tsk->flags |= PF_EXITPIDONE;

	if (tsk->io_context)
		exit_io_context(tsk);

@@ -1232,24 +1232,8 @@ static int wait_for_vfork_done(struct task_struct *child,
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}
#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif
	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
#endif

	uprobe_free_utask(tsk);

	/* Get rid of any cached register state */

@@ -1282,6 +1266,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
		complete_vfork_done(tsk);
}

void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	futex_exit_release(tsk);
	mm_release(tsk, mm);
}

void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	futex_exec_release(tsk);
	mm_release(tsk, mm);
}

/*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.

@@ -2066,14 +2062,8 @@ static __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_BLOCK
	p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	futex_init_task(p);

	/*
	 * sigaltstack should be cleared when sharing the same VM
	 */

kernel/futex.c
@@ -341,6 +341,12 @@ static inline bool should_fail_futex(bool fshared)
}
#endif /* CONFIG_FAIL_FUTEX */

#ifdef CONFIG_COMPAT
static void compat_exit_robust_list(struct task_struct *curr);
#else
static inline void compat_exit_robust_list(struct task_struct *curr) { }
#endif

static inline void futex_get_mm(union futex_key *key)
{
	mmgrab(key->private.mm);

@@ -833,6 +839,29 @@ static struct futex_pi_state *alloc_pi_state(void)
	return pi_state;
}

static void pi_state_update_owner(struct futex_pi_state *pi_state,
				  struct task_struct *new_owner)
{
	struct task_struct *old_owner = pi_state->owner;

	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);

	if (old_owner) {
		raw_spin_lock(&old_owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&old_owner->pi_lock);
	}

	if (new_owner) {
		raw_spin_lock(&new_owner->pi_lock);
		WARN_ON(!list_empty(&pi_state->list));
		list_add(&pi_state->list, &new_owner->pi_state_list);
		pi_state->owner = new_owner;
		raw_spin_unlock(&new_owner->pi_lock);
	}
}

static void get_pi_state(struct futex_pi_state *pi_state)
{
	WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));

@@ -855,17 +884,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		struct task_struct *owner;
		unsigned long flags;

		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
		owner = pi_state->owner;
		if (owner) {
			raw_spin_lock(&owner->pi_lock);
			list_del_init(&pi_state->list);
			raw_spin_unlock(&owner->pi_lock);
		}
		rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
		pi_state_update_owner(pi_state, NULL);
		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
	}

@@ -890,7 +913,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
 * Kernel cleans up PI-state, but userspace is likely hosed.
 * (Robust-futex cleanup is separate and might save the day for userspace.)
 */
void exit_pi_state_list(struct task_struct *curr)
static void exit_pi_state_list(struct task_struct *curr)
{
	struct list_head *next, *head = &curr->pi_state_list;
	struct futex_pi_state *pi_state;

@@ -960,7 +983,8 @@ void exit_pi_state_list(struct task_struct *curr)
	}
	raw_spin_unlock_irq(&curr->pi_lock);
}

#else
static inline void exit_pi_state_list(struct task_struct *curr) { }
#endif

/*

@@ -1010,7 +1034,8 @@ void exit_pi_state_list(struct task_struct *curr)
 *	FUTEX_OWNER_DIED bit. See [4]
 *
 * [10] There is no transient state which leaves owner and user space
 *	TID out of sync.
 *	TID out of sync. Except one error case where the kernel is denied
 *	write access to the user address, see fixup_pi_state_owner().
 *
 *
 * Serialization and lifetime rules:

@@ -1169,16 +1194,47 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
	return ret;
}

/**
 * wait_for_owner_exiting - Block until the owner has exited
 * @exiting:	Pointer to the exiting task
 *
 * Caller must hold a refcount on @exiting.
 */
static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
{
	if (ret != -EBUSY) {
		WARN_ON_ONCE(exiting);
		return;
	}

	if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
		return;

	mutex_lock(&exiting->futex_exit_mutex);
	/*
	 * No point in doing state checking here. If the waiter got here
	 * while the task was in exec()->exec_futex_release() then it can
	 * have any FUTEX_STATE_* value when the waiter has acquired the
	 * mutex. OK, if running, EXITING or DEAD if it reached exit()
	 * already. Highly unlikely and not a problem. Just one more round
	 * through the futex maze.
	 */
	mutex_unlock(&exiting->futex_exit_mutex);

	put_task_struct(exiting);
}

static int handle_exit_race(u32 __user *uaddr, u32 uval,
			    struct task_struct *tsk)
{
	u32 uval2;

	/*
	 * If PF_EXITPIDONE is not yet set, then try again.
	 * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
	 * caller that the alleged owner is busy.
	 */
	if (tsk && !(tsk->flags & PF_EXITPIDONE))
		return -EAGAIN;
	if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
		return -EBUSY;

	/*
	 * Reread the user space value to handle the following situation:

@@ -1196,8 +1252,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
	 *    *uaddr = 0xC0000000;		tsk = get_task(PID);
	 *  }					 if (!tsk->flags & PF_EXITING) {
	 *  ...					   attach();
	 *  tsk->flags |= PF_EXITPIDONE;	 } else {
	 *					   if (!(tsk->flags & PF_EXITPIDONE))
	 *  tsk->futex_state =			 } else {
	 *	FUTEX_STATE_DEAD;		   if (tsk->futex_state !=
	 *						FUTEX_STATE_DEAD)
	 *					     return -EAGAIN;
	 *					   return -ESRCH; <--- FAIL
	 *					 }

@@ -1228,7 +1285,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
 * it after doing proper sanity checks.
 */
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
			      struct futex_pi_state **ps)
			      struct futex_pi_state **ps,
			      struct task_struct **exiting)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct futex_pi_state *pi_state;

@@ -1253,22 +1311,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
	}

	/*
	 * We need to look at the task state flags to figure out,
	 * whether the task is exiting. To protect against the do_exit
	 * change of the task flags, we do this protected by
	 * p->pi_lock:
	 * We need to look at the task state to figure out, whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->flags & PF_EXITING)) {
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When PF_EXITPIDONE is
		 * set, we know that the task has finished the
		 * cleanup:
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
		put_task_struct(p);
		/*
		 * If the owner task is between FUTEX_STATE_EXITING and
		 * FUTEX_STATE_DEAD then store the task pointer and keep
		 * the reference on the task struct. The calling code will
		 * drop all locks, wait for the task to reach
		 * FUTEX_STATE_DEAD and then drop the refcount. This is
		 * required to prevent a live lock when the current task
		 * preempted the exiting task between the two states.
		 */
		if (ret == -EBUSY)
			*exiting = p;
		else
			put_task_struct(p);
		return ret;
	}

@@ -1307,7 +1376,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,

static int lookup_pi_state(u32 __user *uaddr, u32 uval,
			   struct futex_hash_bucket *hb,
			   union futex_key *key, struct futex_pi_state **ps)
			   union futex_key *key, struct futex_pi_state **ps,
			   struct task_struct **exiting)
{
	struct futex_q *top_waiter = futex_top_waiter(hb, key);

@@ -1322,7 +1392,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
	 * We are the first waiter - try to look up the owner based on
	 * @uval and attach to it.
	 */
	return attach_to_pi_owner(uaddr, uval, key, ps);
	return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}

static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)

@@ -1350,6 +1420,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:

@@ -1358,11 +1430,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 *  - <0 - error
 *
 * The hb->lock and futex_key refs shall be held by the caller.
 *
 * @exiting is only set when the return value is -EBUSY. If so, this holds
 * a refcount on the exiting task on return and the caller needs to drop it
 * after waiting for the exit to complete.
 */
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task, int set_waiters)
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;

@@ -1432,7 +1510,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uaddr, newval, key, ps);
	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}

/**

@@ -1537,26 +1615,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
		ret = -EINVAL;
	}

	if (ret)
		goto out_unlock;

	/*
	 * This is a point of no return; once we modify the uval there is no
	 * going back and subsequent operations must not fail.
	 */

	raw_spin_lock(&pi_state->owner->pi_lock);
	WARN_ON(list_empty(&pi_state->list));
	list_del_init(&pi_state->list);
	raw_spin_unlock(&pi_state->owner->pi_lock);

	raw_spin_lock(&new_owner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &new_owner->pi_state_list);
	pi_state->owner = new_owner;
	raw_spin_unlock(&new_owner->pi_lock);

	postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
	if (!ret) {
		/*
		 * This is a point of no return; once we modified the uval
		 * there is no going back and subsequent operations must
		 * not fail.
		 */
		pi_state_update_owner(pi_state, new_owner);
		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
	}

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

@@ -1853,6 +1920,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 * @key1:		the from futex key
 * @key2:		the to futex key
 * @ps:			address to store the pi_state pointer
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Try and get the lock on behalf of the top waiter if we can do it atomically.

@@ -1860,16 +1929,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
 * hb1 and hb2 must be held by the caller.
 *
 * @exiting is only set when the return value is -EBUSY. If so, this holds
 * a refcount on the exiting task on return and the caller needs to drop it
 * after waiting for the exit to complete.
 *
 * Return:
 *  - 0 - failed to acquire the lock atomically;
 *  - >0 - acquired the lock, return value is vpid of the top_waiter
 *  - <0 - error
 */
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
				      struct futex_hash_bucket *hb1,
				      struct futex_hash_bucket *hb2,
				      union futex_key *key1, union futex_key *key2,
				      struct futex_pi_state **ps, int set_waiters)
static int
futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
			   struct futex_hash_bucket *hb2, union futex_key *key1,
			   union futex_key *key2, struct futex_pi_state **ps,
			   struct task_struct **exiting, int set_waiters)
{
	struct futex_q *top_waiter = NULL;
	u32 curval;

@@ -1906,7 +1979,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
	 */
	vpid = task_pid_vnr(top_waiter->task);
	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
				   set_waiters);
				   exiting, set_waiters);
	if (ret == 1) {
		requeue_pi_wake_futex(top_waiter, key2, hb2);
		return vpid;

@@ -2035,6 +2108,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
	}

	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
		struct task_struct *exiting = NULL;

		/*
		 * Attempt to acquire uaddr2 and wake the top waiter. If we
		 * intend to requeue waiters, force setting the FUTEX_WAITERS

@@ -2042,7 +2117,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
		 * faults rather in the requeue loop below.
		 */
		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
						 &key2, &pi_state, nr_requeue);
						 &key2, &pi_state,
						 &exiting, nr_requeue);

		/*
		 * At this point the top_waiter has either taken uaddr2 or is

@@ -2069,7 +2145,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 * If that call succeeds then we have pi_state and an
			 * initial refcount on it.
			 */
			ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
			ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
					      &pi_state, &exiting);
		}

		switch (ret) {

@@ -2087,17 +2164,24 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			if (!ret)
				goto retry;
			goto out;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - Owner is exiting and we just wait for the
			 * - EBUSY: Owner is exiting and we just wait for the
			 *   exit to complete.
			 * - The user space value changed.
			 * - EAGAIN: The user space value changed.
			 */
			double_unlock_hb(hb1, hb2);
			hb_waiters_dec(hb2);
			put_futex_key(&key2);
			put_futex_key(&key1);
			/*
			 * Handle the case where the owner is in the middle of
			 * exiting. Wait for the exit to complete otherwise
			 * this task might loop forever, aka. live lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:

@@ -2362,18 +2446,13 @@ static void unqueue_me_pi(struct futex_q *q)
	spin_unlock(q->lock_ptr);
}

static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				  struct task_struct *argowner)
{
	u32 uval, uninitialized_var(curval), newval, newtid;
	struct futex_pi_state *pi_state = q->pi_state;
	u32 uval, uninitialized_var(curval), newval;
	struct task_struct *oldowner, *newowner;
	u32 newtid;
	int ret, err = 0;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	int err = 0;

	oldowner = pi_state->owner;

@@ -2407,14 +2486,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
			return 0;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We got the lock after all, nothing to fix. */
			ret = 0;
			goto out_unlock;
			/* We got the lock. pi_state is correct. Tell caller. */
			return 1;
		}

		/*

@@ -2441,8 +2518,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			ret = 0;
			goto out_unlock;
			return 1;
		}
		newowner = argowner;
	}

@@ -2472,22 +2548,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	if (pi_state->owner != NULL) {
		raw_spin_lock(&pi_state->owner->pi_lock);
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&pi_state->owner->pi_lock);
	}
	pi_state_update_owner(pi_state, newowner);

	pi_state->owner = newowner;

	raw_spin_lock(&newowner->pi_lock);
	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &newowner->pi_state_list);
	raw_spin_unlock(&newowner->pi_lock);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	return 0;
	return argowner == current;

	/*
	 * In order to reschedule or handle a page fault, we need to drop the

@@ -2508,17 +2571,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,

	switch (err) {
	case -EFAULT:
		ret = fault_in_user_writeable(uaddr);
		err = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		ret = 0;
		err = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		ret = err;
		break;
	}

@@ -2528,17 +2590,44 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner) {
		ret = 0;
		goto out_unlock;
	}
	if (pi_state->owner != oldowner)
		return argowner == current;

	if (ret)
		goto out_unlock;
	/* Retry if err was -EAGAIN or the fault in succeeded */
	if (!err)
		goto retry;

	goto retry;
	/*
	 * fault_in_user_writeable() failed so user state is immutable. At
	 * best we can make the kernel state consistent but user state will
	 * be most likely hosed and any subsequent unlock operation will be
	 * rejected due to PI futex rule [10].
	 *
	 * Ensure that the rtmutex owner is also the pi_state owner despite
	 * the user space value claiming something different. There is no
	 * point in unlocking the rtmutex if current is the owner as it
	 * would need to wait until the next waiter has taken the rtmutex
	 * to guarantee consistent state. Keep it simple. Userspace asked
	 * for this wreckaged state.
	 *
	 * The rtmutex has an owner - either current or some other
	 * task. See the EAGAIN loop above.
	 */
	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));

out_unlock:
	return err;
}

static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	int ret;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	ret = __fixup_pi_state_owner(uaddr, q, argowner);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}

@@ -2562,8 +2651,6 @@ static long futex_wait_restart(struct restart_block *restart);
 */
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	int ret = 0;

	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we

@@ -2574,8 +2661,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
		 * stable state, anything else needs more attention.
		 */
		if (q->pi_state->owner != current)
			ret = fixup_pi_state_owner(uaddr, q, current);
		goto out;
			return fixup_pi_state_owner(uaddr, q, current);
		return 1;
	}

	/*

@@ -2586,24 +2673,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
	 * Another speculative read; pi_state->owner == current is unstable
	 * but needs our attention.
	 */
	if (q->pi_state->owner == current) {
		ret = fixup_pi_state_owner(uaddr, q, NULL);
		goto out;
	}
	if (q->pi_state->owner == current)
		return fixup_pi_state_owner(uaddr, q, NULL);

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex.
	 * the owner of the rt_mutex. Warn and establish consistent state.
	 */
	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
				"pi-state %p\n", ret,
				q->pi_state->pi_mutex.owner,
				q->pi_state->owner);
	}
	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
		return fixup_pi_state_owner(uaddr, q, current);

out:
	return ret ? ret : locked;
	return 0;
}

/**

@@ -2824,7 +2904,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			 ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_pi_state *pi_state = NULL;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;

@@ -2852,7 +2932,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
retry_private:
	hb = queue_lock(&q);

	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
				   &exiting, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,

@@ -2865,15 +2946,22 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - Task is exiting and we just wait for the
			 * - EBUSY: Task is exiting and we just wait for the
			 *   exit to complete.
			 * - The user space value changed.
			 * - EAGAIN: The user space value changed.
			 */
			queue_unlock(hb);
			put_futex_key(&q.key);
			/*
			 * Handle the case where the owner is in the middle of
			 * exiting. Wait for the exit to complete otherwise
			 * this task might loop forever, aka. live lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:

@@ -2958,23 +3046,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
	if (res)
		ret = (res < 0) ? res : 0;

	/*
	 * If fixup_owner() faulted and was unable to handle the fault, unlock
	 * it and return the fault to userspace.
	 */
	if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
		pi_state = q.pi_state;
		get_pi_state(pi_state);
	}

	/* Unqueue and drop the lock */
	unqueue_me_pi(&q);

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	goto out_put_key;

out_unlock_put_key:

@@ -3240,7 +3314,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
				 u32 __user *uaddr2)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct futex_pi_state *pi_state = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	union futex_key key2 = FUTEX_KEY_INIT;

@@ -3325,16 +3398,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
		if (q.pi_state && (q.pi_state->owner != current)) {
			spin_lock(q.lock_ptr);
			ret = fixup_pi_state_owner(uaddr2, &q, current);
			if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
				pi_state = q.pi_state;
				get_pi_state(pi_state);
			}
			/*
			 * Drop the reference to the pi state which
			 * the requeue_pi() code acquired for us.
			 */
			put_pi_state(q.pi_state);
			spin_unlock(q.lock_ptr);
			/*
			 * Adjust the return value. It's either -EFAULT or
			 * success (1) but the caller expects 0 for success.
			 */
			ret = ret < 0 ? ret : 0;
		}
	} else {
		struct rt_mutex *pi_mutex;

@@ -3365,25 +3439,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
		if (res)
			ret = (res < 0) ? res : 0;

		/*
		 * If fixup_pi_state_owner() faulted and was unable to handle
		 * the fault, unlock the rt_mutex and return the fault to
		 * userspace.
		 */
		if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
			pi_state = q.pi_state;
			get_pi_state(pi_state);
		}

		/* Unqueue and drop the lock. */
		unqueue_me_pi(&q);
	}

	if (pi_state) {
		rt_mutex_futex_unlock(&pi_state->pi_mutex);
		put_pi_state(pi_state);
	}

	if (ret == -EINTR) {
		/*
		 * We've already been requeued, but cannot restart by calling

@@ -3625,7 +3684,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
 *
 * We silently return on any sign of list-walking problem.
 */
void exit_robust_list(struct task_struct *curr)
static void exit_robust_list(struct task_struct *curr)
{
	struct robust_list_head __user *head = curr->robust_list;
	struct robust_list __user *entry, *next_entry, *pending;

@@ -3690,6 +3749,114 @@ void exit_robust_list(struct task_struct *curr)
	}
}

static void futex_cleanup(struct task_struct *tsk)
{
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif

	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
}

/**
 * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
 * @tsk:	task to set the state on
 *
 * Set the futex exit state of the task lockless. The futex waiter code
 * observes that state when a task is exiting and loops until the task has
 * actually finished the futex cleanup. The worst case for this is that the
 * waiter runs through the wait loop until the state becomes visible.
 *
 * This is called from the recursive fault handling path in do_exit().
 *
 * This is best effort. Either the futex exit code has run already or
 * not. If the OWNER_DIED bit has been set on the futex then the waiter can
 * take it over. If not, the problem is pushed back to user space. If the
 * futex exit code did not run yet, then an already queued waiter might
 * block forever, but there is nothing which can be done about that.
 */
void futex_exit_recursive(struct task_struct *tsk)
{
	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
	if (tsk->futex_state == FUTEX_STATE_EXITING)
		mutex_unlock(&tsk->futex_exit_mutex);
	tsk->futex_state = FUTEX_STATE_DEAD;
}

static void futex_cleanup_begin(struct task_struct *tsk)
{
	/*
	 * Prevent various race issues against a concurrent incoming waiter
	 * including live locks by forcing the waiter to block on
	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
	 * attach_to_pi_owner().
	 */
	mutex_lock(&tsk->futex_exit_mutex);

	/*
	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
	 *
	 * This ensures that all subsequent checks of tsk->futex_state in
	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
	 * tsk->pi_lock held.
	 *
	 * It guarantees also that a pi_state which was queued right before
	 * the state change under tsk->pi_lock by a concurrent waiter must
	 * be observed in exit_pi_state_list().
	 */
	raw_spin_lock_irq(&tsk->pi_lock);
	tsk->futex_state = FUTEX_STATE_EXITING;
	raw_spin_unlock_irq(&tsk->pi_lock);
}

static void futex_cleanup_end(struct task_struct *tsk, int state)
{
	/*
	 * Lockless store. The only side effect is that an observer might
	 * take another loop until it becomes visible.
	 */
	tsk->futex_state = state;
	/*
	 * Drop the exit protection. This unblocks waiters which observed
	 * FUTEX_STATE_EXITING to reevaluate the state.
	 */
	mutex_unlock(&tsk->futex_exit_mutex);
}

void futex_exec_release(struct task_struct *tsk)
{
	/*
	 * The state handling is done for consistency, but in the case of
	 * exec() there is no way to prevent futher damage as the PID stays
	 * the same. But for the unlikely and arguably buggy case that a
	 * futex is held on exec(), this provides at least as much state
	 * consistency protection which is possible.
	 */
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	/*
	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
	 * exec a new binary.
	 */
	futex_cleanup_end(tsk, FUTEX_STATE_OK);
}

void futex_exit_release(struct task_struct *tsk)
{
	futex_cleanup_begin(tsk);
	futex_cleanup(tsk);
	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
}

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{

@@ -3817,7 +3984,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
 *
 * We silently return on any sign of list-walking problem.
 */
void compat_exit_robust_list(struct task_struct *curr)
static void compat_exit_robust_list(struct task_struct *curr)
{
	struct compat_robust_list_head __user *head = curr->compat_robust_list;
	struct robust_list __user *entry, *next_entry, *pending;

@@ -1719,8 +1719,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 * possible because it belongs to the pi_state which is about to be freed
 * and it is not longer visible to other tasks.
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
void rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);

@@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				       struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
				  struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
				       struct rt_mutex_waiter *waiter,

@@ -4393,6 +4393,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)

	if (!cpumask_test_cpu(cpu, buffer->cpumask))
		return;
	/* prevent another thread from changing buffer sizes */
	mutex_lock(&buffer->mutex);

	atomic_inc(&buffer->resize_disabled);
	atomic_inc(&cpu_buffer->record_disabled);

@@ -4416,6 +4418,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)

	atomic_dec(&cpu_buffer->record_disabled);
	atomic_dec(&buffer->resize_disabled);

	mutex_unlock(&buffer->mutex);
}
EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);

@@ -5815,10 +5815,8 @@ static int sysfs_slab_add(struct kmem_cache *s)

	s->kobj.kset = kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err) {
		kobject_put(&s->kobj);
	if (err)
		goto out;
	}

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)

@@ -15,10 +15,6 @@ endef

$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)

HOSTCC ?= gcc
HOSTLD ?= ld
HOSTAR ?= ar

export HOSTCC HOSTLD HOSTAR

ifeq ($(V),1)

@@ -7,15 +7,6 @@ ARCH := x86
endif

# always use the host compiler
ifneq ($(LLVM),)
HOSTAR ?= llvm-ar
HOSTCC ?= clang
HOSTLD ?= ld.lld
else
HOSTAR ?= ar
HOSTCC ?= gcc
HOSTLD ?= ld
endif
AR = $(HOSTAR)
CC = $(HOSTCC)
LD = $(HOSTLD)

@@ -148,10 +148,6 @@ endef

LD += $(EXTRA_LDFLAGS)

HOSTCC ?= gcc
HOSTLD ?= ld
HOSTAR ?= ar

PKG_CONFIG = $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config

@@ -57,7 +57,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
CROSS_COMPILE ?= $(CROSS)
LD = $(CC)
HOSTCC = gcc

# check if compiler option is supported
cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}

@@ -60,6 +60,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
$(call allow-override,CXX,$(CROSS_COMPILE)g++)
$(call allow-override,STRIP,$(CROSS_COMPILE)strip)

ifneq ($(LLVM),)
HOSTAR ?= llvm-ar
HOSTCC ?= clang
HOSTLD ?= ld.lld
else
HOSTAR ?= ar
HOSTCC ?= gcc
HOSTLD ?= ld
endif

ifeq ($(CC_NO_CLANG), 1)
EXTRA_WARNINGS += -Wstrict-aliasing=3
endif