From a6067c6e630f99e9b994707b5659acdecb68a34d Mon Sep 17 00:00:00 2001 From: Ilya Trukhanov Date: Tue, 2 Jul 2019 13:37:16 +0300 Subject: [PATCH 001/101] HID: Add 044f:b320 ThrustMaster, Inc. 2 in 1 DT [ Upstream commit 65f11c72780fa9d598df88def045ccb6a885cf80 ] Enable force feedback for the Thrustmaster Dual Trigger 2 in 1 Rumble Force gamepad. Compared to other Thrustmaster devices, left and right rumble motors here are swapped. Signed-off-by: Ilya Trukhanov Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-tmff.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/hid/hid-tmff.c b/drivers/hid/hid-tmff.c index bea8def64f43..30b8c3256c99 100644 --- a/drivers/hid/hid-tmff.c +++ b/drivers/hid/hid-tmff.c @@ -34,6 +34,8 @@ #include "hid-ids.h" +#define THRUSTMASTER_DEVICE_ID_2_IN_1_DT 0xb320 + static const signed short ff_rumble[] = { FF_RUMBLE, -1 @@ -88,6 +90,7 @@ static int tmff_play(struct input_dev *dev, void *data, struct hid_field *ff_field = tmff->ff_field; int x, y; int left, right; /* Rumbling */ + int motor_swap; switch (effect->type) { case FF_CONSTANT: @@ -112,6 +115,13 @@ static int tmff_play(struct input_dev *dev, void *data, ff_field->logical_minimum, ff_field->logical_maximum); + /* 2-in-1 strong motor is left */ + if (hid->product == THRUSTMASTER_DEVICE_ID_2_IN_1_DT) { + motor_swap = left; + left = right; + right = motor_swap; + } + dbg_hid("(left,right)=(%08x, %08x)\n", left, right); ff_field->value[0] = left; ff_field->value[1] = right; @@ -238,6 +248,8 @@ static const struct hid_device_id tm_devices[] = { .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304), /* FireStorm Dual Power 2 (and 3) */ .driver_data = (unsigned long)ff_rumble }, + { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, THRUSTMASTER_DEVICE_ID_2_IN_1_DT), /* Dual Trigger 2-in-1 */ + .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb323), /* Dual Trigger 3-in-1 (PC Mode) */ .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb324), /* Dual Trigger 3-in-1 (PS3 Mode) */ From 326175aa28fd29faf7b6554f4f370cae14155aae Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 13 May 2019 13:47:25 +0200 Subject: [PATCH 002/101] MIPS: kernel: only use i8253 clocksource with periodic clockevent [ Upstream commit a07e3324538a989b7cdbf2c679be6a7f9df2544f ] i8253 clocksource needs a free running timer. This could only be used, if i8253 clockevent is set up as periodic. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/kernel/i8253.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index 5f209f111e59..df7ddd246eaa 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -32,7 +32,8 @@ void __init setup_pit_timer(void) static int __init init_pit_clocksource(void) { - if (num_possible_cpus() > 1) /* PIT does not scale! */ + if (num_possible_cpus() > 1 || /* PIT does not scale! */ + !clockevent_state_periodic(&i8253_clockevent)) return 0; return clocksource_i8253_init(); From 70b4edd74b6dbd28e12486e5c679c7c128c07b54 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Tue, 16 Jul 2019 10:36:56 +0300 Subject: [PATCH 003/101] mips: fix cacheinfo [ Upstream commit b8bea8a5e5d942e62203416ab41edecaed4fda02 ] Because CONFIG_OF defined for MIPS, cacheinfo attempts to fill information from DT, ignoring data filled by architecture routine. This leads to error reported cacheinfo: Unable to detect cache hierarchy for CPU 0 Way to fix this provided in commit fac51482577d ("drivers: base: cacheinfo: fix x86 with CONFIG_OF enabled") Utilize same mechanism to report that cacheinfo set by architecture specific function Signed-off-by: Vladimir Kondratiev Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/mips/kernel/cacheinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 97d5239ca47b..428ef2189203 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -80,6 +80,8 @@ static int __populate_cache_leaves(unsigned int cpu) if (c->tcache.waysize) populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED); + this_cpu_ci->cpu_map_populated = true; + return 0; } From 71305e8ee503f03561377ec6584d5b284973599f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 20 Jul 2019 07:22:45 -0500 Subject: [PATCH 004/101] netfilter: ebtables: fix a memory leak bug in compat [ Upstream commit 15a78ba1844a8e052c1226f930133de4cef4e7ad ] In compat_do_replace(), a temporary buffer is allocated through vmalloc() to hold entries copied from the user space. The buffer address is firstly saved to 'newinfo->entries', and later on assigned to 'entries_tmp'. Then the entries in this temporary buffer is copied to the internal kernel structure through compat_copy_entries(). If this copy process fails, compat_do_replace() should be terminated. However, the allocated temporary buffer is not freed on this path, leading to a memory leak. To fix the bug, free the buffer before returning from compat_do_replace(). Signed-off-by: Wenwen Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/ebtables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 995b3842ba7c..62ffc989a44a 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2274,8 +2274,10 @@ static int compat_do_replace(struct net *net, void __user *user, state.buf_kern_len = size64; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); - if (WARN_ON(ret < 0)) + if (WARN_ON(ret < 0)) { + vfree(entries_tmp); goto out_unlock; + } vfree(entries_tmp); tmp.entries_size = size64; From e6dc6409f47446ba632b8bd7415529e5986d99a2 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 18 Jul 2019 09:43:33 +0100 Subject: [PATCH 005/101] ASoC: dapm: Fix handling of custom_stop_condition on DAPM graph walks [ Upstream commit 8dd26dff00c0636b1d8621acaeef3f6f3a39dd77 ] DPCM uses snd_soc_dapm_dai_get_connected_widgets to build a list of the widgets connected to a specific front end DAI so it can search through this list for available back end DAIs. The custom_stop_condition was added to is_connected_ep to facilitate this list not containing more widgets than is necessary. Doing so both speeds up the DPCM handling as less widgets need to be searched and avoids issues with CODEC to CODEC links as these would be confused with back end DAIs if they appeared in the list of available widgets. custom_stop_condition was implemented by aborting the graph walk when the condition is triggered, however there is an issue with this approach. Whilst walking the graph is_connected_ep should update the endpoints cache on each widget, if the walk is aborted the number of attached end points is unknown for that sub-graph. When the stop condition triggered, the original patch ignored the triggering widget and returned zero connected end points; a later patch updated this to set the triggering widget's cache to 1 and return that. Both of these approaches result in inaccurate values being stored in various end point caches as the values propagate back through the graph, which can result in later issues with widgets powering/not powering unexpectedly. As the original goal was to reduce the size of the widget list passed to the DPCM code, the simplest solution is to limit the functionality of the custom_stop_condition to the widget list. This means the rest of the graph will still be processed resulting in correct end point caches, but only widgets up to the stop condition will be added to the returned widget list. Fixes: 6742064aef7f ("ASoC: dapm: support user-defined stop condition in dai_get_connected_widgets") Fixes: 5fdd022c2026 ("ASoC: dpcm: play nice with CODEC<->CODEC links") Fixes: 09464974eaa8 ("ASoC: dapm: Fix to return correct path list in is_connected_ep.") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20190718084333.15598-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-dapm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 3bfc788372f3..4ce57510b623 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1145,8 +1145,8 @@ static __always_inline int is_connected_ep(struct snd_soc_dapm_widget *widget, list_add_tail(&widget->work_list, list); if (custom_stop_condition && custom_stop_condition(widget, dir)) { - widget->endpoints[dir] = 1; - return widget->endpoints[dir]; + list = NULL; + custom_stop_condition = NULL; } if ((widget->is_ep & SND_SOC_DAPM_DIR_TO_EP(dir)) && widget->connected) { @@ -1183,8 +1183,8 @@ static __always_inline int is_connected_ep(struct snd_soc_dapm_widget *widget, * * Optionally, can be supplied with a function acting as a stopping condition. * This function takes the dapm widget currently being examined and the walk - * direction as an arguments, it should return true if the walk should be - * stopped and false otherwise. + * direction as an arguments, it should return true if widgets from that point + * in the graph onwards should not be added to the widget list. */ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget, struct list_head *list, From b7038c195fd172b322ed1bf71d62ca3674407077 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 19 Jul 2019 11:06:11 +0200 Subject: [PATCH 006/101] selftests/bpf: fix sendmsg6_prog on s390 [ Upstream commit c8eee4135a456bc031d67cadc454e76880d1afd8 ] "sendmsg6: rewrite IP & port (C)" fails on s390, because the code in sendmsg_v6_prog() assumes that (ctx->user_ip6[0] & 0xFFFF) refers to leading IPv6 address digits, which is not the case on big-endian machines. Since checking bitwise operations doesn't seem to be the point of the test, replace two short comparisons with a single int comparison. Signed-off-by: Ilya Leoshkevich Acked-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/sendmsg6_prog.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c index 5aeaa284fc47..a68062820410 100644 --- a/tools/testing/selftests/bpf/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/sendmsg6_prog.c @@ -41,8 +41,7 @@ int sendmsg_v6_prog(struct bpf_sock_addr *ctx) } /* Rewrite destination. */ - if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) && - ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) { + if (ctx->user_ip6[0] == bpf_htonl(0xFACEB00C)) { ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1); ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2); From a1e5a76db8ddfd8dbd629af14c73e6b7de4955bd Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 16 Jul 2019 17:25:10 -0500 Subject: [PATCH 007/101] bonding: Force slave speed check after link state recovery for 802.3ad [ Upstream commit 12185dfe44360f814ac4ead9d22ad2af7511b2e9 ] The following scenario was encountered during testing of logical partition mobility on pseries partitions with bonded ibmvnic adapters in LACP mode. 1. Driver receives a signal that the device has been swapped, and it needs to reset to initialize the new device. 2. Driver reports loss of carrier and begins initialization. 3. Bonding driver receives NETDEV_CHANGE notifier and checks the slave's current speed and duplex settings. Because these are unknown at the time, the bond sets its link state to BOND_LINK_FAIL and handles the speed update, clearing AD_PORT_LACP_ENABLE. 4. Driver finishes recovery and reports that the carrier is on. 5. Bond receives a new notification and checks the speed again. The speeds are valid but miimon has not altered the link state yet. AD_PORT_LACP_ENABLE remains off. Because the slave's link state is still BOND_LINK_FAIL, no further port checks are made when it recovers. Though the slave devices are operational and have valid speed and duplex settings, the bond will not send LACPDU's. The simplest fix I can see is to force another speed check in bond_miimon_commit. This way the bond will update AD_PORT_LACP_ENABLE if needed when transitioning from BOND_LINK_FAIL to BOND_LINK_UP. CC: Jarod Wilson CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8f14f85b8e95..0d2392c4b625 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2190,6 +2190,15 @@ static void bond_miimon_commit(struct bonding *bond) bond_for_each_slave(bond, slave, iter) { switch (slave->new_link) { case BOND_LINK_NOCHANGE: + /* For 802.3ad mode, check current slave speed and + * duplex again in case its port was disabled after + * invalid speed/duplex reporting but recovered before + * link monitoring could make a decision on the actual + * link status + */ + if (BOND_MODE(bond) == BOND_MODE_8023AD && + slave->link == BOND_LINK_UP) + bond_3ad_adapter_speed_duplex_changed(slave); continue; case BOND_LINK_UP: From 192bbe9d57ed9dd1285796c5f0236bda91842c38 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 19 Jul 2019 16:38:48 +0200 Subject: [PATCH 008/101] net: mvpp2: Don't check for 3 consecutive Idle frames for 10G links [ Upstream commit bba18318e7d1d5c8b0bbafd65010a0cee3c65608 ] PPv2's XLGMAC can wait for 3 idle frames before triggering a link up event. This can cause the link to be stuck low when there's traffic on the interface, so disable this feature. Fixes: 4bb043262878 ("net: mvpp2: phylink support") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 6455511457ca..9b608d23ff7e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4412,9 +4412,9 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, if (state->pause & MLO_PAUSE_RX) ctrl0 |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN; - ctrl4 &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC; - ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC | - MVPP22_XLG_CTRL4_EN_IDLE_CHECK; + ctrl4 &= ~(MVPP22_XLG_CTRL4_MACMODSELECT_GMAC | + MVPP22_XLG_CTRL4_EN_IDLE_CHECK); + ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC; writel(ctrl0, port->base + MVPP22_XLG_CTRL0_REG); writel(ctrl4, port->base + MVPP22_XLG_CTRL4_REG); From ef52e2b9a621d1a6ccbe2fe3e7edd7e14ff0f226 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 23 Jul 2019 11:19:25 +0300 Subject: [PATCH 009/101] selftests: forwarding: gre_multipath: Enable IPv4 forwarding [ Upstream commit efa7b79f675da0efafe3f32ba0d6efe916cf4867 ] The test did not enable IPv4 forwarding during its setup phase, which causes the test to fail on machines where IPv4 forwarding is disabled. Fixes: 54818c4c4b93 ("selftests: forwarding: Test multipath tunneling") Signed-off-by: Ido Schimmel Reported-by: Stephen Suryaputra Tested-by: Stephen Suryaputra Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/forwarding/gre_multipath.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh index cca2baa03fb8..37d7297e1cf8 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh @@ -187,12 +187,16 @@ setup_prepare() sw1_create sw2_create h2_create + + forwarding_enable } cleanup() { pre_cleanup + forwarding_restore + h2_destroy sw2_destroy sw1_destroy From 43d31fd9a8f243cd2a021df682dce1e6979a2a2e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 23 Jul 2019 11:19:26 +0300 Subject: [PATCH 010/101] selftests: forwarding: gre_multipath: Fix flower filters [ Upstream commit 1be79d89b7ae96e004911bd228ce8c2b5cc6415f ] The TC filters used in the test do not work with veth devices because the outer Ethertype is 802.1Q and not IPv4. The test passes with mlxsw netdevs since the hardware always looks at "The first Ethertype that does not point to either: VLAN, CNTAG or configurable Ethertype". Fix this by matching on the VLAN ID instead, but on the ingress side. The reason why this is not performed at egress is explained in the commit cited below. Fixes: 541ad323db3a ("selftests: forwarding: gre_multipath: Update next-hop statistics match criteria") Signed-off-by: Ido Schimmel Reported-by: Stephen Suryaputra Tested-by: Stephen Suryaputra Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../selftests/net/forwarding/gre_multipath.sh | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh index 37d7297e1cf8..a8d8e8b3dc81 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh @@ -93,18 +93,10 @@ sw1_create() ip route add vrf v$ol1 192.0.2.16/28 \ nexthop dev g1a \ nexthop dev g1b - - tc qdisc add dev $ul1 clsact - tc filter add dev $ul1 egress pref 111 prot ipv4 \ - flower dst_ip 192.0.2.66 action pass - tc filter add dev $ul1 egress pref 222 prot ipv4 \ - flower dst_ip 192.0.2.82 action pass } sw1_destroy() { - tc qdisc del dev $ul1 clsact - ip route del vrf v$ol1 192.0.2.16/28 ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146 @@ -139,10 +131,18 @@ sw2_create() ip route add vrf v$ol2 192.0.2.0/28 \ nexthop dev g2a \ nexthop dev g2b + + tc qdisc add dev $ul2 clsact + tc filter add dev $ul2 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul2 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass } sw2_destroy() { + tc qdisc del dev $ul2 clsact + ip route del vrf v$ol2 192.0.2.0/28 ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145 @@ -215,15 +215,15 @@ multipath4_test() nexthop dev g1a weight $weight1 \ nexthop dev g1b weight $weight2 - local t0_111=$(tc_rule_stats_get $ul1 111 egress) - local t0_222=$(tc_rule_stats_get $ul1 222 egress) + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ -d 1msec -t udp "sp=1024,dp=0-32768" - local t1_111=$(tc_rule_stats_get $ul1 111 egress) - local t1_222=$(tc_rule_stats_get $ul1 222 egress) + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) local d111=$((t1_111 - t0_111)) local d222=$((t1_222 - t0_222)) From dbf790dcb8a9d7beb57f14004e0bba8235d833b8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 24 Jun 2019 08:34:13 +0000 Subject: [PATCH 011/101] can: dev: call netif_carrier_off() in register_candev() [ Upstream commit c63845609c4700488e5eacd6ab4d06d5d420e5ef ] CONFIG_CAN_LEDS is deprecated. When trying to use the generic netdev trigger as suggested, there's a small inconsistency with the link property: The LED is on initially, stays on when the device is brought up, and then turns off (as expected) when the device is brought down. Make sure the LED always reflects the state of the CAN device. Signed-off-by: Rasmus Villemoes Acked-by: Willem de Bruijn Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index c05e4d50d43d..bd127ce3aba2 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -1260,6 +1260,8 @@ int register_candev(struct net_device *dev) return -EINVAL; dev->rtnl_link_ops = &can_link_ops; + netif_carrier_off(dev); + return register_netdev(dev); } EXPORT_SYMBOL_GPL(register_candev); From 3257103502cf8c74d4979e96445d7409a0e7cd68 Mon Sep 17 00:00:00 2001 From: Weitao Hou Date: Tue, 25 Jun 2019 20:50:48 +0800 Subject: [PATCH 012/101] can: mcp251x: add error check when wq alloc failed [ Upstream commit 375f755899b8fc21196197e02aab26257df26e85 ] add error check when workqueue alloc failed, and remove redundant code to make it clear. Fixes: e0000163e30e ("can: Driver for the Microchip MCP251x SPI CAN controllers") Signed-off-by: Weitao Hou Acked-by: Willem de Bruijn Tested-by: Sean Nyekjaer Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/spi/mcp251x.c | 49 ++++++++++++++++------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index da64e71a62ee..fccb6bf21fad 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -678,17 +678,6 @@ static int mcp251x_power_enable(struct regulator *reg, int enable) return regulator_disable(reg); } -static void mcp251x_open_clean(struct net_device *net) -{ - struct mcp251x_priv *priv = netdev_priv(net); - struct spi_device *spi = priv->spi; - - free_irq(spi->irq, priv); - mcp251x_hw_sleep(spi); - mcp251x_power_enable(priv->transceiver, 0); - close_candev(net); -} - static int mcp251x_stop(struct net_device *net) { struct mcp251x_priv *priv = netdev_priv(net); @@ -954,37 +943,43 @@ static int mcp251x_open(struct net_device *net) flags | IRQF_ONESHOT, DEVICE_NAME, priv); if (ret) { dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq); - mcp251x_power_enable(priv->transceiver, 0); - close_candev(net); - goto open_unlock; + goto out_close; } priv->wq = alloc_workqueue("mcp251x_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM, 0); + if (!priv->wq) { + ret = -ENOMEM; + goto out_clean; + } INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler); INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler); ret = mcp251x_hw_reset(spi); - if (ret) { - mcp251x_open_clean(net); - goto open_unlock; - } + if (ret) + goto out_free_wq; ret = mcp251x_setup(net, spi); - if (ret) { - mcp251x_open_clean(net); - goto open_unlock; - } + if (ret) + goto out_free_wq; ret = mcp251x_set_normal_mode(spi); - if (ret) { - mcp251x_open_clean(net); - goto open_unlock; - } + if (ret) + goto out_free_wq; can_led_event(net, CAN_LED_EVENT_OPEN); netif_wake_queue(net); + mutex_unlock(&priv->mcp_lock); -open_unlock: + return 0; + +out_free_wq: + destroy_workqueue(priv->wq); +out_clean: + free_irq(spi->irq, priv); + mcp251x_hw_sleep(spi); +out_close: + mcp251x_power_enable(priv->transceiver, 0); + close_candev(net); mutex_unlock(&priv->mcp_lock); return ret; } From bd2f4c7c2d6a6f09a23756ae6d97b0615bdd05d7 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 18 May 2019 17:35:43 +0800 Subject: [PATCH 013/101] can: gw: Fix error path of cgw_module_init [ Upstream commit b7a14297f102b6e2ce6f16feffebbb9bde1e9b55 ] This patch add error path for cgw_module_init to avoid possible crash if some error occurs. Fixes: c1aabdf379bc ("can-gw: add netlink based CAN routing") Signed-off-by: YueHaibing Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/gw.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index 53859346dc9a..bd2161470e45 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1046,32 +1046,50 @@ static __init int cgw_module_init(void) pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n", max_hops); - register_pernet_subsys(&cangw_pernet_ops); + ret = register_pernet_subsys(&cangw_pernet_ops); + if (ret) + return ret; + + ret = -ENOMEM; cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); - if (!cgw_cache) - return -ENOMEM; + goto out_cache_create; /* set notifier */ notifier.notifier_call = cgw_notifier; - register_netdevice_notifier(¬ifier); + ret = register_netdevice_notifier(¬ifier); + if (ret) + goto out_register_notifier; ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0); - if (ret) { - unregister_netdevice_notifier(¬ifier); - kmem_cache_destroy(cgw_cache); - return -ENOBUFS; - } + if (ret) + goto out_rtnl_register1; - /* Only the first call to rtnl_register_module can fail */ - rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, - cgw_create_job, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, - cgw_remove_job, NULL, 0); + ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, + cgw_create_job, NULL, 0); + if (ret) + goto out_rtnl_register2; + ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, + cgw_remove_job, NULL, 0); + if (ret) + goto out_rtnl_register3; return 0; + +out_rtnl_register3: + rtnl_unregister(PF_CAN, RTM_NEWROUTE); +out_rtnl_register2: + rtnl_unregister(PF_CAN, RTM_GETROUTE); +out_rtnl_register1: + unregister_netdevice_notifier(¬ifier); +out_register_notifier: + kmem_cache_destroy(cgw_cache); +out_cache_create: + unregister_pernet_subsys(&cangw_pernet_ops); + + return ret; } static __exit void cgw_module_exit(void) From 714a8438fc8ae88aa22c25065e241bce0260db13 Mon Sep 17 00:00:00 2001 From: Ricard Wanderlof Date: Wed, 24 Jul 2019 11:38:44 +0200 Subject: [PATCH 014/101] ASoC: Fail card instantiation if DAI format setup fails [ Upstream commit 40aa5383e393d72f6aa3943a4e7b1aae25a1e43b ] If the DAI format setup fails, there is no valid communication format between CPU and CODEC, so fail card instantiation, rather than continue with a card that will most likely not function properly. Signed-off-by: Ricard Wanderlof Link: https://lore.kernel.org/r/alpine.DEB.2.20.1907241132350.6338@lnxricardw1.se.axis.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 62aa320c2070..dafc3b7f8d72 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1513,8 +1513,11 @@ static int soc_probe_link_dais(struct snd_soc_card *card, } } - if (dai_link->dai_fmt) - snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); + if (dai_link->dai_fmt) { + ret = snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); + if (ret) + return ret; + } ret = soc_post_component_init(rtd, dai_link->name); if (ret) From 0d73ba88f466b65a3c6877cc2cd16383a5548cba Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 23 Jul 2019 17:04:30 -0500 Subject: [PATCH 015/101] st21nfca_connectivity_event_received: null check the allocation [ Upstream commit 9891d06836e67324c9e9c4675ed90fc8b8110034 ] devm_kzalloc may fail and return null. So the null check is needed. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/st21nfca/se.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index 4bed9e842db3..fd967a38a94a 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -328,6 +328,8 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); + if (!transaction) + return -ENOMEM; transaction->aid_len = skb->data[1]; memcpy(transaction->aid, &skb->data[2], From c5cb10632c0fb4a4a4f7782e87e40aa6257bff9b Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 23 Jul 2019 17:11:51 -0500 Subject: [PATCH 016/101] st_nci_hci_connectivity_event_received: null check the allocation [ Upstream commit 3008e06fdf0973770370f97d5f1fba3701d8281d ] devm_kzalloc may fail and return NULL. So the null check is needed. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/st-nci/se.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index f55d082ace71..5d6e7e931bc6 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -344,6 +344,8 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); + if (!transaction) + return -ENOMEM; transaction->aid_len = skb->data[1]; memcpy(transaction->aid, &skb->data[2], transaction->aid_len); From dd0ff288e3441bd06d34dd37ead473cc41d413d4 Mon Sep 17 00:00:00 2001 From: Cheng-Yi Chiang Date: Fri, 26 Jul 2019 12:42:02 +0800 Subject: [PATCH 017/101] ASoC: rockchip: Fix mono capture [ Upstream commit 789e162a6255325325bd321ab0cd51dc7e285054 ] This reverts commit db51707b9c9aeedd310ebce60f15d5bb006567e0. Revert "ASoC: rockchip: i2s: Support mono capture" Previous discussion in https://patchwork.kernel.org/patch/10147153/ explains the issue of the patch. While device is configured as 1-ch, hardware is still generating a 2-ch stream. When user space reads the data and assumes it is a 1-ch stream, the rate will be slower by 2x. Revert the change so 1-ch is not supported. User space can selectively take one channel data out of two channel if 1-ch is preferred. Currently, both channels record identical data. Signed-off-by: Cheng-Yi Chiang Link: https://lore.kernel.org/r/20190726044202.26866-1-cychiang@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/rockchip/rockchip_i2s.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 60d43d53a8f5..11399f81c92f 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -329,7 +329,6 @@ static int rockchip_i2s_hw_params(struct snd_pcm_substream *substream, val |= I2S_CHN_4; break; case 2: - case 1: val |= I2S_CHN_2; break; default: @@ -462,7 +461,7 @@ static struct snd_soc_dai_driver rockchip_i2s_dai = { }, .capture = { .stream_name = "Capture", - .channels_min = 1, + .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_192000, .formats = (SNDRV_PCM_FMTBIT_S8 | @@ -662,7 +661,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev) } if (!of_property_read_u32(node, "rockchip,capture-channels", &val)) { - if (val >= 1 && val <= 8) + if (val >= 2 && val <= 8) soc_dai->capture.channels_max = val; } From bfa713f5cec57aeaa0860bba24ca251e2d1cd6e1 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 26 Jul 2019 09:42:43 +0300 Subject: [PATCH 018/101] ASoC: ti: davinci-mcasp: Correct slot_width posed constraint [ Upstream commit 1e112c35e3c96db7c8ca6ddaa96574f00c06e7db ] The slot_width is a property for the bus while the constraint for SNDRV_PCM_HW_PARAM_SAMPLE_BITS is for the in memory format. Applying slot_width constraint to sample_bits works most of the time, but it will blacklist valid formats in some cases. With slot_width 24 we can support S24_3LE and S24_LE formats as they both look the same on the bus, but a a 24 constraint on sample_bits would not allow S24_LE as it is stored in 32bits in memory. Implement a simple hw_rule function to allow all formats which require less or equal number of bits on the bus as slot_width (if configured). Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20190726064244.3762-2-peter.ujfalusi@ti.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/davinci/davinci-mcasp.c | 43 ++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 160b2764b2ad..6a8c279a4b20 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1150,6 +1150,28 @@ static int davinci_mcasp_trigger(struct snd_pcm_substream *substream, return ret; } +static int davinci_mcasp_hw_rule_slot_width(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct davinci_mcasp_ruledata *rd = rule->private; + struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + struct snd_mask nfmt; + int i, slot_width; + + snd_mask_none(&nfmt); + slot_width = rd->mcasp->slot_width; + + for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { + if (snd_mask_test(fmt, i)) { + if (snd_pcm_format_width(i) <= slot_width) { + snd_mask_set(&nfmt, i); + } + } + } + + return snd_mask_refine(fmt, &nfmt); +} + static const unsigned int davinci_mcasp_dai_rates[] = { 8000, 11025, 16000, 22050, 32000, 44100, 48000, 64000, 88200, 96000, 176400, 192000, @@ -1257,7 +1279,7 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, struct davinci_mcasp_ruledata *ruledata = &mcasp->ruledata[substream->stream]; u32 max_channels = 0; - int i, dir; + int i, dir, ret; int tdm_slots = mcasp->tdm_slots; /* Do not allow more then one stream per direction */ @@ -1286,6 +1308,7 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, max_channels++; } ruledata->serializers = max_channels; + ruledata->mcasp = mcasp; max_channels *= tdm_slots; /* * If the already active stream has less channels than the calculated @@ -1311,20 +1334,22 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, 0, SNDRV_PCM_HW_PARAM_CHANNELS, &mcasp->chconstr[substream->stream]); - if (mcasp->slot_width) - snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_SAMPLE_BITS, - 8, mcasp->slot_width); + if (mcasp->slot_width) { + /* Only allow formats require <= slot_width bits on the bus */ + ret = snd_pcm_hw_rule_add(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_FORMAT, + davinci_mcasp_hw_rule_slot_width, + ruledata, + SNDRV_PCM_HW_PARAM_FORMAT, -1); + if (ret) + return ret; + } /* * If we rely on implicit BCLK divider setting we should * set constraints based on what we can provide. */ if (mcasp->bclk_master && mcasp->bclk_div == 0 && mcasp->sysclk_freq) { - int ret; - - ruledata->mcasp = mcasp; - ret = snd_pcm_hw_rule_add(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, davinci_mcasp_hw_rule_rate, From cb64e86ee0d1763cb7ac45117f1de1f62bb6ba7c Mon Sep 17 00:00:00 2001 From: Bob Ham Date: Wed, 24 Jul 2019 07:52:27 -0700 Subject: [PATCH 019/101] net: usb: qmi_wwan: Add the BroadMobi BM818 card [ Upstream commit 9a07406b00cdc6ec689dc142540739575c717f3c ] The BroadMobi BM818 M.2 card uses the QMI protocol Signed-off-by: Bob Ham Signed-off-by: Angus Ainslie (Purism) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 128c8a327d8e..51017c6bb3bc 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1231,6 +1231,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ + {QMI_FIXED_INTF(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ From 8a24df575c97c2076260bd359f921e6a8becf219 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Thu, 25 Jul 2019 13:59:55 +0300 Subject: [PATCH 020/101] qed: RDMA - Fix the hw_ver returned in device attributes [ Upstream commit 81af04b432fdfabcdbd2c06be2ee647e3ca41a22 ] The hw_ver field was initialized to zero. Return the chip revision. This is relevant for rdma driver. Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 13802b825d65..909422d93903 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -442,7 +442,7 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, /* Vendor specific information */ dev->vendor_id = cdev->vendor_id; dev->vendor_part_id = cdev->device_id; - dev->hw_ver = 0; + dev->hw_ver = cdev->chip_rev; dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION); From 32cbe03539ac50b4bcb85681b523292e18ab4f0d Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 26 Jul 2019 16:27:36 +0800 Subject: [PATCH 021/101] isdn: mISDN: hfcsusb: Fix possible null-pointer dereferences in start_isoc_chain() [ Upstream commit a0d57a552b836206ad7705a1060e6e1ce5a38203 ] In start_isoc_chain(), usb_alloc_urb() on line 1392 may fail and return NULL. At this time, fifo->iso[i].urb is assigned to NULL. Then, fifo->iso[i].urb is used at some places, such as: LINE 1405: fill_isoc_urb(fifo->iso[i].urb, ...) urb->number_of_packets = num_packets; urb->transfer_flags = URB_ISO_ASAP; urb->actual_length = 0; urb->interval = interval; LINE 1416: fifo->iso[i].urb->... LINE 1419: fifo->iso[i].urb->... Thus, possible null-pointer dereferences may occur. To fix these bugs, "continue" is added to avoid using fifo->iso[i].urb when it is NULL. These bugs are found by a static analysis tool STCheck written by us. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/hardware/mISDN/hfcsusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 060dc7fd66c1..cfdb130cb100 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1406,6 +1406,7 @@ start_isoc_chain(struct usb_fifo *fifo, int num_packets_per_urb, printk(KERN_DEBUG "%s: %s: alloc urb for fifo %i failed", hw->name, __func__, fifo->fifonum); + continue; } fifo->iso[i].owner_fifo = (struct usb_fifo *) fifo; fifo->iso[i].indx = i; From ba42212ac0a022104deb5971337602a81cc5242d Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 29 Jul 2019 16:23:32 +0800 Subject: [PATCH 022/101] mac80211_hwsim: Fix possible null-pointer dereferences in hwsim_dump_radio_nl() [ Upstream commit b55f3b841099e641bdb2701d361a4c304e2dbd6f ] In hwsim_dump_radio_nl(), when genlmsg_put() on line 3617 fails, hdr is assigned to NULL. Then hdr is used on lines 3622 and 3623: genl_dump_check_consistent(cb, hdr); genlmsg_end(skb, hdr); Thus, possible null-pointer dereferences may occur. To fix these bugs, hdr is used here when it is not NULL. This bug is found by a static analysis tool STCheck written by us. Signed-off-by: Jia-Ju Bai Link: https://lore.kernel.org/r/20190729082332.28895-1-baijiaju1990@gmail.com [put braces on all branches] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 7cd428c0af43..ce2dd06af62e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3502,10 +3502,12 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb, hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &hwsim_genl_family, NLM_F_MULTI, HWSIM_CMD_GET_RADIO); - if (!hdr) + if (hdr) { + genl_dump_check_consistent(cb, hdr); + genlmsg_end(skb, hdr); + } else { res = -EMSGSIZE; - genl_dump_check_consistent(cb, hdr); - genlmsg_end(skb, hdr); + } } done: From 5a072ef6a296ecdb740998366cc441443a50d66f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 24 Jun 2019 15:20:11 +0200 Subject: [PATCH 023/101] netfilter: ipset: Actually allow destination MAC address for hash:ip,mac sets too [ Upstream commit b89d15480d0cacacae1a0fe0b3da01b529f2914f ] In commit 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets"), ipset.git commit 1543514c46a7, I removed the KADT check that prevents matching on destination MAC addresses for hash:mac sets, but forgot to remove the same check for hash:ip,mac set. Drop this check: functionality is now commented in man pages and there's no reason to restrict to source MAC address matching anymore. Reported-by: Chen Yi Fixes: 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_ipmac.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index fd87de3ed55b..75c21c8b7651 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -95,10 +95,6 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; From ea08214d8cd0e1ea33c391b0d9dac1040d6c3429 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 24 Jun 2019 15:20:12 +0200 Subject: [PATCH 024/101] netfilter: ipset: Copy the right MAC address in bitmap:ip,mac and hash:ip,mac sets [ Upstream commit 1b4a75108d5bc153daf965d334e77e8e94534f96 ] In commit 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets"), ipset.git commit 1543514c46a7, I added to the KADT functions for sets matching on MAC addreses the copy of source or destination MAC address depending on the configured match. This was done correctly for hash:mac, but for hash:ip,mac and bitmap:ip,mac, copying and pasting the same code block presents an obvious problem: in these two set types, the MAC address is the second dimension, not the first one, and we are actually selecting the MAC address depending on whether the first dimension (IP address) specifies source or destination. Fix this by checking for the IPSET_DIM_TWO_SRC flag in option flags. This way, mixing source and destination matches for the two dimensions of ip,mac set types works as expected. With this setup: ip netns add A ip link add veth1 type veth peer name veth2 netns A ip addr add 192.0.2.1/24 dev veth1 ip -net A addr add 192.0.2.2/24 dev veth2 ip link set veth1 up ip -net A link set veth2 up dst=$(ip netns exec A cat /sys/class/net/veth2/address) ip netns exec A ipset create test_bitmap bitmap:ip,mac range 192.0.0.0/16 ip netns exec A ipset add test_bitmap 192.0.2.1,${dst} ip netns exec A iptables -A INPUT -m set ! --match-set test_bitmap src,dst -j DROP ip netns exec A ipset create test_hash hash:ip,mac ip netns exec A ipset add test_hash 192.0.2.1,${dst} ip netns exec A iptables -A INPUT -m set ! --match-set test_hash src,dst -j DROP ipset correctly matches a test packet: # ping -c1 192.0.2.2 >/dev/null # echo $? 0 Reported-by: Chen Yi Fixes: 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 2 +- net/netfilter/ipset/ip_set_hash_ipmac.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 13ade5782847..4f01321e793c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -230,7 +230,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, e.id = ip_to_id(map, ip); - if (opt->flags & IPSET_DIM_ONE_SRC) + if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 75c21c8b7651..16ec822e4044 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -99,7 +99,7 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - if (opt->flags & IPSET_DIM_ONE_SRC) + if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); From 63dd147e7af0e69e5275191ff077e2c7f4ff53f8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 23 Jul 2019 10:25:55 +0200 Subject: [PATCH 025/101] netfilter: ipset: Fix rename concurrency with listing [ Upstream commit 6c1f7e2c1b96ab9b09ac97c4df2bd9dc327206f6 ] Shijie Luo reported that when stress-testing ipset with multiple concurrent create, rename, flush, list, destroy commands, it can result ipset : Broken LIST kernel message: missing DATA part! error messages and broken list results. The problem was the rename operation was not properly handled with respect of listing. The patch fixes the issue. Reported-by: Shijie Luo Signed-off-by: Jozsef Kadlecsik Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 1577f2f76060..e2538c578671 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1157,7 +1157,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, return -ENOENT; write_lock_bh(&ip_set_ref_lock); - if (set->ref != 0) { + if (set->ref != 0 || set->ref_netlink != 0) { ret = -IPSET_ERR_REFERENCED; goto out; } From 0d68fbc2d07b5914fb8712b05549bd558fccf1c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:42:50 +0100 Subject: [PATCH 026/101] rxrpc: Fix potential deadlock [ Upstream commit 60034d3d146b11922ab1db613bce062dddc0327a ] There is a potential deadlock in rxrpc_peer_keepalive_dispatch() whereby rxrpc_put_peer() is called with the peer_hash_lock held, but if it reduces the peer's refcount to 0, rxrpc_put_peer() calls __rxrpc_put_peer() - which the tries to take the already held lock. Fix this by providing a version of rxrpc_put_peer() that can be called in situations where the lock is already held. The bug may produce the following lockdep report: ============================================ WARNING: possible recursive locking detected 5.2.0-next-20190718 #41 Not tainted -------------------------------------------- kworker/0:3/21678 is trying to acquire lock: 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: spin_lock_bh /./include/linux/spinlock.h:343 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: __rxrpc_put_peer /net/rxrpc/peer_object.c:415 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: rxrpc_put_peer+0x2d3/0x6a0 /net/rxrpc/peer_object.c:435 but task is already holding lock: 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: spin_lock_bh /./include/linux/spinlock.h:343 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: rxrpc_peer_keepalive_dispatch /net/rxrpc/peer_event.c:378 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: rxrpc_peer_keepalive_worker+0x6b3/0xd02 /net/rxrpc/peer_event.c:430 Fixes: 330bdcfadcee ("rxrpc: Fix the keepalive generator [ver #2]") Reported-by: syzbot+72af434e4b3417318f84@syzkaller.appspotmail.com Signed-off-by: David Howells Reviewed-by: Marc Dionne Reviewed-by: Jeffrey Altman Signed-off-by: Sasha Levin --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/peer_event.c | 2 +- net/rxrpc/peer_object.c | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 03e0fc8c183f..a4c341828b72 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1057,6 +1057,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); void rxrpc_put_peer(struct rxrpc_peer *); +void rxrpc_put_peer_locked(struct rxrpc_peer *); /* * proc.c diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bd2fa3b7caa7..dc7fdaf20445 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -375,7 +375,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, spin_lock_bh(&rxnet->peer_hash_lock); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive[slot & mask]); - rxrpc_put_peer(peer); + rxrpc_put_peer_locked(peer); } spin_unlock_bh(&rxnet->peer_hash_lock); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 5691b7d266ca..71547e8673b9 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -440,6 +440,24 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) } } +/* + * Drop a ref on a peer record where the caller already holds the + * peer_hash_lock. + */ +void rxrpc_put_peer_locked(struct rxrpc_peer *peer) +{ + const void *here = __builtin_return_address(0); + int n; + + n = atomic_dec_return(&peer->usage); + trace_rxrpc_peer(peer, rxrpc_peer_put, n, here); + if (n == 0) { + hash_del_rcu(&peer->hash_link); + list_del_init(&peer->keepalive_link); + kfree_rcu(peer, rcu); + } +} + /* * Make sure all peer records have been discarded. */ From 4db2043eec468ed358ff1100a3024dcae3b1b5ad Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:42:50 +0100 Subject: [PATCH 027/101] rxrpc: Fix the lack of notification when sendmsg() fails on a DATA packet [ Upstream commit c69565ee6681e151e2bb80502930a16e04b553d1 ] Fix the fact that a notification isn't sent to the recvmsg side to indicate a call failed when sendmsg() fails to transmit a DATA packet with the error ENETUNREACH, EHOSTUNREACH or ECONNREFUSED. Without this notification, the afs client just sits there waiting for the call to complete in some manner (which it's not now going to do), which also pins the rxrpc call in place. This can be seen if the client has a scope-level IPv6 address, but not a global-level IPv6 address, and we try and transmit an operation to a server's IPv6 address. Looking in /proc/net/rxrpc/calls shows completed calls just sat there with an abort code of RX_USER_ABORT and an error code of -ENETUNREACH. Fixes: c54e43d752c7 ("rxrpc: Fix missing start of call timeout") Signed-off-by: David Howells Reviewed-by: Marc Dionne Reviewed-by: Jeffrey Altman Signed-off-by: Sasha Levin --- net/rxrpc/sendmsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index be01f9c5d963..5d6ab4f6fd7a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -230,6 +230,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_notify_socket(call); goto out; } _debug("need instant resend %d", ret); From 07efe13b95ab577085c2b920c86791fef5104541 Mon Sep 17 00:00:00 2001 From: Juliana Rodrigueiro Date: Wed, 31 Jul 2019 15:17:23 +0200 Subject: [PATCH 028/101] isdn: hfcsusb: Fix mISDN driver crash caused by transfer buffer on the stack [ Upstream commit d8a1de3d5bb881507602bc02e004904828f88711 ] Since linux 4.9 it is not possible to use buffers on the stack for DMA transfers. During usb probe the driver crashes with "transfer buffer is on stack" message. This fix k-allocates a buffer to be used on "read_reg_atomic", which is a macro that calls "usb_control_msg" under the hood. Kernel 4.19 backtrace: usb_hcd_submit_urb+0x3e5/0x900 ? sched_clock+0x9/0x10 ? log_store+0x203/0x270 ? get_random_u32+0x6f/0x90 ? cache_alloc_refill+0x784/0x8a0 usb_submit_urb+0x3b4/0x550 usb_start_wait_urb+0x4e/0xd0 usb_control_msg+0xb8/0x120 hfcsusb_probe+0x6bc/0xb40 [hfcsusb] usb_probe_interface+0xc2/0x260 really_probe+0x176/0x280 driver_probe_device+0x49/0x130 __driver_attach+0xa9/0xb0 ? driver_probe_device+0x130/0x130 bus_for_each_dev+0x5a/0x90 driver_attach+0x14/0x20 ? driver_probe_device+0x130/0x130 bus_add_driver+0x157/0x1e0 driver_register+0x51/0xe0 usb_register_driver+0x5d/0x120 ? 0xf81ed000 hfcsusb_drv_init+0x17/0x1000 [hfcsusb] do_one_initcall+0x44/0x190 ? free_unref_page_commit+0x6a/0xd0 do_init_module+0x46/0x1c0 load_module+0x1dc1/0x2400 sys_init_module+0xed/0x120 do_fast_syscall_32+0x7a/0x200 entry_SYSENTER_32+0x6b/0xbe Signed-off-by: Juliana Rodrigueiro Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/hardware/mISDN/hfcsusb.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index cfdb130cb100..c952002c6301 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1705,13 +1705,23 @@ hfcsusb_stop_endpoint(struct hfcsusb *hw, int channel) static int setup_hfcsusb(struct hfcsusb *hw) { + void *dmabuf = kmalloc(sizeof(u_char), GFP_KERNEL); u_char b; + int ret; if (debug & DBG_HFC_CALL_TRACE) printk(KERN_DEBUG "%s: %s\n", hw->name, __func__); + if (!dmabuf) + return -ENOMEM; + + ret = read_reg_atomic(hw, HFCUSB_CHIP_ID, dmabuf); + + memcpy(&b, dmabuf, sizeof(u_char)); + kfree(dmabuf); + /* check the chip id */ - if (read_reg_atomic(hw, HFCUSB_CHIP_ID, &b) != 1) { + if (ret != 1) { printk(KERN_DEBUG "%s: %s: cannot read chip id\n", hw->name, __func__); return 1; From 4533d08b65283ea673e112f38a32880813a78e25 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 30 Jul 2019 16:08:13 +0800 Subject: [PATCH 029/101] net: phy: phy_led_triggers: Fix a possible null-pointer dereference in phy_led_trigger_change_speed() [ Upstream commit 271da132e29b5341c31eca6ba6a72ea1302ebac8 ] In phy_led_trigger_change_speed(), there is an if statement on line 48 to check whether phy->last_triggered is NULL: if (!phy->last_triggered) When phy->last_triggered is NULL, it is used on line 52: led_trigger_event(&phy->last_triggered->trigger, LED_OFF); Thus, a possible null-pointer dereference may occur. To fix this bug, led_trigger_event(&phy->last_triggered->trigger, LED_OFF) is called when phy->last_triggered is not NULL. This bug is found by a static analysis tool STCheck written by the OSLAB group in Tsinghua University. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/phy_led_triggers.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index 491efc1bf5c4..7278eca70f9f 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -58,8 +58,9 @@ void phy_led_trigger_change_speed(struct phy_device *phy) if (!phy->last_triggered) led_trigger_event(&phy->led_link_trigger->trigger, LED_FULL); + else + led_trigger_event(&phy->last_triggered->trigger, LED_OFF); - led_trigger_event(&phy->last_triggered->trigger, LED_OFF); led_trigger_event(&plt->trigger, LED_FULL); phy->last_triggered = plt; } From a3d1263c9b03b3c873be974705d50f17b363e3d0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 1 Aug 2019 16:26:42 +0200 Subject: [PATCH 030/101] perf bench numa: Fix cpu0 binding [ Upstream commit 6bbfe4e602691b90ac866712bd4c43c51e546a60 ] Michael reported an issue with perf bench numa failing with binding to cpu0 with '-0' option. # perf bench numa mem -p 3 -t 1 -P 512 -s 100 -zZcm0 --thp 1 -M 1 -ddd # Running 'numa/mem' benchmark: # Running main, "perf bench numa numa-mem -p 3 -t 1 -P 512 -s 100 -zZcm0 --thp 1 -M 1 -ddd" binding to node 0, mask: 0000000000000001 => -1 perf: bench/numa.c:356: bind_to_memnode: Assertion `!(ret)' failed. Aborted (core dumped) This happens when the cpu0 is not part of node0, which is the benchmark assumption and we can see that's not the case for some powerpc servers. Using correct node for cpu0 binding. Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Satheesh Rajendran Link: http://lkml.kernel.org/r/20190801142642.28004-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/bench/numa.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index fa56fde6e8d8..91c0a4434da2 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -378,8 +378,10 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags, /* Allocate and initialize all memory on CPU#0: */ if (init_cpu0) { - orig_mask = bind_to_node(0); - bind_to_memnode(0); + int node = numa_node_of_cpu(0); + + orig_mask = bind_to_node(node); + bind_to_memnode(node); } bytes = bytes0 + HPSIZE; From 895c8fcfedad2af3fe265e796afbbc43096c84a3 Mon Sep 17 00:00:00 2001 From: Wang Xiayang Date: Wed, 31 Jul 2019 15:31:14 +0800 Subject: [PATCH 031/101] can: sja1000: force the string buffer NULL-terminated [ Upstream commit cd28aa2e056cd1ea79fc5f24eed0ce868c6cab5c ] strncpy() does not ensure NULL-termination when the input string size equals to the destination buffer size IFNAMSIZ. The output string 'name' is passed to dev_info which relies on NULL-termination. Use strlcpy() instead. This issue is identified by a Coccinelle script. Signed-off-by: Wang Xiayang Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/sja1000/peak_pcmcia.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c index b8c39ede7cd5..179bfcd541f2 100644 --- a/drivers/net/can/sja1000/peak_pcmcia.c +++ b/drivers/net/can/sja1000/peak_pcmcia.c @@ -487,7 +487,7 @@ static void pcan_free_channels(struct pcan_pccard *card) if (!netdev) continue; - strncpy(name, netdev->name, IFNAMSIZ); + strlcpy(name, netdev->name, IFNAMSIZ); unregister_sja1000dev(netdev); From 35d9e9211baf303037c3aae155428666b279d295 Mon Sep 17 00:00:00 2001 From: Wang Xiayang Date: Wed, 31 Jul 2019 15:25:59 +0800 Subject: [PATCH 032/101] can: peak_usb: force the string buffer NULL-terminated [ Upstream commit e787f19373b8a5fa24087800ed78314fd17b984a ] strncpy() does not ensure NULL-termination when the input string size equals to the destination buffer size IFNAMSIZ. The output string is passed to dev_info() which relies on the NULL-termination. Use strlcpy() instead. This issue is identified by a Coccinelle script. Signed-off-by: Wang Xiayang Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 740ef47eab01..43b0fa2b9932 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -863,7 +863,7 @@ static void peak_usb_disconnect(struct usb_interface *intf) dev_prev_siblings = dev->prev_siblings; dev->state &= ~PCAN_USB_STATE_CONNECTED; - strncpy(name, netdev->name, IFNAMSIZ); + strlcpy(name, netdev->name, IFNAMSIZ); unregister_netdev(netdev); From d1ba0b81e70976ab8022aa5e1a1740c455f6fbfb Mon Sep 17 00:00:00 2001 From: Wang Xiayang Date: Wed, 31 Jul 2019 16:15:42 +0800 Subject: [PATCH 033/101] net/ethernet/qlogic/qed: force the string buffer NULL-terminated [ Upstream commit 3690c8c9a8edff0db077a38783112d8fe12a7dd2 ] strncpy() does not ensure NULL-termination when the input string size equals to the destination buffer size 30. The output string is passed to qed_int_deassertion_aeu_bit() which calls DP_INFO() and relies NULL-termination. Use strlcpy instead. The other conditional branch above strncpy() needs no fix as snprintf() ensures NULL-termination. This issue is identified by a Coccinelle script. Signed-off-by: Wang Xiayang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index b22f464ea3fa..f9e475075d3e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -939,7 +939,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, snprintf(bit_name, 30, p_aeu->bit_name, num); else - strncpy(bit_name, + strlcpy(bit_name, p_aeu->bit_name, 30); /* We now need to pass bitmask in its From 66daeec347f057032c5026086dbcb460f6131b9b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Aug 2019 10:11:27 -0400 Subject: [PATCH 034/101] NFSv4: Fix a potential sleep while atomic in nfs4_do_reclaim() [ Upstream commit c77e22834ae9a11891cb613bd9a551be1b94f2bc ] John Hubbard reports seeing the following stack trace: nfs4_do_reclaim rcu_read_lock /* we are now in_atomic() and must not sleep */ nfs4_purge_state_owners nfs4_free_state_owner nfs4_destroy_seqid_counter rpc_destroy_wait_queue cancel_delayed_work_sync __cancel_work_timer __flush_work start_flush_work might_sleep: (kernel/workqueue.c:2975: BUG) The solution is to separate out the freeing of the state owners from nfs4_purge_state_owners(), and perform that outside the atomic context. Reported-by: John Hubbard Fixes: 0aaaf5c424c7f ("NFS: Cache state owners after files are closed") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4client.c | 5 ++++- fs/nfs/nfs4state.c | 27 ++++++++++++++++++++++----- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 63287d911c08..5b61520dce88 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -469,7 +469,8 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t); extern void nfs4_put_state_owner(struct nfs4_state_owner *); -extern void nfs4_purge_state_owners(struct nfs_server *); +extern void nfs4_purge_state_owners(struct nfs_server *, struct list_head *); +extern void nfs4_free_state_owners(struct list_head *head); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, fmode_t); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8f53455c4765..86991bcfbeb1 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -754,9 +754,12 @@ int nfs41_walk_client_list(struct nfs_client *new, static void nfs4_destroy_server(struct nfs_server *server) { + LIST_HEAD(freeme); + nfs_server_return_all_delegations(server); unset_pnfs_layoutdriver(server); - nfs4_purge_state_owners(server); + nfs4_purge_state_owners(server, &freeme); + nfs4_free_state_owners(&freeme); } /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3ba2087469ac..c36ef75f2054 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -628,24 +628,39 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) /** * nfs4_purge_state_owners - Release all cached state owners * @server: nfs_server with cached state owners to release + * @head: resulting list of state owners * * Called at umount time. Remaining state owners will be on * the LRU with ref count of zero. + * Note that the state owners are not freed, but are added + * to the list @head, which can later be used as an argument + * to nfs4_free_state_owners. */ -void nfs4_purge_state_owners(struct nfs_server *server) +void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head) { struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *tmp; - LIST_HEAD(doomed); spin_lock(&clp->cl_lock); list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) { - list_move(&sp->so_lru, &doomed); + list_move(&sp->so_lru, head); nfs4_remove_state_owner_locked(sp); } spin_unlock(&clp->cl_lock); +} - list_for_each_entry_safe(sp, tmp, &doomed, so_lru) { +/** + * nfs4_purge_state_owners - Release all cached state owners + * @head: resulting list of state owners + * + * Frees a list of state owners that was generated by + * nfs4_purge_state_owners + */ +void nfs4_free_state_owners(struct list_head *head) +{ + struct nfs4_state_owner *sp, *tmp; + + list_for_each_entry_safe(sp, tmp, head, so_lru) { list_del(&sp->so_lru); nfs4_free_state_owner(sp); } @@ -1843,12 +1858,13 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov struct nfs4_state_owner *sp; struct nfs_server *server; struct rb_node *pos; + LIST_HEAD(freeme); int status = 0; restart: rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - nfs4_purge_state_owners(server); + nfs4_purge_state_owners(server, &freeme); spin_lock(&clp->cl_lock); for (pos = rb_first(&server->state_owners); pos != NULL; @@ -1877,6 +1893,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov spin_unlock(&clp->cl_lock); } rcu_read_unlock(); + nfs4_free_state_owners(&freeme); return 0; } From b6fb2f5b33f277604374f28c486e8e897462c005 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Aug 2019 13:39:24 -0400 Subject: [PATCH 035/101] NFS: Fix regression whereby fscache errors are appearing on 'nofsc' mounts [ Upstream commit dea1bb35c5f35e0577cfc61f79261d80b8715221 ] People are reporing seeing fscache errors being reported concerning duplicate cookies even in cases where they are not setting up fscache at all. The rule needs to be that if fscache is not enabled, then it should have no side effects at all. To ensure this is the case, we disable fscache completely on all superblocks for which the 'fsc' mount option was not set. In order to avoid issues with '-oremount', we also disable the ability to turn fscache on via remount. Fixes: f1fe29b4a02d ("NFS: Use i_writecount to control whether...") Link: https://bugzilla.kernel.org/show_bug.cgi?id=200145 Signed-off-by: Trond Myklebust Cc: Steve Dickson Cc: David Howells Signed-off-by: Sasha Levin --- fs/nfs/fscache.c | 7 ++++++- fs/nfs/fscache.h | 2 +- fs/nfs/super.c | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 4dc887813c71..a7bc4e0494f9 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -118,6 +118,10 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int struct rb_node **p, *parent; int diff; + nfss->fscache_key = NULL; + nfss->fscache = NULL; + if (!(nfss->options & NFS_OPTION_FSCACHE)) + return; if (!uniq) { uniq = ""; ulen = 1; @@ -230,10 +234,11 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) void nfs_fscache_init_inode(struct inode *inode) { struct nfs_fscache_inode_auxdata auxdata; + struct nfs_server *nfss = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); nfsi->fscache = NULL; - if (!S_ISREG(inode->i_mode)) + if (!(nfss->fscache && S_ISREG(inode->i_mode))) return; memset(&auxdata, 0, sizeof(auxdata)); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 161ba2edb9d0..6363ea956858 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -186,7 +186,7 @@ static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) */ static inline const char *nfs_server_fscache_state(struct nfs_server *server) { - if (server->fscache && (server->options & NFS_OPTION_FSCACHE)) + if (server->fscache) return "yes"; return "no "; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6df9b85caf20..d90efdea9fbd 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2239,6 +2239,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, data->acdirmin != nfss->acdirmin / HZ || data->acdirmax != nfss->acdirmax / HZ || data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || data->nfs_server.port != nfss->port || data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address, From f6f9c4491ec52e13c6621b04b0c05301611b1711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20V=C3=A1radi?= Date: Wed, 24 Jul 2019 20:09:18 +0200 Subject: [PATCH 036/101] HID: quirks: Set the INCREMENT_USAGE_ON_DUPLICATE quirk on Saitek X52 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7bc74853fd61432ec59f812a40425bf6d8c986a4 ] The Saitek X52 joystick has a pair of axes that are originally (by the Windows driver) used as mouse pointer controls. The corresponding usage->hid values are 0x50024 and 0x50026. Thus they are handled as unknown axes and both get mapped to ABS_MISC. The quirk makes the second axis to be mapped to ABS_MISC1 and thus made available separately. [jkosina@suse.cz: squashed two patches into one] Signed-off-by: István Váradi Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 2898bb061945..4a2fa57ddcb8 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -971,6 +971,7 @@ #define USB_DEVICE_ID_SAITEK_RAT7 0x0cd7 #define USB_DEVICE_ID_SAITEK_RAT9 0x0cfa #define USB_DEVICE_ID_SAITEK_MMO7 0x0cd0 +#define USB_DEVICE_ID_SAITEK_X52 0x075c #define USB_VENDOR_ID_SAMSUNG 0x0419 #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index d29c7c9cd185..e553f6fae7a4 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -143,6 +143,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_RETROUSB, USB_DEVICE_ID_RETROUSB_SNES_RETROPAD), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_RETROUSB, USB_DEVICE_ID_RETROUSB_SNES_RETROPORT), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RUMBLEPAD), HID_QUIRK_BADPAD }, + { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB), HID_QUIRK_NOGET }, From 1c6ca09217054d9fe72977a168781ad0af9b1623 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 11 Jun 2019 14:13:20 +0200 Subject: [PATCH 037/101] HID: input: fix a4tech horizontal wheel custom usage [ Upstream commit 1c703b53e5bfb5c2205c30f0fb157ce271fd42fb ] Some a4tech mice use the 'GenericDesktop.00b8' usage to inform whether the previous wheel report was horizontal or vertical. Before c01908a14bf73 ("HID: input: add mapping for "Toggle Display" key") this usage was being mapped to 'Relative.Misc'. After the patch it's simply ignored (usage->type == 0 & usage->code == 0). Which ultimately makes hid-a4tech ignore the WHEEL/HWHEEL selection event, as it has no usage->type. We shouldn't rely on a mapping for that usage as it's nonstandard and doesn't really map to an input event. So we bypass the mapping and make sure the custom event handling properly handles both reports. Fixes: c01908a14bf73 ("HID: input: add mapping for "Toggle Display" key") Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-a4tech.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c index 9428ea7cdf8a..c52bd163abb3 100644 --- a/drivers/hid/hid-a4tech.c +++ b/drivers/hid/hid-a4tech.c @@ -26,12 +26,36 @@ #define A4_2WHEEL_MOUSE_HACK_7 0x01 #define A4_2WHEEL_MOUSE_HACK_B8 0x02 +#define A4_WHEEL_ORIENTATION (HID_UP_GENDESK | 0x000000b8) + struct a4tech_sc { unsigned long quirks; unsigned int hw_wheel; __s32 delayed_value; }; +static int a4_input_mapping(struct hid_device *hdev, struct hid_input *hi, + struct hid_field *field, struct hid_usage *usage, + unsigned long **bit, int *max) +{ + struct a4tech_sc *a4 = hid_get_drvdata(hdev); + + if (a4->quirks & A4_2WHEEL_MOUSE_HACK_B8 && + usage->hid == A4_WHEEL_ORIENTATION) { + /* + * We do not want to have this usage mapped to anything as it's + * nonstandard and doesn't really behave like an HID report. + * It's only selecting the orientation (vertical/horizontal) of + * the previous mouse wheel report. The input_events will be + * generated once both reports are recorded in a4_event(). + */ + return -1; + } + + return 0; + +} + static int a4_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) @@ -53,8 +77,7 @@ static int a4_event(struct hid_device *hdev, struct hid_field *field, struct a4tech_sc *a4 = hid_get_drvdata(hdev); struct input_dev *input; - if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput || - !usage->type) + if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput) return 0; input = field->hidinput->input; @@ -65,7 +88,7 @@ static int a4_event(struct hid_device *hdev, struct hid_field *field, return 1; } - if (usage->hid == 0x000100b8) { + if (usage->hid == A4_WHEEL_ORIENTATION) { input_event(input, EV_REL, value ? REL_HWHEEL : REL_WHEEL, a4->delayed_value); return 1; @@ -129,6 +152,7 @@ MODULE_DEVICE_TABLE(hid, a4_devices); static struct hid_driver a4_driver = { .name = "a4tech", .id_table = a4_devices, + .input_mapping = a4_input_mapping, .input_mapped = a4_input_mapped, .event = a4_event, .probe = a4_probe, From 6cb4997861c8bf9a73b54b123c02fab436b852a6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 2 Aug 2019 11:46:16 -0700 Subject: [PATCH 038/101] drm/rockchip: Suspend DP late [ Upstream commit f7ccbed656f78212593ca965d9a8f34bf24e0aab ] In commit fe64ba5c6323 ("drm/rockchip: Resume DP early") we moved resume to be early but left suspend at its normal time. This seems like it could be OK, but casues problems if a suspend gets interrupted partway through. The OS only balances matching suspend/resume levels. ...so if suspend was called then resume will be called. If suspend late was called then resume early will be called. ...but if suspend was called resume early might not get called. This leads to an unbalance in the clock enables / disables. Lets take the simple fix and just move suspend to be late to match. This makes the PM core take proper care in keeping things balanced. Fixes: fe64ba5c6323 ("drm/rockchip: Resume DP early") Signed-off-by: Douglas Anderson Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190802184616.44822-1-dianders@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/analogix_dp-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 080f05352195..6a4da3a0ff1c 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -436,7 +436,7 @@ static int rockchip_dp_resume(struct device *dev) static const struct dev_pm_ops rockchip_dp_pm_ops = { #ifdef CONFIG_PM_SLEEP - .suspend = rockchip_dp_suspend, + .suspend_late = rockchip_dp_suspend, .resume_early = rockchip_dp_resume, #endif }; From fab5a1fd17a62017871c52e568b40aacc0c23fe8 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 22 Jul 2019 11:38:22 -0700 Subject: [PATCH 039/101] SMB3: Fix potential memory leak when processing compound chain [ Upstream commit 3edeb4a4146dc3b54d6fa71b7ee0585cb52ebfdf ] When a reconnect happens in the middle of processing a compound chain the code leaks a buffer from the memory pool. Fix this by properly checking for a return code and freeing buffers in case of error. Also maintain a buf variable to be equal to either smallbuf or bigbuf depending on a response buffer size while parsing a chain and when returning to the caller. Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0ccf8f9b63a2..97fdbec54db9 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3121,7 +3121,6 @@ receive_encrypted_standard(struct TCP_Server_Info *server, { int ret, length; char *buf = server->smallbuf; - char *tmpbuf; struct smb2_sync_hdr *shdr; unsigned int pdu_length = server->pdu_size; unsigned int buf_size; @@ -3151,18 +3150,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server, return length; next_is_large = server->large_buf; - one_more: +one_more: shdr = (struct smb2_sync_hdr *)buf; if (shdr->NextCommand) { - if (next_is_large) { - tmpbuf = server->bigbuf; + if (next_is_large) next_buffer = (char *)cifs_buf_get(); - } else { - tmpbuf = server->smallbuf; + else next_buffer = (char *)cifs_small_buf_get(); - } memcpy(next_buffer, - tmpbuf + le32_to_cpu(shdr->NextCommand), + buf + le32_to_cpu(shdr->NextCommand), pdu_length - le32_to_cpu(shdr->NextCommand)); } @@ -3191,12 +3187,21 @@ receive_encrypted_standard(struct TCP_Server_Info *server, pdu_length -= le32_to_cpu(shdr->NextCommand); server->large_buf = next_is_large; if (next_is_large) - server->bigbuf = next_buffer; + server->bigbuf = buf = next_buffer; else - server->smallbuf = next_buffer; - - buf += le32_to_cpu(shdr->NextCommand); + server->smallbuf = buf = next_buffer; goto one_more; + } else if (ret != 0) { + /* + * ret != 0 here means that we didn't get to handle_mid() thus + * server->smallbuf and server->bigbuf are still valid. We need + * to free next_buffer because it is not going to be used + * anywhere. + */ + if (next_is_large) + free_rsp_buf(CIFS_LARGE_BUFFER, next_buffer); + else + free_rsp_buf(CIFS_SMALL_BUFFER, next_buffer); } return ret; From 33bdea175df0ffbb2327b6ea90012dd581eb501f Mon Sep 17 00:00:00 2001 From: Sebastien Tisserant Date: Thu, 1 Aug 2019 12:06:08 -0500 Subject: [PATCH 040/101] SMB3: Kernel oops mounting a encryptData share with CONFIG_DEBUG_VIRTUAL [ Upstream commit ee9d66182392695535cc9fccfcb40c16f72de2a9 ] Fix kernel oops when mounting a encryptData CIFS share with CONFIG_DEBUG_VIRTUAL Signed-off-by: Sebastien Tisserant Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 97fdbec54db9..cc9e846a3865 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2545,7 +2545,15 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { - sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); + void *addr; + /* + * VMAP_STACK (at least) puts stack into the vmalloc address space + */ + if (is_vmalloc_addr(buf)) + addr = vmalloc_to_page(buf); + else + addr = virt_to_page(buf); + sg_set_page(sg, addr, buflen, offset_in_page(buf)); } /* Assumes the first rqst has a transform header as the first iov. From db106f695414144d3cc1de97c7e601eecdb48ae8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Aug 2019 14:25:16 +0200 Subject: [PATCH 041/101] s390: put _stext and _etext into .text section [ Upstream commit 24350fdadbdec780406a1ef988e6cd3875e374a8 ] Perf relies on _etext and _stext symbols being one of 't', 'T', 'v' or 'V'. Put them into .text section to guarantee that. Also moves padding to page boundary inside .text which has an effect that .text section is now padded with nops rather than 0's, which apparently has been the initial intention for specifying 0x0700 fill expression. Reported-by: Thomas Richter Tested-by: Thomas Richter Suggested-by: Andreas Krebbel Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/vmlinux.lds.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b43f8d33a369..18ede6e806b9 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -31,10 +31,9 @@ PHDRS { SECTIONS { . = 0x100000; - _stext = .; /* Start of text section */ .text : { - /* Text and read-only data */ - _text = .; + _stext = .; /* Start of text section */ + _text = .; /* Text and read-only data */ HEAD_TEXT TEXT_TEXT SCHED_TEXT @@ -46,11 +45,10 @@ SECTIONS *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) + . = ALIGN(PAGE_SIZE); + _etext = .; /* End of text section */ } :text = 0x0700 - . = ALIGN(PAGE_SIZE); - _etext = .; /* End of text section */ - NOTES :text :note .dummy : { *(.dummy) } :data From 45e7e4e66b08984dd3fce1a7a16f6487bbb1bbf6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 6 Aug 2019 10:55:12 +0200 Subject: [PATCH 042/101] net: cxgb3_main: Fix a resource leak in a error path in 'init_one()' [ Upstream commit debea2cd3193ac868289e8893c3a719c265b0612 ] A call to 'kfree_skb()' is missing in the error handling path of 'init_one()'. This is already present in 'remove_one()' but is missing here. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index c34ea385fe4a..6be6de0774b6 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -3270,7 +3270,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!adapter->regs) { dev_err(&pdev->dev, "cannot map device registers\n"); err = -ENOMEM; - goto out_free_adapter; + goto out_free_adapter_nofail; } adapter->pdev = pdev; @@ -3398,6 +3398,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (adapter->port[i]) free_netdev(adapter->port[i]); +out_free_adapter_nofail: + kfree_skb(adapter->nofail_skb); + out_free_adapter: kfree(adapter); From b6cd6d18423eb59be98a361bfae40844de534c62 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 6 Aug 2019 15:16:17 +0200 Subject: [PATCH 043/101] net: stmmac: Fix issues when number of Queues >= 4 [ Upstream commit e8df7e8c233a18d2704e37ecff47583b494789d3 ] When queues >= 4 we use different registers but we were not subtracting the offset of 4. Fix this. Found out by Coverity. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 4 ++++ drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index d0e6e1503581..48cf5e2b2441 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -88,6 +88,8 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, u32 value; base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; + if (queue >= 4) + queue -= 4; value = readl(ioaddr + base_register); @@ -105,6 +107,8 @@ static void dwmac4_tx_queue_priority(struct mac_device_info *hw, u32 value; base_register = (queue < 4) ? GMAC_TXQ_PRTY_MAP0 : GMAC_TXQ_PRTY_MAP1; + if (queue >= 4) + queue -= 4; value = readl(ioaddr + base_register); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index d182f82f7b58..870302a7177e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -106,6 +106,8 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, u32 value, reg; reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; + if (queue >= 4) + queue -= 4; value = readl(ioaddr + reg); value &= ~XGMAC_PSRQ(queue); @@ -169,6 +171,8 @@ static void dwxgmac2_map_mtl_to_dma(struct mac_device_info *hw, u32 queue, u32 value, reg; reg = (queue < 4) ? XGMAC_MTL_RXQ_DMA_MAP0 : XGMAC_MTL_RXQ_DMA_MAP1; + if (queue >= 4) + queue -= 4; value = readl(ioaddr + reg); value &= ~XGMAC_QxMDMACH(queue); From b8d03c79e4033dc0d516e1d7ff14065439986a5c Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 6 Aug 2019 15:16:18 +0200 Subject: [PATCH 044/101] net: stmmac: tc: Do not return a fragment entry [ Upstream commit 4a6a1385a4db5f42258a40fcd497cbfd22075968 ] Do not try to return a fragment entry from TC list. Otherwise we may not clean properly allocated entries. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 58ea18af9813..37c0bc699cd9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -37,7 +37,7 @@ static struct stmmac_tc_entry *tc_find_entry(struct stmmac_priv *priv, entry = &priv->tc_entries[i]; if (!entry->in_use && !first && free) first = entry; - if (entry->handle == loc && !free) + if ((entry->handle == loc) && !free && !entry->is_frag) dup = entry; } From 09ec5bf10749f75e2f0c5dceacf929b6af8c3caa Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Sat, 3 Aug 2019 20:31:39 +0800 Subject: [PATCH 045/101] net: hisilicon: make hip04_tx_reclaim non-reentrant [ Upstream commit 1a2c070ae805910a853b4a14818481ed2e17c727 ] If hip04_tx_reclaim is interrupted while it is running and then __napi_schedule continues to execute hip04_rx_poll->hip04_tx_reclaim, reentrancy occurs and oops is generated. So you need to mask the interrupt during the hip04_tx_reclaim run. The kernel oops exception stack is as follows: Unable to handle kernel NULL pointer dereference at virtual address 00000050 pgd = c0003000 [00000050] *pgd=80000000a04003, *pmd=00000000 Internal error: Oops: 206 [#1] SMP ARM Modules linked in: hip04_eth mtdblock mtd_blkdevs mtd ohci_platform ehci_platform ohci_hcd ehci_hcd vfat fat sd_mod usb_storage scsi_mod usbcore usb_common CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.4.185 #1 Hardware name: Hisilicon A15 task: c0a250e0 task.stack: c0a00000 PC is at hip04_tx_reclaim+0xe0/0x17c [hip04_eth] LR is at hip04_tx_reclaim+0x30/0x17c [hip04_eth] pc : [] lr : [] psr: 600e0313 sp : c0a01d88 ip : 00000000 fp : c0601f9c r10: 00000000 r9 : c3482380 r8 : 00000001 r7 : 00000000 r6 : 000000e1 r5 : c3482000 r4 : 0000000c r3 : f2209800 r2 : 00000000 r1 : 00000000 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 32c5387d Table: 03d28c80 DAC: 55555555 Process swapper/0 (pid: 0, stack limit = 0xc0a00190) Stack: (0xc0a01d88 to 0xc0a02000) [] (hip04_tx_reclaim [hip04_eth]) from [] (hip04_rx_poll+0x88/0x368 [hip04_eth]) [] (hip04_rx_poll [hip04_eth]) from [] (net_rx_action+0x114/0x34c) [] (net_rx_action) from [] (__do_softirq+0x218/0x318) [] (__do_softirq) from [] (irq_exit+0x88/0xac) [] (irq_exit) from [] (msa_irq_exit+0x11c/0x1d4) [] (msa_irq_exit) from [] (__handle_domain_irq+0x110/0x148) [] (__handle_domain_irq) from [] (gic_handle_irq+0xd4/0x118) [] (gic_handle_irq) from [] (__irq_svc+0x40/0x58) Exception stack(0xc0a01f30 to 0xc0a01f78) 1f20: c0ae8b40 00000000 00000000 00000000 1f40: 00000002 ffffe000 c0601f9c 00000000 ffffffff c0a2257c c0a22440 c0831a38 1f60: c0a01ec4 c0a01f80 c0203714 c0203718 600e0213 ffffffff [] (__irq_svc) from [] (arch_cpu_idle+0x20/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0x244/0x29c) [] (cpu_startup_entry) from [] (rest_init+0xc8/0x10c) [] (rest_init) from [] (start_kernel+0x468/0x514) Code: a40599e5 016086e2 018088e2 7660efe6 (503090e5) ---[ end trace 1db21d6d09c49d74 ]--- Kernel panic - not syncing: Fatal exception in interrupt CPU3: stopping CPU: 3 PID: 0 Comm: swapper/3 Tainted: G D O 4.4.185 #1 Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hip04_eth.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 6127697ede12..57c0afa25f9f 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -497,6 +497,9 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) u16 len; u32 err; + /* clean up tx descriptors */ + tx_remaining = hip04_tx_reclaim(ndev, false); + while (cnt && !last) { buf = priv->rx_buf[priv->rx_head]; skb = build_skb(buf, priv->rx_buf_size); @@ -557,8 +560,7 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) } napi_complete_done(napi, rx); done: - /* clean up tx descriptors and start a new timer if necessary */ - tx_remaining = hip04_tx_reclaim(ndev, false); + /* start a new timer if necessary */ if (rx < budget && tx_remaining) hip04_start_tx_timer(priv); From 4ab3052568e5a59a41a00f4110942e366733e164 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Sat, 3 Aug 2019 20:31:40 +0800 Subject: [PATCH 046/101] net: hisilicon: fix hip04-xmit never return TX_BUSY [ Upstream commit f2243b82785942be519016067ee6c55a063bbfe2 ] TX_DESC_NUM is 256, in tx_count, the maximum value of mod(TX_DESC_NUM - 1) is 254, the variable "count" in the hip04_mac_start_xmit function is never equal to (TX_DESC_NUM - 1), so hip04_mac_start_xmit never return NETDEV_TX_BUSY. tx_count is modified to mod(TX_DESC_NUM) so that the maximum value of tx_count can reach (TX_DESC_NUM - 1), then hip04_mac_start_xmit can reurn NETDEV_TX_BUSY. Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 57c0afa25f9f..fe3b1637fd5f 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -185,7 +185,7 @@ struct hip04_priv { static inline unsigned int tx_count(unsigned int head, unsigned int tail) { - return (head - tail) % (TX_DESC_NUM - 1); + return (head - tail) % TX_DESC_NUM; } static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex) From e0c030221b1ff10164918c32fb75a310deee5688 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Sat, 3 Aug 2019 20:31:41 +0800 Subject: [PATCH 047/101] net: hisilicon: Fix dma_map_single failed on arm64 [ Upstream commit 96a50c0d907ac8f5c3d6b051031a19eb8a2b53e3 ] On the arm64 platform, executing "ifconfig eth0 up" will fail, returning "ifconfig: SIOCSIFFLAGS: Input/output error." ndev->dev is not initialized, dma_map_single->get_dma_ops-> dummy_dma_ops->__dummy_map_page will return DMA_ERROR_CODE directly, so when we use dma_map_single, the first parameter is to use the device of platform_device. Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hip04_eth.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index fe3b1637fd5f..a91d49dd92ea 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -157,6 +157,7 @@ struct hip04_priv { unsigned int reg_inten; struct napi_struct napi; + struct device *dev; struct net_device *ndev; struct tx_desc *tx_desc; @@ -387,7 +388,7 @@ static int hip04_tx_reclaim(struct net_device *ndev, bool force) } if (priv->tx_phys[tx_tail]) { - dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail], + dma_unmap_single(priv->dev, priv->tx_phys[tx_tail], priv->tx_skb[tx_tail]->len, DMA_TO_DEVICE); priv->tx_phys[tx_tail] = 0; @@ -437,8 +438,8 @@ static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_BUSY; } - phys = dma_map_single(&ndev->dev, skb->data, skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(&ndev->dev, phys)) { + phys = dma_map_single(priv->dev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(priv->dev, phys)) { dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -508,7 +509,7 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) goto refill; } - dma_unmap_single(&ndev->dev, priv->rx_phys[priv->rx_head], + dma_unmap_single(priv->dev, priv->rx_phys[priv->rx_head], RX_BUF_SIZE, DMA_FROM_DEVICE); priv->rx_phys[priv->rx_head] = 0; @@ -537,9 +538,9 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) buf = netdev_alloc_frag(priv->rx_buf_size); if (!buf) goto done; - phys = dma_map_single(&ndev->dev, buf, + phys = dma_map_single(priv->dev, buf, RX_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, phys)) + if (dma_mapping_error(priv->dev, phys)) goto done; priv->rx_buf[priv->rx_head] = buf; priv->rx_phys[priv->rx_head] = phys; @@ -642,9 +643,9 @@ static int hip04_mac_open(struct net_device *ndev) for (i = 0; i < RX_DESC_NUM; i++) { dma_addr_t phys; - phys = dma_map_single(&ndev->dev, priv->rx_buf[i], + phys = dma_map_single(priv->dev, priv->rx_buf[i], RX_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(&ndev->dev, phys)) + if (dma_mapping_error(priv->dev, phys)) return -EIO; priv->rx_phys[i] = phys; @@ -678,7 +679,7 @@ static int hip04_mac_stop(struct net_device *ndev) for (i = 0; i < RX_DESC_NUM; i++) { if (priv->rx_phys[i]) { - dma_unmap_single(&ndev->dev, priv->rx_phys[i], + dma_unmap_single(priv->dev, priv->rx_phys[i], RX_BUF_SIZE, DMA_FROM_DEVICE); priv->rx_phys[i] = 0; } @@ -822,6 +823,7 @@ static int hip04_mac_probe(struct platform_device *pdev) return -ENOMEM; priv = netdev_priv(ndev); + priv->dev = d; priv->ndev = ndev; platform_set_drvdata(pdev, ndev); SET_NETDEV_DEV(ndev, &pdev->dev); From 3b84bbef51c486af18fe93320ba85c065cf40caa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Aug 2019 12:20:52 -0600 Subject: [PATCH 048/101] libata: have ata_scsi_rw_xlat() fail invalid passthrough requests [ Upstream commit 2d7271501720038381d45fb3dcbe4831228fc8cc ] For passthrough requests, libata-scsi takes what the user passes in as gospel. This can be problematic if the user fills in the CDB incorrectly. One example of that is in request sizes. For read/write commands, the CDB contains fields describing the transfer length of the request. These should match with the SG_IO header fields, but libata-scsi currently does no validation of that. Check that the number of blocks in the CDB for passthrough requests matches what was mapped into the request. If the CDB asks for more data then the validated SG_IO header fields, error it. Reported-by: Krishna Ram Prakash R Reviewed-by: Kees Cook Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1984fc78c750..3a64fa4aaf7e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1803,6 +1803,21 @@ static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc) return 1; } +static bool ata_check_nblocks(struct scsi_cmnd *scmd, u32 n_blocks) +{ + struct request *rq = scmd->request; + u32 req_blocks; + + if (!blk_rq_is_passthrough(rq)) + return true; + + req_blocks = blk_rq_bytes(rq) / scmd->device->sector_size; + if (n_blocks > req_blocks) + return false; + + return true; +} + /** * ata_scsi_rw_xlat - Translate SCSI r/w command into an ATA one * @qc: Storage for translated ATA taskfile @@ -1847,6 +1862,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) scsi_10_lba_len(cdb, &block, &n_block); if (cdb[1] & (1 << 3)) tf_flags |= ATA_TFLAG_FUA; + if (!ata_check_nblocks(scmd, n_block)) + goto invalid_fld; break; case READ_6: case WRITE_6: @@ -1861,6 +1878,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) */ if (!n_block) n_block = 256; + if (!ata_check_nblocks(scmd, n_block)) + goto invalid_fld; break; case READ_16: case WRITE_16: @@ -1871,6 +1890,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) scsi_16_lba_len(cdb, &block, &n_block); if (cdb[1] & (1 << 3)) tf_flags |= ATA_TFLAG_FUA; + if (!ata_check_nblocks(scmd, n_block)) + goto invalid_fld; break; default: DPRINTK("no-byte command\n"); From 3ca013cd63be479be95ea953d08295f05f550c19 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Aug 2019 12:23:57 -0600 Subject: [PATCH 049/101] libata: add SG safety checks in SFF pio transfers [ Upstream commit 752ead44491e8c91e14d7079625c5916b30921c5 ] Abort processing of a command if we run out of mapped data in the SG list. This should never happen, but a previous bug caused it to be possible. Play it safe and attempt to abort nicely if we don't have more SG segments left. Reviewed-by: Kees Cook Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-sff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index c5ea0fc635e5..873cc0906055 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -674,6 +674,10 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) unsigned int offset; unsigned char *buf; + if (!qc->cursg) { + qc->curbytes = qc->nbytes; + return; + } if (qc->curbytes == qc->nbytes - qc->sect_size) ap->hsm_task_state = HSM_ST_LAST; @@ -699,6 +703,8 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) if (qc->cursg_ofs == qc->cursg->length) { qc->cursg = sg_next(qc->cursg); + if (!qc->cursg) + ap->hsm_task_state = HSM_ST_LAST; qc->cursg_ofs = 0; } } From 923de016dc8842710e76311167957b1d2dbc60a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Wed, 7 Aug 2019 23:27:17 -0400 Subject: [PATCH 050/101] x86/lib/cpu: Address missing prototypes warning [ Upstream commit 04f5bda84b0712d6f172556a7e8dca9ded5e73b9 ] When building with W=1, warnings about missing prototypes are emitted: CC arch/x86/lib/cpu.o arch/x86/lib/cpu.c:5:14: warning: no previous prototype for 'x86_family' [-Wmissing-prototypes] 5 | unsigned int x86_family(unsigned int sig) | ^~~~~~~~~~ arch/x86/lib/cpu.c:18:14: warning: no previous prototype for 'x86_model' [-Wmissing-prototypes] 18 | unsigned int x86_model(unsigned int sig) | ^~~~~~~~~ arch/x86/lib/cpu.c:33:14: warning: no previous prototype for 'x86_stepping' [-Wmissing-prototypes] 33 | unsigned int x86_stepping(unsigned int sig) | ^~~~~~~~~~~~ Add the proper include file so the prototypes are there. Signed-off-by: Valdis Kletnieks Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/42513.1565234837@turing-police Signed-off-by: Sasha Levin --- arch/x86/lib/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index 2dd1fe13a37b..19f707992db2 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -1,5 +1,6 @@ #include #include +#include unsigned int x86_family(unsigned int sig) { From fa6f4687805a38af3dd27cd02445daa806736335 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Jun 2019 09:39:59 -0700 Subject: [PATCH 051/101] drm/vmwgfx: fix memory leak when too many retries have occurred [ Upstream commit 6b7c3b86f0b63134b2ab56508921a0853ffa687a ] Currently when too many retries have occurred there is a memory leak on the allocation for reply on the error return path. Fix this by kfree'ing reply before returning. Addresses-Coverity: ("Resource leak") Fixes: a9cd9c044aa9 ("drm/vmwgfx: Add a check to handle host message failure") Signed-off-by: Colin Ian King Reviewed-by: Deepak Rawat Signed-off-by: Deepak Rawat Signed-off-by: Thomas Hellstrom Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index e4e09d47c5c0..59e9d05ab928 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -389,8 +389,10 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, break; } - if (retries == RETRIES) + if (retries == RETRIES) { + kfree(reply); return -EINVAL; + } *msg_len = reply_len; *msg = reply; From 7aa8dfa450b44a42b66be219de4d249e988d1605 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 7 Aug 2019 19:21:11 +0200 Subject: [PATCH 052/101] block, bfq: handle NULL return value by bfq_init_rq() [ Upstream commit fd03177c33b287c6541f4048f1d67b7b45a1abc9 ] As reported in [1], the call bfq_init_rq(rq) may return NULL in case of OOM (in particular, if rq->elv.icq is NULL because memory allocation failed in failed in ioc_create_icq()). This commit handles this circumstance. [1] https://lkml.org/lkml/2019/7/22/824 Cc: Hsin-Yi Wang Cc: Nicolas Boichat Cc: Doug Anderson Reported-by: Guenter Roeck Reported-by: Hsin-Yi Wang Reviewed-by: Guenter Roeck Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index becd793a258c..d8d2ac294b0c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1886,9 +1886,14 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { struct bfq_queue *bfqq = bfq_init_rq(req); - struct bfq_data *bfqd = bfqq->bfqd; + struct bfq_data *bfqd; struct request *prev, *next_rq; + if (!bfqq) + return; + + bfqd = bfqq->bfqd; + /* Reposition request in its sort_list */ elv_rb_del(&bfqq->sort_list, req); elv_rb_add(&bfqq->sort_list, req); @@ -1930,6 +1935,9 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct bfq_queue *bfqq = bfq_init_rq(rq), *next_bfqq = bfq_init_rq(next); + if (!bfqq) + return; + /* * If next and rq belong to the same bfq_queue and next is older * than rq, then reposition rq in the fifo (by substituting next @@ -4590,12 +4598,12 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, spin_lock_irq(&bfqd->lock); bfqq = bfq_init_rq(rq); - if (at_head || blk_rq_is_passthrough(rq)) { + if (!bfqq || at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); else list_add_tail(&rq->queuelist, &bfqd->dispatch); - } else { /* bfqq is assumed to be non null here */ + } else { idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* * Update bfqq, because, if a queue merge has occurred From e49cfed0a8cb94942f03ff11e7405b2f8368d415 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 2 Aug 2019 16:29:51 +0800 Subject: [PATCH 053/101] perf ftrace: Fix failure to set cpumask when only one cpu is present [ Upstream commit cf30ae726c011e0372fd4c2d588466c8b50a8907 ] The buffer containing the string used to set cpumask is overwritten at the end of the string later in cpu_map__snprint_mask due to not enough memory space, when there is only one cpu. And thus causes the following failure: $ perf ftrace ls failed to reset ftrace $ This patch fixes the calculation of the cpumask string size. Signed-off-by: He Zhe Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Fixes: dc23103278c5 ("perf ftrace: Add support for -a and -C option") Link: http://lkml.kernel.org/r/1564734592-15624-1-git-send-email-zhe.he@windriver.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index f42f228e8899..137955197ba8 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -174,7 +174,7 @@ static int set_tracing_cpumask(struct cpu_map *cpumap) int last_cpu; last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); - mask_size = (last_cpu + 3) / 4 + 1; + mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ cpumask = malloc(mask_size); From 06ed429b901877e256e7829e6effb2799014b41e Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 2 Aug 2019 16:29:52 +0800 Subject: [PATCH 054/101] perf cpumap: Fix writing to illegal memory in handling cpumap mask [ Upstream commit 5f5e25f1c7933a6e1673515c0b1d5acd82fea1ed ] cpu_map__snprint_mask() would write to illegal memory pointed by zalloc(0) when there is only one cpu. This patch fixes the calculation and adds sanity check against the input parameters. Signed-off-by: He Zhe Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Fixes: 4400ac8a9a90 ("perf cpumap: Introduce cpu_map__snprint_mask()") Link: http://lkml.kernel.org/r/1564734592-15624-2-git-send-email-zhe.he@windriver.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/cpumap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 383674f448fc..f93846edc1e0 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -701,7 +701,10 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) unsigned char *bitmap; int last_cpu = cpu_map__cpu(map, map->nr - 1); - bitmap = zalloc((last_cpu + 7) / 8); + if (buf == NULL) + return 0; + + bitmap = zalloc(last_cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; From 590549487679473835ba8c64454baae7e81d9b71 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 29 Jul 2019 15:27:55 +0800 Subject: [PATCH 055/101] perf pmu-events: Fix missing "cpu_clk_unhalted.core" event [ Upstream commit 8e6e5bea2e34c61291d00cb3f47560341aa84bc3 ] The events defined in pmu-events JSON are parsed and added into perf tool. For fixed counters, we handle the encodings between JSON and perf by using a static array fixed[]. But the fixed[] has missed an important event "cpu_clk_unhalted.core". For example, on the Tremont platform, [root@localhost ~]# perf stat -e cpu_clk_unhalted.core -a event syntax error: 'cpu_clk_unhalted.core' \___ parser error With this patch, the event cpu_clk_unhalted.core can be parsed. [root@localhost perf]# ./perf stat -e cpu_clk_unhalted.core -a -vvv ------------------------------------------------------------ perf_event_attr: type 4 size 112 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ ... Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190729072755.2166-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/pmu-events/jevents.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 68c92bb599ee..6b36b7110669 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -450,6 +450,7 @@ static struct fixed { { "inst_retired.any_p", "event=0xc0" }, { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, { "cpu_clk_unhalted.thread", "event=0x3c" }, + { "cpu_clk_unhalted.core", "event=0x3c" }, { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, { NULL, NULL}, }; From d5cb5b493030a7856906e4efd6b9d695ec8eeea5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Aug 2019 10:34:51 +0100 Subject: [PATCH 056/101] KVM: arm64: Don't write junk to sysregs on reset [ Upstream commit 03fdfb2690099c19160a3f2c5b77db60b3afeded ] At the moment, the way we reset system registers is mildly insane: We write junk to them, call the reset functions, and then check that we have something else in them. The "fun" thing is that this can happen while the guest is running (PSCI, for example). If anything in KVM has to evaluate the state of a system register while junk is in there, bad thing may happen. Let's stop doing that. Instead, we track that we have called a reset function for that register, and assume that the reset function has done something. This requires fixing a couple of sysreg refinition in the trap table. In the end, the very need of this reset check is pretty dubious, as it doesn't check everything (a lot of the sysregs leave outside of the sys_regs[] array). It may well be axed in the near future. Tested-by: Zenghui Yu Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- arch/arm64/kvm/sys_regs.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d112af75680b..6da2bbdb9648 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -626,7 +626,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; + __vcpu_sys_reg(vcpu, r->reg) = val; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) @@ -968,13 +968,13 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ - trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ + trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \ { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ - trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ + trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \ { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ - trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ + trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \ { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ - trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } + trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ @@ -1359,7 +1359,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 }, - { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, + { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, @@ -2072,13 +2072,19 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, } static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num) + const struct sys_reg_desc *table, size_t num, + unsigned long *bmap) { unsigned long i; for (i = 0; i < num; i++) - if (table[i].reset) + if (table[i].reset) { + int reg = table[i].reg; + table[i].reset(vcpu, &table[i]); + if (reg > 0 && reg < NR_SYS_REGS) + set_bit(reg, bmap); + } } /** @@ -2576,18 +2582,16 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { size_t num; const struct sys_reg_desc *table; - - /* Catch someone adding a register without putting in reset entry. */ - memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); + DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, }; /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num); + reset_sys_reg_descs(vcpu, table, num, bmap); for (num = 1; num < NR_SYS_REGS; num++) { - if (WARN(__vcpu_sys_reg(vcpu, num) == 0x4242424242424242, + if (WARN(!test_bit(num, bmap), "Didn't reset __vcpu_sys_reg(%zi)\n", num)) break; } From ef61b79017ff626f1ab75afcc37bf1a629449ed1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Aug 2019 10:34:51 +0100 Subject: [PATCH 057/101] KVM: arm: Don't write junk to CP15 registers on reset [ Upstream commit c69509c70aa45a8c4954c88c629a64acf4ee4a36 ] At the moment, the way we reset CP15 registers is mildly insane: We write junk to them, call the reset functions, and then check that we have something else in them. The "fun" thing is that this can happen while the guest is running (PSCI, for example). If anything in KVM has to evaluate the state of a CP15 register while junk is in there, bad thing may happen. Let's stop doing that. Instead, we track that we have called a reset function for that register, and assume that the reset function has done something. In the end, the very need of this reset check is pretty dubious, as it doesn't check everything (a lot of the CP15 reg leave outside of the cp15_regs[] array). It may well be axed in the near future. Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- arch/arm/kvm/coproc.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index fd6cde23bb5d..871fa50a09f1 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -658,13 +658,22 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) } static void reset_coproc_regs(struct kvm_vcpu *vcpu, - const struct coproc_reg *table, size_t num) + const struct coproc_reg *table, size_t num, + unsigned long *bmap) { unsigned long i; for (i = 0; i < num; i++) - if (table[i].reset) + if (table[i].reset) { + int reg = table[i].reg; + table[i].reset(vcpu, &table[i]); + if (reg > 0 && reg < NR_CP15_REGS) { + set_bit(reg, bmap); + if (table[i].is_64bit) + set_bit(reg + 1, bmap); + } + } } static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu) @@ -1439,17 +1448,15 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu) { size_t num; const struct coproc_reg *table; - - /* Catch someone adding a register without putting in reset entry. */ - memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15)); + DECLARE_BITMAP(bmap, NR_CP15_REGS) = { 0, }; /* Generic chip reset first (so target could override). */ - reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); + reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs), bmap); table = get_target_table(vcpu->arch.target, &num); - reset_coproc_regs(vcpu, table, num); + reset_coproc_regs(vcpu, table, num, bmap); for (num = 1; num < NR_CP15_REGS; num++) - WARN(vcpu_cp15(vcpu, num) == 0x42424242, + WARN(!test_bit(num, bmap), "Didn't reset vcpu_cp15(vcpu, %zi)", num); } From 3c4b283a0deb62e4a4f1b7f93c7a43985f47be2e Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Wed, 7 Aug 2019 13:58:14 +0000 Subject: [PATCH 058/101] selftests: kvm: Adding config fragments [ Upstream commit c096397c78f766db972f923433031f2dec01cae0 ] selftests kvm test cases need pre-required kernel configs for the test to get pass. Signed-off-by: Naresh Kamboju Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/config | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/testing/selftests/kvm/config diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config new file mode 100644 index 000000000000..63ed533f73d6 --- /dev/null +++ b/tools/testing/selftests/kvm/config @@ -0,0 +1,3 @@ +CONFIG_KVM=y +CONFIG_KVM_INTEL=y +CONFIG_KVM_AMD=y From 8317fe4a39066fbbee69dca5d848e10c4b40eeb6 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Fri, 16 Aug 2019 12:00:54 -0700 Subject: [PATCH 059/101] HID: wacom: correct misreported EKR ring values commit fcf887e7caaa813eea821d11bf2b7619a37df37a upstream. The EKR ring claims a range of 0 to 71 but actually reports values 1 to 72. The ring is used in relative mode so this change should not affect users. Signed-off-by: Aaron Armstrong Skomra Fixes: 72b236d60218f ("HID: wacom: Add support for Express Key Remote.") Cc: # v4.3+ Reviewed-by: Ping Cheng Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index e56dc97fe4b6..c46aab6319c4 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1061,7 +1061,7 @@ static int wacom_remote_irq(struct wacom_wac *wacom_wac, size_t len) input_report_key(input, BTN_BASE2, (data[11] & 0x02)); if (data[12] & 0x80) - input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f)); + input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f) - 1); else input_report_abs(input, ABS_WHEEL, 0); From 375c6c72f56cb2468595c365c82af2fa640d6430 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 7 Aug 2019 14:11:55 -0700 Subject: [PATCH 060/101] HID: wacom: Correct distance scale for 2nd-gen Intuos devices commit b72fb1dcd2ea9d29417711cb302cef3006fa8d5a upstream. Distance values reported by 2nd-gen Intuos tablets are on an inverted scale (0 == far, 63 == near). We need to change them over to a normal scale before reporting to userspace or else userspace drivers and applications can get confused. Ref: https://github.com/linuxwacom/input-wacom/issues/98 Fixes: eda01dab53 ("HID: wacom: Add four new Intuos devices") Signed-off-by: Jason Gerecke Cc: # v4.4+ Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index c46aab6319c4..50ef7b6cd195 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -848,6 +848,8 @@ static int wacom_intuos_general(struct wacom_wac *wacom) y >>= 1; distance >>= 1; } + if (features->type == INTUOSHT2) + distance = features->distance_max - distance; input_report_abs(input, ABS_X, x); input_report_abs(input, ABS_Y, y); input_report_abs(input, ABS_DISTANCE, distance); From b608a5a238d52d89e0b60a87400d3166f937c010 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 8 Aug 2019 05:40:04 -0400 Subject: [PATCH 061/101] Revert "dm bufio: fix deadlock with loop device" commit cf3591ef832915892f2499b7e54b51d4c578b28c upstream. Revert the commit bd293d071ffe65e645b4d8104f9d8fe15ea13862. The proper fix has been made available with commit d0a255e795ab ("loop: set PF_MEMALLOC_NOIO for the worker thread"). Note that the fix offered by commit bd293d071ffe doesn't really prevent the deadlock from occuring - if we look at the stacktrace reported by Junxiao Bi, we see that it hangs in bit_wait_io and not on the mutex - i.e. it has already successfully taken the mutex. Changing the mutex from mutex_lock to mutex_trylock won't help with deadlocks that happen afterwards. PID: 474 TASK: ffff8813e11f4600 CPU: 10 COMMAND: "kswapd0" #0 [ffff8813dedfb938] __schedule at ffffffff8173f405 #1 [ffff8813dedfb990] schedule at ffffffff8173fa27 #2 [ffff8813dedfb9b0] schedule_timeout at ffffffff81742fec #3 [ffff8813dedfba60] io_schedule_timeout at ffffffff8173f186 #4 [ffff8813dedfbaa0] bit_wait_io at ffffffff8174034f #5 [ffff8813dedfbac0] __wait_on_bit at ffffffff8173fec8 #6 [ffff8813dedfbb10] out_of_line_wait_on_bit at ffffffff8173ff81 #7 [ffff8813dedfbb90] __make_buffer_clean at ffffffffa038736f [dm_bufio] #8 [ffff8813dedfbbb0] __try_evict_buffer at ffffffffa0387bb8 [dm_bufio] #9 [ffff8813dedfbbd0] dm_bufio_shrink_scan at ffffffffa0387cc3 [dm_bufio] #10 [ffff8813dedfbc40] shrink_slab at ffffffff811a87ce #11 [ffff8813dedfbd30] shrink_zone at ffffffff811ad778 #12 [ffff8813dedfbdc0] kswapd at ffffffff811ae92f #13 [ffff8813dedfbec0] kthread at ffffffff810a8428 #14 [ffff8813dedfbf50] ret_from_fork at ffffffff81745242 Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: bd293d071ffe ("dm bufio: fix deadlock with loop device") Depends-on: d0a255e795ab ("loop: set PF_MEMALLOC_NOIO for the worker thread") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index b1d0ae2dbd3d..dc385b70e4c3 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1602,7 +1602,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) unsigned long freed; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (!dm_bufio_trylock(c)) + if (sc->gfp_mask & __GFP_FS) + dm_bufio_lock(c); + else if (!dm_bufio_trylock(c)) return SHRINK_STOP; freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); From a8f7703f221347b7dcfa1d4077695782edddbf78 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 14 Aug 2019 10:30:14 -0500 Subject: [PATCH 062/101] clk: socfpga: stratix10: fix rate caclulationg for cnt_clks commit c7ec75ea4d5316518adc87224e3cff47192579e7 upstream. Checking bypass_reg is incorrect for calculating the cnt_clk rates. Instead we should be checking that there is a proper hardware register that holds the clock divider. Cc: stable@vger.kernel.org Signed-off-by: Dinh Nguyen Link: https://lkml.kernel.org/r/20190814153014.12962-1-dinguyen@kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/socfpga/clk-periph-s10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/socfpga/clk-periph-s10.c b/drivers/clk/socfpga/clk-periph-s10.c index 568f59b58ddf..e7c877d354c7 100644 --- a/drivers/clk/socfpga/clk-periph-s10.c +++ b/drivers/clk/socfpga/clk-periph-s10.c @@ -37,7 +37,7 @@ static unsigned long clk_peri_cnt_clk_recalc_rate(struct clk_hw *hwclk, if (socfpgaclk->fixed_div) { div = socfpgaclk->fixed_div; } else { - if (!socfpgaclk->bypass_reg) + if (socfpgaclk->hw.reg) div = ((readl(socfpgaclk->hw.reg) & 0x7ff) + 1); } From 7bed2889cd184df203aab27ab7293b26b61816c6 Mon Sep 17 00:00:00 2001 From: Erqi Chen Date: Wed, 24 Jul 2019 10:26:09 +0800 Subject: [PATCH 063/101] ceph: clear page dirty before invalidate page commit c95f1c5f436badb9bb87e9b30fd573f6b3d59423 upstream. clear_page_dirty_for_io(page) before mapping->a_ops->invalidatepage(). invalidatepage() clears page's private flag, if dirty flag is not cleared, the page may cause BUG_ON failure in ceph_set_page_dirty(). Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/40862 Signed-off-by: Erqi Chen Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9c332a6f6667..476728bdae8c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -913,8 +913,9 @@ static int ceph_writepages_start(struct address_space *mapping, if (page_offset(page) >= ceph_wbc.i_size) { dout("%p page eof %llu\n", page, ceph_wbc.i_size); - if (ceph_wbc.size_stable || - page_offset(page) >= i_size_read(inode)) + if ((ceph_wbc.size_stable || + page_offset(page) >= i_size_read(inode)) && + clear_page_dirty_for_io(page)) mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); From f2951720629e7af751eccf4d8245d858cffc4d2c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Aug 2019 06:23:38 -0400 Subject: [PATCH 064/101] ceph: don't try fill file_lock on unsuccessful GETFILELOCK reply commit 28a282616f56990547b9dcd5c6fbd2001344664c upstream. When ceph_mdsc_do_request returns an error, we can't assume that the filelock_reply pointer will be set. Only try to fetch fields out of the r_reply_info when it returns success. Cc: stable@vger.kernel.org Reported-by: Hector Martin Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/locks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 9dae2ec7e1fa..6a8f4a99582e 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -111,8 +111,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, req->r_wait_for_completion = ceph_lock_wait_for_completion; err = ceph_mdsc_do_request(mdsc, inode, req); - - if (operation == CEPH_MDS_OP_GETFILELOCK) { + if (!err && operation == CEPH_MDS_OP_GETFILELOCK) { fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid); if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) fl->fl_type = F_RDLCK; From 51f6afddb1475a3debe3feb60610ae0df0346f18 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 20 Aug 2019 16:40:33 +0200 Subject: [PATCH 065/101] libceph: fix PG split vs OSD (re)connect race commit a561372405cf6bc6f14239b3a9e57bb39f2788b0 upstream. We can't rely on ->peer_features in calc_target() because it may be called both when the OSD session is established and open and when it's not. ->peer_features is not valid unless the OSD session is open. If this happens on a PG split (pg_num increase), that could mean we don't resend a request that should have been resent, hanging the client indefinitely. In userspace this was fixed by looking at require_osd_release and get_xinfo[osd].features fields of the osdmap. However these fields belong to the OSD section of the osdmap, which the kernel doesn't decode (only the client section is decoded). Instead, let's drop this feature check. It effectively checks for luminous, so only pre-luminous OSDs would be affected in that on a PG split the kernel might resend a request that should not have been resent. Duplicates can occur in other scenarios, so both sides should already be prepared for them: see dup/replay logic on the OSD side and retry_attempt check on the client side. Cc: stable@vger.kernel.org Fixes: 7de030d6b10a ("libceph: resend on PG splits if OSD has RESEND_ON_SPLIT") Link: https://tracker.ceph.com/issues/41162 Reported-by: Jerry Lee Signed-off-by: Ilya Dryomov Tested-by: Jerry Lee Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 60934bd8796c..76c41a84550e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1423,7 +1423,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, struct ceph_osds up, acting; bool force_resend = false; bool unpaused = false; - bool legacy_change; + bool legacy_change = false; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); bool recovery_deletes = ceph_osdmap_flag(osdc, @@ -1511,15 +1511,14 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, t->osd = acting.primary; } - if (unpaused || legacy_change || force_resend || - (split && con && CEPH_HAVE_FEATURE(con->peer_features, - RESEND_ON_SPLIT))) + if (unpaused || legacy_change || force_resend || split) ct_res = CALC_TARGET_NEED_RESEND; else ct_res = CALC_TARGET_NO_ACTION; out: - dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd); + dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused, + legacy_change, force_resend, split, ct_res, t->osd); return ct_res; } From f88c31b43babfab3bcd85d18aede278455f0776d Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 25 Jul 2019 15:40:01 -0400 Subject: [PATCH 066/101] drm/nouveau: Don't retry infinitely when receiving no data on i2c over AUX commit c358ebf59634f06d8ed176da651ec150df3c8686 upstream. While I had thought I had fixed this issue in: commit 342406e4fbba ("drm/nouveau/i2c: Disable i2c bus access after ->fini()") It turns out that while I did fix the error messages I was seeing on my P50 when trying to access i2c busses with the GPU in runtime suspend, I accidentally had missed one important detail that was mentioned on the bug report this commit was supposed to fix: that the CPU would only lock up when trying to access i2c busses _on connected devices_ _while the GPU is not in runtime suspend_. Whoops. That definitely explains why I was not able to get my machine to hang with i2c bus interactions until now, as plugging my P50 into it's dock with an HDMI monitor connected allowed me to finally reproduce this locally. Now that I have managed to reproduce this issue properly, it looks like the problem is much simpler then it looks. It turns out that some connected devices, such as MST laptop docks, will actually ACK i2c reads even if no data was actually read: [ 275.063043] nouveau 0000:01:00.0: i2c: aux 000a: 1: 0000004c 1 [ 275.063447] nouveau 0000:01:00.0: i2c: aux 000a: 00 01101000 10040000 [ 275.063759] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000001 [ 275.064024] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064285] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064594] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 Because we don't handle the situation of i2c ack without any data, we end up entering an infinite loop in nvkm_i2c_aux_i2c_xfer() since the value of cnt always remains at 0. This finally properly explains how this could result in a CPU hang like the ones observed in the aforementioned commit. So, fix this by retrying transactions if no data is written or received, and give up and fail the transaction if we continue to not write or receive any data after 32 retries. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c index b4e7404fe660..a11637b0f6cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c @@ -40,8 +40,7 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) u8 *ptr = msg->buf; while (remaining) { - u8 cnt = (remaining > 16) ? 16 : remaining; - u8 cmd; + u8 cnt, retries, cmd; if (msg->flags & I2C_M_RD) cmd = 1; @@ -51,10 +50,19 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (mcnt || remaining > 16) cmd |= 4; /* MOT */ - ret = aux->func->xfer(aux, true, cmd, msg->addr, ptr, &cnt); - if (ret < 0) { - nvkm_i2c_aux_release(aux); - return ret; + for (retries = 0, cnt = 0; + retries < 32 && !cnt; + retries++) { + cnt = min_t(u8, remaining, 16); + ret = aux->func->xfer(aux, true, cmd, + msg->addr, ptr, &cnt); + if (ret < 0) + goto out; + } + if (!cnt) { + AUX_TRACE(aux, "no data after 32 retries"); + ret = -EIO; + goto out; } ptr += cnt; @@ -64,8 +72,10 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) msg++; } + ret = num; +out: nvkm_i2c_aux_release(aux); - return num; + return ret; } static u32 From 3783c7ee9920fac998117a9f8ec715693c5ed34b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 6 Aug 2019 13:41:51 +0200 Subject: [PATCH 067/101] gpiolib: never report open-drain/source lines as 'input' to user-space commit 2c60e6b5c9241b24b8b523fefd3e44fb85622cda upstream. If the driver doesn't support open-drain/source config options, we emulate this behavior when setting the direction by calling gpiod_direction_input() if the default value is 0 (open-source) or 1 (open-drain), thus not actively driving the line in those cases. This however clears the FLAG_IS_OUT bit for the GPIO line descriptor and makes the LINEINFO ioctl() incorrectly report this line's mode as 'input' to user-space. This commit modifies the ioctl() to always set the GPIOLINE_FLAG_IS_OUT bit in the lineinfo structure's flags field. Since it's impossible to use the input mode and open-drain/source options at the same time, we can be sure the reported information will be correct. Fixes: 521a2ad6f862 ("gpio: add userspace ABI for GPIO line information") Cc: stable Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20190806114151.17652-1-brgl@bgdev.pl Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index b308ce92685d..53395852f012 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1082,9 +1082,11 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) lineinfo.flags |= GPIOLINE_FLAG_ACTIVE_LOW; if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) - lineinfo.flags |= GPIOLINE_FLAG_OPEN_DRAIN; + lineinfo.flags |= (GPIOLINE_FLAG_OPEN_DRAIN | + GPIOLINE_FLAG_IS_OUT); if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) - lineinfo.flags |= GPIOLINE_FLAG_OPEN_SOURCE; + lineinfo.flags |= (GPIOLINE_FLAG_OPEN_SOURCE | + GPIOLINE_FLAG_IS_OUT); if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) return -EFAULT; From a6f236e1bd97d89d80c07dfe0e0b566044473b5a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 7 May 2019 07:46:55 +0000 Subject: [PATCH 068/101] Drivers: hv: vmbus: Fix virt_to_hvpfn() for X86_PAE commit a9fc4340aee041dd186d1fb8f1b5d1e9caf28212 upstream. In the case of X86_PAE, unsigned long is u32, but the physical address type should be u64. Due to the bug here, the netvsc driver can not load successfully, and sometimes the VM can panic due to memory corruption (the hypervisor writes data to the wrong location). Fixes: 6ba34171bcbd ("Drivers: hv: vmbus: Remove use of slow_virt_to_phys()") Cc: stable@vger.kernel.org Cc: Michael Kelley Reported-and-tested-by: Juliana Rodrigueiro Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 2f164bd74687..fdb0f832fade 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -38,7 +38,7 @@ static unsigned long virt_to_hvpfn(void *addr) { - unsigned long paddr; + phys_addr_t paddr; if (is_vmalloc_addr(addr)) paddr = page_to_phys(vmalloc_to_page(addr)) + From cf13e30c58d4e4d19f773bc77a054d4892e26da9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 24 Aug 2019 17:54:56 -0700 Subject: [PATCH 069/101] userfaultfd_release: always remove uffd flags and clear vm_userfaultfd_ctx commit 46d0b24c5ee10a15dfb25e20642f5a5ed59c5003 upstream. userfaultfd_release() should clear vm_flags/vm_userfaultfd_ctx even if mm->core_state != NULL. Otherwise a page fault can see userfaultfd_missing() == T and use an already freed userfaultfd_ctx. Link: http://lkml.kernel.org/r/20190820160237.GB4983@redhat.com Fixes: 04f5866e41fb ("coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping") Signed-off-by: Oleg Nesterov Reported-by: Kefeng Wang Reviewed-by: Andrea Arcangeli Tested-by: Kefeng Wang Cc: Peter Xu Cc: Mike Rapoport Cc: Jann Horn Cc: Jason Gunthorpe Cc: Michal Hocko Cc: Tetsuo Handa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index e1ebdbe40032..9c2955f67f70 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -881,6 +881,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) /* len == 0 means wake all */ struct userfaultfd_wake_range range = { .len = 0, }; unsigned long new_flags; + bool still_valid; WRITE_ONCE(ctx->released, true); @@ -896,8 +897,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * taking the mmap_sem for writing. */ down_write(&mm->mmap_sem); - if (!mmget_still_valid(mm)) - goto skip_mm; + still_valid = mmget_still_valid(mm); prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { cond_resched(); @@ -908,19 +908,20 @@ static int userfaultfd_release(struct inode *inode, struct file *file) continue; } new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP); - prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, - new_flags, vma->anon_vma, - vma->vm_file, vma->vm_pgoff, - vma_policy(vma), - NULL_VM_UFFD_CTX); - if (prev) - vma = prev; - else - prev = vma; + if (still_valid) { + prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, + new_flags, vma->anon_vma, + vma->vm_file, vma->vm_pgoff, + vma_policy(vma), + NULL_VM_UFFD_CTX); + if (prev) + vma = prev; + else + prev = vma; + } vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } -skip_mm: up_write(&mm->mmap_sem); mmput(mm); wakeup: From f9747104a5c80bbaa5c4ea3204f96cca35ae0dbd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 Aug 2019 14:11:22 -0700 Subject: [PATCH 070/101] x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386 commit b63f20a778c88b6a04458ed6ffc69da953d3a109 upstream. Use 'lea' instead of 'add' when adjusting %rsp in CALL_NOSPEC so as to avoid clobbering flags. KVM's emulator makes indirect calls into a jump table of sorts, where the destination of the CALL_NOSPEC is a small blob of code that performs fast emulation by executing the target instruction with fixed operands. adcb_al_dl: 0x000339f8 <+0>: adc %dl,%al 0x000339fa <+2>: ret A major motiviation for doing fast emulation is to leverage the CPU to handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is both an input and output to the target of CALL_NOSPEC. Clobbering flags results in all sorts of incorrect emulation, e.g. Jcc instructions often take the wrong path. Sans the nops... asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" 0x0003595a <+58>: mov 0xc0(%ebx),%eax 0x00035960 <+64>: mov 0x60(%ebx),%edx 0x00035963 <+67>: mov 0x90(%ebx),%ecx 0x00035969 <+73>: push %edi 0x0003596a <+74>: popf 0x0003596b <+75>: call *%esi 0x000359a0 <+128>: pushf 0x000359a1 <+129>: pop %edi 0x000359a2 <+130>: mov %eax,0xc0(%ebx) 0x000359b1 <+145>: mov %edx,0x60(%ebx) ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); 0x000359a8 <+136>: mov -0x10(%ebp),%eax 0x000359ab <+139>: and $0x8d5,%edi 0x000359b4 <+148>: and $0xfffff72a,%eax 0x000359b9 <+153>: or %eax,%edi 0x000359bd <+157>: mov %edi,0x4(%ebx) For the most part this has gone unnoticed as emulation of guest code that can trigger fast emulation is effectively limited to MMIO when running on modern hardware, and MMIO is rarely, if ever, accessed by instructions that affect or consume flags. Breakage is almost instantaneous when running with unrestricted guest disabled, in which case KVM must emulate all instructions when the guest has invalid state, e.g. when the guest is in Big Real Mode during early BIOS. Fixes: 776b043848fd2 ("x86/retpoline: Add initial retpoline support") Fixes: 1a29b5b7f347a ("KVM: x86: Make indirect calls in emulator speculation safe") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190822211122.27579-1-sean.j.christopherson@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 599c273f5d00..28cb2b31527a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -202,7 +202,7 @@ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ - "903: addl $4, %%esp;\n" \ + "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ From 685e598e447ed062a3a2dd375e83576bf86c506d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Aug 2019 14:54:07 +0200 Subject: [PATCH 071/101] x86/apic: Handle missing global clockevent gracefully commit f897e60a12f0b9146357780d317879bce2a877dc upstream. Some newer machines do not advertise legacy timers. The kernel can handle that situation if the TSC and the CPU frequency are enumerated by CPUID or MSRs and the CPU supports TSC deadline timer. If the CPU does not support TSC deadline timer the local APIC timer frequency has to be known as well. Some Ryzens machines do not advertize legacy timers, but there is no reliable way to determine the bus frequency which feeds the local APIC timer when the machine allows overclocking of that frequency. As there is no legacy timer the local APIC timer calibration crashes due to a NULL pointer dereference when accessing the not installed global clock event device. Switch the calibration loop to a non interrupt based one, which polls either TSC (if frequency is known) or jiffies. The latter requires a global clockevent. As the machines which do not have a global clockevent installed have a known TSC frequency this is a non issue. For older machines where TSC frequency is not known, there is no known case where the legacy timers do not exist as that would have been reported long ago. Reported-by: Daniel Drake Reported-by: Jiri Slaby Signed-off-by: Thomas Gleixner Tested-by: Daniel Drake Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908091443030.21433@nanos.tec.linutronix.de Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1142926#c12 Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 68 +++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 272a12865b2a..b316bd61a6ac 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -715,7 +715,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; /* - * Temporary interrupt handler. + * Temporary interrupt handler and polled calibration function. */ static void __init lapic_cal_handler(struct clock_event_device *dev) { @@ -799,7 +799,8 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); - void (*real_handler)(struct clock_event_device *dev); + u64 tsc_perj = 0, tsc_start = 0; + unsigned long jif_start; unsigned long deltaj; long delta, deltatsc; int pm_referenced = 0; @@ -830,29 +831,65 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" "calibrating APIC timer ...\n"); + /* + * There are platforms w/o global clockevent devices. Instead of + * making the calibration conditional on that, use a polling based + * approach everywhere. + */ local_irq_disable(); - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - /* * Setup the APIC counter to maximum. There is no way the lapic * can underflow in the 100ms detection time frame */ __setup_APIC_LVTT(0xffffffff, 0, 0); - /* Let the interrupts run */ + /* + * Methods to terminate the calibration loop: + * 1) Global clockevent if available (jiffies) + * 2) TSC if available and frequency is known + */ + jif_start = READ_ONCE(jiffies); + + if (tsc_khz) { + tsc_start = rdtsc(); + tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); + } + + /* + * Enable interrupts so the tick can fire, if a global + * clockevent device is available + */ local_irq_enable(); - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { + /* Wait for a tick to elapse */ + while (1) { + if (tsc_khz) { + u64 tsc_now = rdtsc(); + if ((tsc_now - tsc_start) >= tsc_perj) { + tsc_start += tsc_perj; + break; + } + } else { + unsigned long jif_now = READ_ONCE(jiffies); + + if (time_after(jif_now, jif_start)) { + jif_start = jif_now; + break; + } + } + cpu_relax(); + } + + /* Invoke the calibration routine */ + local_irq_disable(); + lapic_cal_handler(NULL); + local_irq_enable(); + } local_irq_disable(); - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - /* Build delta t1-t2 as apic timer counts down */ delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); @@ -904,10 +941,11 @@ static int __init calibrate_APIC_clock(void) levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* - * PM timer calibration failed or not turned on - * so lets try APIC timer based calibration + * PM timer calibration failed or not turned on so lets try APIC + * timer based calibration, if a global clockevent device is + * available. */ - if (!pm_referenced) { + if (!pm_referenced && global_clock_event) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); /* From e063b03b451a26acdb840f9080871faf3a00b28d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 19 Aug 2019 15:52:35 +0000 Subject: [PATCH 072/101] x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h commit c49a0a80137c7ca7d6ced4c812c9e07a949f6f24 upstream. There have been reports of RDRAND issues after resuming from suspend on some AMD family 15h and family 16h systems. This issue stems from a BIOS not performing the proper steps during resume to ensure RDRAND continues to function properly. RDRAND support is indicated by CPUID Fn00000001_ECX[30]. This bit can be reset by clearing MSR C001_1004[62]. Any software that checks for RDRAND support using CPUID, including the kernel, will believe that RDRAND is not supported. Update the CPU initialization to clear the RDRAND CPUID bit for any family 15h and 16h processor that supports RDRAND. If it is known that the family 15h or family 16h system does not have an RDRAND resume issue or that the system will not be placed in suspend, the "rdrand=force" kernel parameter can be used to stop the clearing of the RDRAND CPUID bit. Additionally, update the suspend and resume path to save and restore the MSR C001_1004 value to ensure that the RDRAND CPUID setting remains in place after resuming from suspend. Note, that clearing the RDRAND CPUID bit does not prevent a processor that normally supports the RDRAND instruction from executing it. So any code that determined the support based on family and model won't #UD. Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: Andrew Cooper Cc: Andrew Morton Cc: Chen Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: "linux-doc@vger.kernel.org" Cc: "linux-pm@vger.kernel.org" Cc: Nathan Chancellor Cc: Paolo Bonzini Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Cc: Thomas Gleixner Cc: "x86@kernel.org" Link: https://lkml.kernel.org/r/7543af91666f491547bd86cebb1e17c66824ab9f.1566229943.git.thomas.lendacky@amd.com Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 7 ++ arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 66 ++++++++++++++ arch/x86/power/cpu.c | 86 ++++++++++++++++--- 4 files changed, 147 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c96a8e9ad5c2..e8ddf0ef232e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3948,6 +3948,13 @@ Run specified binary instead of /init from the ramdisk, used for early userspace startup. See initrd. + rdrand= [X86] + force - Override the decision by the kernel to hide the + advertisement of RDRAND support (this affects + certain AMD processors because of buggy BIOS + support, specifically around the suspend/resume + path). + rdt= [HW,X86,RDT] Turn on/off individual RDT features. List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index f85f43db9225..a1d22e4428f6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -334,6 +334,7 @@ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index da1f5e78363e..f86f912ce215 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -799,6 +799,64 @@ static void init_amd_ln(struct cpuinfo_x86 *c) msr_set_bit(MSR_AMD64_DE_CFG, 31); } +static bool rdrand_force; + +static int __init rdrand_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "force")) + rdrand_force = true; + else + return -EINVAL; + + return 0; +} +early_param("rdrand", rdrand_cmdline); + +static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c) +{ + /* + * Saving of the MSR used to hide the RDRAND support during + * suspend/resume is done by arch/x86/power/cpu.c, which is + * dependent on CONFIG_PM_SLEEP. + */ + if (!IS_ENABLED(CONFIG_PM_SLEEP)) + return; + + /* + * The nordrand option can clear X86_FEATURE_RDRAND, so check for + * RDRAND support using the CPUID function directly. + */ + if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force) + return; + + msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62); + + /* + * Verify that the CPUID change has occurred in case the kernel is + * running virtualized and the hypervisor doesn't support the MSR. + */ + if (cpuid_ecx(1) & BIT(30)) { + pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n"); + return; + } + + clear_cpu_cap(c, X86_FEATURE_RDRAND); + pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n"); +} + +static void init_amd_jg(struct cpuinfo_x86 *c) +{ + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); +} + static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; @@ -813,6 +871,13 @@ static void init_amd_bd(struct cpuinfo_x86 *c) wrmsrl_safe(MSR_F15H_IC_CFG, value); } } + + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); } static void init_amd_zn(struct cpuinfo_x86 *c) @@ -855,6 +920,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; case 0x17: init_amd_zn(c); break; } diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 513ce09e9950..3aa3149df07f 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -24,7 +25,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -398,15 +399,14 @@ static int __init bsp_pm_check_init(void) core_initcall(bsp_pm_check_init); -static int msr_init_context(const u32 *msr_id, const int total_num) +static int msr_build_context(const u32 *msr_id, const int num) { - int i = 0; + struct saved_msrs *saved_msrs = &saved_context.saved_msrs; struct saved_msr *msr_array; + int total_num; + int i, j; - if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) { - pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n"); - return -EINVAL; - } + total_num = saved_msrs->num + num; msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL); if (!msr_array) { @@ -414,19 +414,30 @@ static int msr_init_context(const u32 *msr_id, const int total_num) return -ENOMEM; } - for (i = 0; i < total_num; i++) { - msr_array[i].info.msr_no = msr_id[i]; + if (saved_msrs->array) { + /* + * Multiple callbacks can invoke this function, so copy any + * MSR save requests from previous invocations. + */ + memcpy(msr_array, saved_msrs->array, + sizeof(struct saved_msr) * saved_msrs->num); + + kfree(saved_msrs->array); + } + + for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + msr_array[i].info.msr_no = msr_id[j]; msr_array[i].valid = false; msr_array[i].info.reg.q = 0; } - saved_context.saved_msrs.num = total_num; - saved_context.saved_msrs.array = msr_array; + saved_msrs->num = total_num; + saved_msrs->array = msr_array; return 0; } /* - * The following section is a quirk framework for problematic BIOSen: + * The following sections are a quirk framework for problematic BIOSen: * Sometimes MSRs are modified by the BIOSen after suspended to * RAM, this might cause unexpected behavior after wakeup. * Thus we save/restore these specified MSRs across suspend/resume @@ -441,7 +452,7 @@ static int msr_initialize_bdw(const struct dmi_system_id *d) u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL }; pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident); - return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id)); + return msr_build_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id)); } static const struct dmi_system_id msr_save_dmi_table[] = { @@ -456,9 +467,58 @@ static const struct dmi_system_id msr_save_dmi_table[] = { {} }; +static int msr_save_cpuid_features(const struct x86_cpu_id *c) +{ + u32 cpuid_msr_id[] = { + MSR_AMD64_CPUID_FN_1, + }; + + pr_info("x86/pm: family %#hx cpu detected, MSR saving is needed during suspending.\n", + c->family); + + return msr_build_context(cpuid_msr_id, ARRAY_SIZE(cpuid_msr_id)); +} + +static const struct x86_cpu_id msr_save_cpu_table[] = { + { + .vendor = X86_VENDOR_AMD, + .family = 0x15, + .model = X86_MODEL_ANY, + .feature = X86_FEATURE_ANY, + .driver_data = (kernel_ulong_t)msr_save_cpuid_features, + }, + { + .vendor = X86_VENDOR_AMD, + .family = 0x16, + .model = X86_MODEL_ANY, + .feature = X86_FEATURE_ANY, + .driver_data = (kernel_ulong_t)msr_save_cpuid_features, + }, + {} +}; + +typedef int (*pm_cpu_match_t)(const struct x86_cpu_id *); +static int pm_cpu_check(const struct x86_cpu_id *c) +{ + const struct x86_cpu_id *m; + int ret = 0; + + m = x86_match_cpu(msr_save_cpu_table); + if (m) { + pm_cpu_match_t fn; + + fn = (pm_cpu_match_t)m->driver_data; + ret = fn(m); + } + + return ret; +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); + pm_cpu_check(msr_save_cpu_table); + return 0; } From d955601166f8e738f6c098d924e2f4147632bf4b Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 30 Jul 2019 22:46:27 -0700 Subject: [PATCH 073/101] x86/boot: Save fields explicitly, zero out everything else commit a90118c445cc7f07781de26a9684d4ec58bfcfd1 upstream. Recent gcc compilers (gcc 9.1) generate warnings about an out of bounds memset, if the memset goes accross several fields of a struct. This generated a couple of warnings on x86_64 builds in sanitize_boot_params(). Fix this by explicitly saving the fields in struct boot_params that are intended to be preserved, and zeroing all the rest. [ tglx: Tagged for stable as it breaks the warning free build there as well ] Suggested-by: Thomas Gleixner Suggested-by: H. Peter Anvin Signed-off-by: John Hubbard Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190731054627.5627-2-jhubbard@nvidia.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/bootparam_utils.h | 60 ++++++++++++++++++++------ 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index a07ffd23e4dd..18575047d201 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -18,6 +18,20 @@ * Note: efi_info is commonly left uninitialized, but that field has a * private magic, so it is better to leave it unchanged. */ + +#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); }) + +#define BOOT_PARAM_PRESERVE(struct_member) \ + { \ + .start = offsetof(struct boot_params, struct_member), \ + .len = sizeof_mbr(struct boot_params, struct_member), \ + } + +struct boot_params_to_save { + unsigned int start; + unsigned int len; +}; + static void sanitize_boot_params(struct boot_params *boot_params) { /* @@ -36,19 +50,39 @@ static void sanitize_boot_params(struct boot_params *boot_params) */ if (boot_params->sentinel) { /* fields in boot_params are left uninitialized, clear them */ - memset(&boot_params->ext_ramdisk_image, 0, - (char *)&boot_params->efi_info - - (char *)&boot_params->ext_ramdisk_image); - memset(&boot_params->kbd_status, 0, - (char *)&boot_params->hdr - - (char *)&boot_params->kbd_status); - memset(&boot_params->_pad7[0], 0, - (char *)&boot_params->edd_mbr_sig_buffer[0] - - (char *)&boot_params->_pad7[0]); - memset(&boot_params->_pad8[0], 0, - (char *)&boot_params->eddbuf[0] - - (char *)&boot_params->_pad8[0]); - memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); + static struct boot_params scratch; + char *bp_base = (char *)boot_params; + char *save_base = (char *)&scratch; + int i; + + const struct boot_params_to_save to_save[] = { + BOOT_PARAM_PRESERVE(screen_info), + BOOT_PARAM_PRESERVE(apm_bios_info), + BOOT_PARAM_PRESERVE(tboot_addr), + BOOT_PARAM_PRESERVE(ist_info), + BOOT_PARAM_PRESERVE(hd0_info), + BOOT_PARAM_PRESERVE(hd1_info), + BOOT_PARAM_PRESERVE(sys_desc_table), + BOOT_PARAM_PRESERVE(olpc_ofw_header), + BOOT_PARAM_PRESERVE(efi_info), + BOOT_PARAM_PRESERVE(alt_mem_k), + BOOT_PARAM_PRESERVE(scratch), + BOOT_PARAM_PRESERVE(e820_entries), + BOOT_PARAM_PRESERVE(eddbuf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(e820_table), + BOOT_PARAM_PRESERVE(eddbuf), + }; + + memset(&scratch, 0, sizeof(scratch)); + + for (i = 0; i < ARRAY_SIZE(to_save); i++) { + memcpy(save_base + to_save[i].start, + bp_base + to_save[i].start, to_save[i].len); + } + + memcpy(boot_params, save_base, sizeof(*boot_params)); } } From f7d157f330018da765995e21244d2e68dff20eec Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 21 Aug 2019 12:25:13 -0700 Subject: [PATCH 074/101] x86/boot: Fix boot regression caused by bootparam sanitizing commit 7846f58fba964af7cb8cf77d4d13c33254725211 upstream. commit a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else") had two errors: * It preserved boot_params.acpi_rsdp_addr, and * It failed to preserve boot_params.hdr Therefore, zero out acpi_rsdp_addr, and preserve hdr. Fixes: a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else") Reported-by: Neil MacLeod Suggested-by: Thomas Gleixner Signed-off-by: John Hubbard Signed-off-by: Thomas Gleixner Tested-by: Neil MacLeod Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190821192513.20126-1-jhubbard@nvidia.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/bootparam_utils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 18575047d201..d3983fdf1012 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -71,6 +71,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) BOOT_PARAM_PRESERVE(eddbuf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(hdr), BOOT_PARAM_PRESERVE(e820_table), BOOT_PARAM_PRESERVE(eddbuf), }; From e0fb8135de9e29f5594bc29cee5b2ce1f52e5b9e Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Mon, 5 Aug 2019 16:56:03 -0700 Subject: [PATCH 075/101] dm kcopyd: always complete failed jobs commit d1fef41465f0e8cae0693fb184caa6bfafb6cd16 upstream. This patch fixes a problem in dm-kcopyd that may leave jobs in complete queue indefinitely in the event of backing storage failure. This behavior has been observed while running 100% write file fio workload against an XFS volume created on top of a dm-zoned target device. If the underlying storage of dm-zoned goes to offline state under I/O, kcopyd sometimes never issues the end copy callback and dm-zoned reclaim work hangs indefinitely waiting for that completion. This behavior was traced down to the error handling code in process_jobs() function that places the failed job to complete_jobs queue, but doesn't wake up the job handler. In case of backing device failure, all outstanding jobs may end up going to complete_jobs queue via this code path and then stay there forever because there are no more successful I/O jobs to wake up the job handler. This patch adds a wake() call to always wake up kcopyd job wait queue for all I/O jobs that fail before dm_io() gets called for that job. The patch also sets the write error status in all sub jobs that are failed because their master job has failed. Fixes: b73c67c2cbb00 ("dm kcopyd: add sequential write feature") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-kcopyd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 671c24332802..3f694d9061ec 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -548,8 +548,10 @@ static int run_io_job(struct kcopyd_job *job) * no point in continuing. */ if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && - job->master_job->write_err) + job->master_job->write_err) { + job->write_err = job->master_job->write_err; return -EIO; + } io_job_start(job->kc->throttle); @@ -601,6 +603,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, else job->read_err = 1; push(&kc->complete_jobs, job); + wake(kc); break; } From 8114012de6c111188306ca8e0b3ae2510cf51f8c Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 17 Aug 2019 13:32:40 +0800 Subject: [PATCH 076/101] dm btree: fix order of block initialization in btree_split_beneath commit e4f9d6013820d1eba1432d51dd1c5795759aa77f upstream. When btree_split_beneath() splits a node to two new children, it will allocate two blocks: left and right. If right block's allocation failed, the left block will be unlocked and marked dirty. If this happened, the left block'ss content is zero, because it wasn't initialized with the btree struct before the attempot to allocate the right block. Upon return, when flushing the left block to disk, the validator will fail when check this block. Then a BUG_ON is raised. Fix this by completely initializing the left block before allocating and initializing the right block. Fixes: 4dcb8b57df359 ("dm btree: fix leak of bufio-backed block in btree_split_beneath error path") Cc: stable@vger.kernel.org Signed-off-by: ZhangXiaoxu Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree.c | 31 ++++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 58b319757b1e..8aae0624a297 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -628,39 +628,40 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) new_parent = shadow_current(s); + pn = dm_block_data(new_parent); + size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? + sizeof(__le64) : s->info->value_type.size; + + /* create & init the left block */ r = new_block(s->info, &left); if (r < 0) return r; + ln = dm_block_data(left); + nr_left = le32_to_cpu(pn->header.nr_entries) / 2; + + ln->header.flags = pn->header.flags; + ln->header.nr_entries = cpu_to_le32(nr_left); + ln->header.max_entries = pn->header.max_entries; + ln->header.value_size = pn->header.value_size; + memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); + memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); + + /* create & init the right block */ r = new_block(s->info, &right); if (r < 0) { unlock_block(s->info, left); return r; } - pn = dm_block_data(new_parent); - ln = dm_block_data(left); rn = dm_block_data(right); - - nr_left = le32_to_cpu(pn->header.nr_entries) / 2; nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; - ln->header.flags = pn->header.flags; - ln->header.nr_entries = cpu_to_le32(nr_left); - ln->header.max_entries = pn->header.max_entries; - ln->header.value_size = pn->header.value_size; - rn->header.flags = pn->header.flags; rn->header.nr_entries = cpu_to_le32(nr_right); rn->header.max_entries = pn->header.max_entries; rn->header.value_size = pn->header.value_size; - - memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); - - size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? - sizeof(__le64) : s->info->value_type.size; - memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), nr_right * size); From 795b0572729bc828710f8783feb432678da87ccb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 10 Aug 2019 12:30:27 -0400 Subject: [PATCH 077/101] dm integrity: fix a crash due to BUG_ON in __journal_read_write() commit 5729b6e5a1bcb0bbc28abe82d749c7392f66d2c7 upstream. Fix a crash that was introduced by the commit 724376a04d1a. The crash is reported here: https://gitlab.com/cryptsetup/cryptsetup/issues/468 When reading from the integrity device, the function dm_integrity_map_continue calls find_journal_node to find out if the location to read is present in the journal. Then, it calculates how many sectors are consecutively stored in the journal. Then, it locks the range with add_new_range and wait_and_add_new_range. The problem is that during wait_and_add_new_range, we hold no locks (we don't hold ic->endio_wait.lock and we don't hold a range lock), so the journal may change arbitrarily while wait_and_add_new_range sleeps. The code then goes to __journal_read_write and hits BUG_ON(journal_entry_get_sector(je) != logical_sector); because the journal has changed. In order to fix this bug, we need to re-check the journal location after wait_and_add_new_range. We restrict the length to one block in order to not complicate the code too much. Fixes: 724376a04d1a ("dm integrity: implement fair range locks") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index dbdcc543832d..2e22d588f056 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1749,7 +1749,22 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map queue_work(ic->wait_wq, &dio->work); return; } + if (journal_read_pos != NOT_FOUND) + dio->range.n_sectors = ic->sectors_per_block; wait_and_add_new_range(ic, &dio->range); + /* + * wait_and_add_new_range drops the spinlock, so the journal + * may have been changed arbitrarily. We need to recheck. + * To simplify the code, we restrict I/O size to just one block. + */ + if (journal_read_pos != NOT_FOUND) { + sector_t next_sector; + unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); + if (unlikely(new_pos != journal_read_pos)) { + remove_range_unlocked(ic, &dio->range); + goto retry; + } + } } spin_unlock_irq(&ic->endio_wait.lock); From 2cff6c87a0dcb83b886b07e32e69f840e5b84cfd Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sun, 18 Aug 2019 19:18:34 -0500 Subject: [PATCH 078/101] dm raid: add missing cleanup in raid_ctr() commit dc1a3e8e0cc6b2293b48c044710e63395aeb4fb4 upstream. If rs_prepare_reshape() fails, no cleanup is executed, leading to leak of the raid_set structure allocated at the beginning of raid_ctr(). To fix this issue, go to the label 'bad' if the error occurs. Fixes: 11e4723206683 ("dm raid: stop keeping raid set frozen altogether") Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c44925e4e481..b78a8a4d061c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3199,7 +3199,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) */ r = rs_prepare_reshape(rs); if (r) - return r; + goto bad; /* Reshaping ain't recovery, so disable recovery */ rs_setup_recovery(rs, MaxSector); From 53e73d1079d7550f607db3d946e863b28573cc52 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Mon, 19 Aug 2019 11:31:21 +0800 Subject: [PATCH 079/101] dm space map metadata: fix missing store of apply_bops() return value commit ae148243d3f0816b37477106c05a2ec7d5f32614 upstream. In commit 6096d91af0b6 ("dm space map metadata: fix occasional leak of a metadata block on resize"), we refactor the commit logic to a new function 'apply_bops'. But when that logic was replaced in out() the return value was not stored. This may lead out() returning a wrong value to the caller. Fixes: 6096d91af0b6 ("dm space map metadata: fix occasional leak of a metadata block on resize") Cc: stable@vger.kernel.org Signed-off-by: ZhangXiaoxu Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-space-map-metadata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index aec449243966..25328582cc48 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -249,7 +249,7 @@ static int out(struct sm_metadata *smm) } if (smm->recursion_count == 1) - apply_bops(smm); + r = apply_bops(smm); smm->recursion_count--; From ded8e524cfa6deb20e499ffcc51079ae3787d30a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 23 Aug 2019 09:54:09 -0400 Subject: [PATCH 080/101] dm table: fix invalid memory accesses with too high sector number commit 1cfd5d3399e87167b7f9157ef99daa0e959f395d upstream. If the sector number is too high, dm_table_find_target() should return a pointer to a zeroed dm_target structure (the caller should test it with dm_target_is_valid). However, for some table sizes, the code in dm_table_find_target() that performs btree lookup will access out of bound memory structures. Fix this bug by testing the sector number at the beginning of dm_table_find_target(). Also, add an "inline" keyword to the function dm_table_get_size() because this is a hot path. Fixes: 512875bd9661 ("dm: table detect io beyond device") Cc: stable@vger.kernel.org Reported-by: Zhang Tao Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 34ab30dd5de9..36275c59e4e7 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1349,7 +1349,7 @@ void dm_table_event(struct dm_table *t) } EXPORT_SYMBOL(dm_table_event); -sector_t dm_table_get_size(struct dm_table *t) +inline sector_t dm_table_get_size(struct dm_table *t) { return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; } @@ -1374,6 +1374,9 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) unsigned int l, n = 0, k = 0; sector_t *node; + if (unlikely(sector >= dm_table_get_size(t))) + return &t->targets[t->num_targets]; + for (l = 0; l < t->depth; l++) { n = get_child(n, k); node = get_node(t, l, n); From 8b7c17bb2753aacbe7a1ca220865f2b8954c5e65 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 10 Aug 2019 14:43:09 -0700 Subject: [PATCH 081/101] dm zoned: improve error handling in reclaim commit b234c6d7a703661b5045c5bf569b7c99d2edbf88 upstream. There are several places in reclaim code where errors are not propagated to the main function, dmz_reclaim(). This function is responsible for unlocking zones that might be still locked at the end of any failed reclaim iterations. As the result, some device zones may be left permanently locked for reclaim, degrading target's capability to reclaim zones. This patch fixes these issues as follows - Make sure that dmz_reclaim_buf(), dmz_reclaim_seq_data() and dmz_reclaim_rnd_data() return error codes to the caller. dmz_reclaim() function is renamed to dmz_do_reclaim() to avoid clashing with "struct dmz_reclaim" and is modified to return the error to the caller. dmz_get_zone_for_reclaim() now returns an error instead of NULL pointer and reclaim code checks for that error. Error logging/debug messages are added where necessary. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 4 ++-- drivers/md/dm-zoned-reclaim.c | 28 +++++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 4cdde7a02e94..1b8df136b7f6 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1534,7 +1534,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) struct dm_zone *zone; if (list_empty(&zmd->map_rnd_list)) - return NULL; + return ERR_PTR(-EBUSY); list_for_each_entry(zone, &zmd->map_rnd_list, link) { if (dmz_is_buf(zone)) @@ -1545,7 +1545,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) return dzone; } - return NULL; + return ERR_PTR(-EBUSY); } /* diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index edf4b95eb075..e381354dc136 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -215,7 +215,7 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -259,7 +259,7 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -312,7 +312,7 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -334,7 +334,7 @@ static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone) /* * Find a candidate zone for reclaim and process it. */ -static void dmz_reclaim(struct dmz_reclaim *zrc) +static int dmz_do_reclaim(struct dmz_reclaim *zrc) { struct dmz_metadata *zmd = zrc->metadata; struct dm_zone *dzone; @@ -344,8 +344,8 @@ static void dmz_reclaim(struct dmz_reclaim *zrc) /* Get a data zone */ dzone = dmz_get_zone_for_reclaim(zmd); - if (!dzone) - return; + if (IS_ERR(dzone)) + return PTR_ERR(dzone); start = jiffies; @@ -391,13 +391,20 @@ static void dmz_reclaim(struct dmz_reclaim *zrc) out: if (ret) { dmz_unlock_zone_reclaim(dzone); - return; + return ret; } - (void) dmz_flush_metadata(zrc->metadata); + ret = dmz_flush_metadata(zrc->metadata); + if (ret) { + dmz_dev_debug(zrc->dev, + "Metadata flush for zone %u failed, err %d\n", + dmz_id(zmd, rzone), ret); + return ret; + } dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms", dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start)); + return 0; } /* @@ -442,6 +449,7 @@ static void dmz_reclaim_work(struct work_struct *work) struct dmz_metadata *zmd = zrc->metadata; unsigned int nr_rnd, nr_unmap_rnd; unsigned int p_unmap_rnd; + int ret; if (!dmz_should_reclaim(zrc)) { mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); @@ -471,7 +479,9 @@ static void dmz_reclaim_work(struct work_struct *work) (dmz_target_idle(zrc) ? "Idle" : "Busy"), p_unmap_rnd, nr_unmap_rnd, nr_rnd); - dmz_reclaim(zrc); + ret = dmz_do_reclaim(zrc); + if (ret) + dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); dmz_schedule_reclaim(zrc); } From 4530f2f1a79ab2ff096eb0122655a6a0a51d4c37 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 10 Aug 2019 14:43:10 -0700 Subject: [PATCH 082/101] dm zoned: improve error handling in i/o map code commit d7428c50118e739e672656c28d2b26b09375d4e0 upstream. Some errors are ignored in the I/O path during queueing chunks for processing by chunk works. Since at least these errors are transient in nature, it should be possible to retry the failed incoming commands. The fix - Errors that can happen while queueing chunks are carried upwards to the main mapping function and it now returns DM_MAPIO_REQUEUE for any incoming requests that can not be properly queued. Error logging/debug messages are added where needed. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-target.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 85fb2baa8a7f..91beacfc966f 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -513,22 +513,24 @@ static void dmz_flush_work(struct work_struct *work) * Get a chunk work and start it to process a new BIO. * If the BIO chunk has no work yet, create one. */ -static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) +static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) { unsigned int chunk = dmz_bio_chunk(dmz->dev, bio); struct dm_chunk_work *cw; + int ret = 0; mutex_lock(&dmz->chunk_lock); /* Get the BIO chunk work. If one is not active yet, create one */ cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk); if (!cw) { - int ret; /* Create a new chunk work */ cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO); - if (!cw) + if (unlikely(!cw)) { + ret = -ENOMEM; goto out; + } INIT_WORK(&cw->work, dmz_chunk_work); atomic_set(&cw->refcount, 0); @@ -539,7 +541,6 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw); if (unlikely(ret)) { kfree(cw); - cw = NULL; goto out; } } @@ -547,10 +548,12 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) bio_list_add(&cw->bio_list, bio); dmz_get_chunk_work(cw); + dmz_reclaim_bio_acc(dmz->reclaim); if (queue_work(dmz->chunk_wq, &cw->work)) dmz_get_chunk_work(cw); out: mutex_unlock(&dmz->chunk_lock); + return ret; } /* @@ -564,6 +567,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) sector_t sector = bio->bi_iter.bi_sector; unsigned int nr_sectors = bio_sectors(bio); sector_t chunk_sector; + int ret; dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks", bio_op(bio), (unsigned long long)sector, nr_sectors, @@ -601,8 +605,14 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, dev->zone_nr_sectors - chunk_sector); /* Now ready to handle this BIO */ - dmz_reclaim_bio_acc(dmz->reclaim); - dmz_queue_chunk_work(dmz, bio); + ret = dmz_queue_chunk_work(dmz, bio); + if (ret) { + dmz_dev_debug(dmz->dev, + "BIO op %d, can't process chunk %llu, err %i\n", + bio_op(bio), (u64)dmz_bio_chunk(dmz->dev, bio), + ret); + return DM_MAPIO_REQUEUE; + } return DM_MAPIO_SUBMITTED; } From c14fe4e8fd011c702c8867c8dc685d396fb5f538 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 10 Aug 2019 14:43:11 -0700 Subject: [PATCH 083/101] dm zoned: properly handle backing device failure commit 75d66ffb48efb30f2dd42f041ba8b39c5b2bd115 upstream. dm-zoned is observed to lock up or livelock in case of hardware failure or some misconfiguration of the backing zoned device. This patch adds a new dm-zoned target function that checks the status of the backing device. If the request queue of the backing device is found to be in dying state or the SCSI backing device enters offline state, the health check code sets a dm-zoned target flag prompting all further incoming I/O to be rejected. In order to detect backing device failures timely, this new function is called in the request mapping path, at the beginning of every reclaim run and before performing any metadata I/O. The proper way out of this situation is to do dmsetup remove and recreate the target when the problem with the backing device is resolved. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-zoned-metadata.c | 51 +++++++++++++++++++++++++++------- drivers/md/dm-zoned-reclaim.c | 18 ++++++++++-- drivers/md/dm-zoned-target.c | 45 ++++++++++++++++++++++++++++-- drivers/md/dm-zoned.h | 10 +++++++ 4 files changed, 110 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 1b8df136b7f6..00e7a343eacf 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -401,15 +401,18 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; struct bio *bio; + if (dmz_bdev_is_dying(zmd->dev)) + return ERR_PTR(-EIO); + /* Get a new block and a BIO to read it */ mblk = dmz_alloc_mblock(zmd, mblk_no); if (!mblk) - return NULL; + return ERR_PTR(-ENOMEM); bio = bio_alloc(GFP_NOIO, 1); if (!bio) { dmz_free_mblock(zmd, mblk); - return NULL; + return ERR_PTR(-ENOMEM); } spin_lock(&zmd->mblk_lock); @@ -540,8 +543,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, if (!mblk) { /* Cache miss: read the block from disk */ mblk = dmz_get_mblock_slow(zmd, mblk_no); - if (!mblk) - return ERR_PTR(-ENOMEM); + if (IS_ERR(mblk)) + return mblk; } /* Wait for on-going read I/O and check for error */ @@ -569,16 +572,19 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) /* * Issue a metadata block write BIO. */ -static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, - unsigned int set) +static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, + unsigned int set) { sector_t block = zmd->sb[set].block + mblk->no; struct bio *bio; + if (dmz_bdev_is_dying(zmd->dev)) + return -EIO; + bio = bio_alloc(GFP_NOIO, 1); if (!bio) { set_bit(DMZ_META_ERROR, &mblk->state); - return; + return -ENOMEM; } set_bit(DMZ_META_WRITING, &mblk->state); @@ -590,6 +596,8 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0); submit_bio(bio); + + return 0; } /* @@ -601,6 +609,9 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, struct bio *bio; int ret; + if (dmz_bdev_is_dying(zmd->dev)) + return -EIO; + bio = bio_alloc(GFP_NOIO, 1); if (!bio) return -ENOMEM; @@ -658,22 +669,29 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, { struct dmz_mblock *mblk; struct blk_plug plug; - int ret = 0; + int ret = 0, nr_mblks_submitted = 0; /* Issue writes */ blk_start_plug(&plug); - list_for_each_entry(mblk, write_list, link) - dmz_write_mblock(zmd, mblk, set); + list_for_each_entry(mblk, write_list, link) { + ret = dmz_write_mblock(zmd, mblk, set); + if (ret) + break; + nr_mblks_submitted++; + } blk_finish_plug(&plug); /* Wait for completion */ list_for_each_entry(mblk, write_list, link) { + if (!nr_mblks_submitted) + break; wait_on_bit_io(&mblk->state, DMZ_META_WRITING, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { clear_bit(DMZ_META_ERROR, &mblk->state); ret = -EIO; } + nr_mblks_submitted--; } /* Flush drive cache (this will also sync data) */ @@ -735,6 +753,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ dmz_lock_flush(zmd); + if (dmz_bdev_is_dying(zmd->dev)) { + ret = -EIO; + goto out; + } + /* Get dirty blocks */ spin_lock(&zmd->mblk_lock); list_splice_init(&zmd->mblk_dirty_list, &write_list); @@ -1623,6 +1646,10 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu /* Alloate a random zone */ dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!dzone) { + if (dmz_bdev_is_dying(zmd->dev)) { + dzone = ERR_PTR(-EIO); + goto out; + } dmz_wait_for_free_zones(zmd); goto again; } @@ -1720,6 +1747,10 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd, /* Alloate a random zone */ bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!bzone) { + if (dmz_bdev_is_dying(zmd->dev)) { + bzone = ERR_PTR(-EIO); + goto out; + } dmz_wait_for_free_zones(zmd); goto again; } diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index e381354dc136..9470b8f77a33 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -37,7 +37,7 @@ enum { /* * Number of seconds of target BIO inactivity to consider the target idle. */ -#define DMZ_IDLE_PERIOD (10UL * HZ) +#define DMZ_IDLE_PERIOD (10UL * HZ) /* * Percentage of unmapped (free) random zones below which reclaim starts @@ -134,6 +134,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, set_bit(DM_KCOPYD_WRITE_SEQ, &flags); while (block < end_block) { + if (dev->flags & DMZ_BDEV_DYING) + return -EIO; + /* Get a valid region from the source zone */ ret = dmz_first_valid_block(zmd, src_zone, &block); if (ret <= 0) @@ -451,6 +454,9 @@ static void dmz_reclaim_work(struct work_struct *work) unsigned int p_unmap_rnd; int ret; + if (dmz_bdev_is_dying(zrc->dev)) + return; + if (!dmz_should_reclaim(zrc)) { mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); return; @@ -480,8 +486,16 @@ static void dmz_reclaim_work(struct work_struct *work) p_unmap_rnd, nr_unmap_rnd, nr_rnd); ret = dmz_do_reclaim(zrc); - if (ret) + if (ret) { dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); + if (ret == -EIO) + /* + * LLD might be performing some error handling sequence + * at the underlying device. To not interfere, do not + * attempt to schedule the next reclaim run immediately. + */ + return; + } dmz_schedule_reclaim(zrc); } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 91beacfc966f..1030c42add05 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -133,6 +133,8 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone, atomic_inc(&bioctx->ref); generic_make_request(clone); + if (clone->bi_status == BLK_STS_IOERR) + return -EIO; if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone)) zone->wp_block += nr_blocks; @@ -277,8 +279,8 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz, /* Get the buffer zone. One will be allocated if needed */ bzone = dmz_get_chunk_buffer(zmd, zone); - if (!bzone) - return -ENOSPC; + if (IS_ERR(bzone)) + return PTR_ERR(bzone); if (dmz_is_readonly(bzone)) return -EROFS; @@ -389,6 +391,11 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw, dmz_lock_metadata(zmd); + if (dmz->dev->flags & DMZ_BDEV_DYING) { + ret = -EIO; + goto out; + } + /* * Get the data zone mapping the chunk. There may be no * mapping for read and discard. If a mapping is obtained, @@ -493,6 +500,8 @@ static void dmz_flush_work(struct work_struct *work) /* Flush dirty metadata blocks */ ret = dmz_flush_metadata(dmz->metadata); + if (ret) + dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret); /* Process queued flush requests */ while (1) { @@ -556,6 +565,32 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) return ret; } +/* + * Check the backing device availability. If it's on the way out, + * start failing I/O. Reclaim and metadata components also call this + * function to cleanly abort operation in the event of such failure. + */ +bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev) +{ + struct gendisk *disk; + + if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { + disk = dmz_dev->bdev->bd_disk; + if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { + dmz_dev_warn(dmz_dev, "Backing device queue dying"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } else if (disk->fops->check_events) { + if (disk->fops->check_events(disk, 0) & + DISK_EVENT_MEDIA_CHANGE) { + dmz_dev_warn(dmz_dev, "Backing device offline"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } + } + } + + return dmz_dev->flags & DMZ_BDEV_DYING; +} + /* * Process a new BIO. */ @@ -569,6 +604,9 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) sector_t chunk_sector; int ret; + if (dmz_bdev_is_dying(dmz->dev)) + return DM_MAPIO_KILL; + dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks", bio_op(bio), (unsigned long long)sector, nr_sectors, (unsigned long long)dmz_bio_chunk(dmz->dev, bio), @@ -866,6 +904,9 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct dmz_target *dmz = ti->private; + if (dmz_bdev_is_dying(dmz->dev)) + return -ENODEV; + *bdev = dmz->dev->bdev; return 0; diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index ed8de49c9a08..93a64529f219 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -56,6 +56,8 @@ struct dmz_dev { unsigned int nr_zones; + unsigned int flags; + sector_t zone_nr_sectors; unsigned int zone_nr_sectors_shift; @@ -67,6 +69,9 @@ struct dmz_dev { (dev)->zone_nr_sectors_shift) #define dmz_chunk_block(dev, b) ((b) & ((dev)->zone_nr_blocks - 1)) +/* Device flags. */ +#define DMZ_BDEV_DYING (1 << 0) + /* * Zone descriptor. */ @@ -245,4 +250,9 @@ void dmz_resume_reclaim(struct dmz_reclaim *zrc); void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc); void dmz_schedule_reclaim(struct dmz_reclaim *zrc); +/* + * Functions defined in dm-zoned-target.c + */ +bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); + #endif /* DM_ZONED_H */ From 42731deff2ea9629ff655549f5c3cd3908887f8e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 1 Aug 2019 23:53:53 +0000 Subject: [PATCH 084/101] genirq: Properly pair kobject_del() with kobject_add() commit d0ff14fdc987303aeeb7de6f1bd72c3749ae2a9b upstream. If alloc_descs() fails before irq_sysfs_init() has run, free_desc() in the cleanup path will call kobject_del() even though the kobject has not been added with kobject_add(). Fix this by making the call to kobject_del() conditional on whether irq_sysfs_init() has run. This problem surfaced because commit aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") makes kobject_del() stricter about pairing with kobject_add(). If the pairing is incorrrect, a WARNING and backtrace occur in sysfs_remove_group() because there is no parent. [ tglx: Add a comment to the code and make it work with CONFIG_SYSFS=n ] Fixes: ecb3f394c5db ("genirq: Expose interrupt information through sysfs") Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1564703564-4116-1-git-send-email-mikelley@microsoft.com Signed-off-by: Greg Kroah-Hartman --- kernel/irq/irqdesc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 8e009cee6517..26814a14013c 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -294,6 +294,18 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) } } +static void irq_sysfs_del(struct irq_desc *desc) +{ + /* + * If irq_sysfs_init() has not yet been invoked (early boot), then + * irq_kobj_base is NULL and the descriptor was never added. + * kobject_del() complains about a object with no parent, so make + * it conditional. + */ + if (irq_kobj_base) + kobject_del(&desc->kobj); +} + static int __init irq_sysfs_init(void) { struct irq_desc *desc; @@ -324,6 +336,7 @@ static struct kobj_type irq_kobj_type = { }; static void irq_sysfs_add(int irq, struct irq_desc *desc) {} +static void irq_sysfs_del(struct irq_desc *desc) {} #endif /* CONFIG_SYSFS */ @@ -437,7 +450,7 @@ static void free_desc(unsigned int irq) * The sysfs entry must be serialized against a concurrent * irq_sysfs_init() as well. */ - kobject_del(&desc->kobj); + irq_sysfs_del(desc); delete_irq_desc(irq); /* From db67ac0316550a4f1b79a931750609be1f206b6c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sat, 24 Aug 2019 17:54:59 -0700 Subject: [PATCH 085/101] mm, page_owner: handle THP splits correctly commit f7da677bc6e72033f0981b9d58b5c5d409fa641e upstream. THP splitting path is missing the split_page_owner() call that split_page() has. As a result, split THP pages are wrongly reported in the page_owner file as order-9 pages. Furthermore when the former head page is freed, the remaining former tail pages are not listed in the page_owner file at all. This patch fixes that by adding the split_page_owner() call into __split_huge_page(). Link: http://lkml.kernel.org/r/20190820131828.22684-2-vbabka@suse.cz Fixes: a9627bc5e34e ("mm/page_owner: introduce split_page_owner and replace manual handling") Reported-by: Kirill A. Shutemov Signed-off-by: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6fad1864ba03..09ce8528bbdd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -2477,6 +2478,9 @@ static void __split_huge_page(struct page *page, struct list_head *list, } ClearPageCompound(head); + + split_page_owner(head, HPAGE_PMD_ORDER); + /* See comment in __split_huge_page_tail() */ if (PageAnon(head)) { /* Additional pin to radix tree of swap cache */ From b30a2f608e942321efb6b26e5a152555e6bb68c4 Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Sat, 24 Aug 2019 17:55:03 -0700 Subject: [PATCH 086/101] mm/zsmalloc.c: migration can leave pages in ZS_EMPTY indefinitely commit 1a87aa03597efa9641e92875b883c94c7f872ccb upstream. In zs_page_migrate() we call putback_zspage() after we have finished migrating all pages in this zspage. However, the return value is ignored. If a zs_free() races in between zs_page_isolate() and zs_page_migrate(), freeing the last object in the zspage, putback_zspage() will leave the page in ZS_EMPTY for potentially an unbounded amount of time. To fix this, we need to do the same thing as zs_page_putback() does: schedule free_work to occur. To avoid duplicated code, move the sequence to a new putback_zspage_deferred() function which both zs_page_migrate() and zs_page_putback() call. Link: http://lkml.kernel.org/r/20190809181751.219326-1-henryburns@google.com Fixes: 48b4800a1c6a ("zsmalloc: page migration support") Signed-off-by: Henry Burns Reviewed-by: Sergey Senozhatsky Cc: Henry Burns Cc: Minchan Kim Cc: Shakeel Butt Cc: Jonathan Adams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 9da65552e7ca..69d2063bb112 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1882,6 +1882,18 @@ static void dec_zspage_isolation(struct zspage *zspage) zspage->isolated--; } +static void putback_zspage_deferred(struct zs_pool *pool, + struct size_class *class, + struct zspage *zspage) +{ + enum fullness_group fg; + + fg = putback_zspage(class, zspage); + if (fg == ZS_EMPTY) + schedule_work(&pool->free_work); + +} + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -2051,7 +2063,7 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, * the list if @page is final isolated subpage in the zspage. */ if (!is_zspage_isolated(zspage)) - putback_zspage(class, zspage); + putback_zspage_deferred(pool, class, zspage); reset_page(page); put_page(page); @@ -2097,14 +2109,13 @@ static void zs_page_putback(struct page *page) spin_lock(&class->lock); dec_zspage_isolation(zspage); if (!is_zspage_isolated(zspage)) { - fg = putback_zspage(class, zspage); /* * Due to page_lock, we cannot free zspage immediately * so let's defer. */ - if (fg == ZS_EMPTY) - schedule_work(&pool->free_work); + putback_zspage_deferred(pool, class, zspage); } + spin_unlock(&class->lock); } From ed11e60033147c6e94a56ccccf9bcdb1f57722ed Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Sat, 24 Aug 2019 17:55:06 -0700 Subject: [PATCH 087/101] mm/zsmalloc.c: fix race condition in zs_destroy_pool commit 701d678599d0c1623aaf4139c03eea260a75b027 upstream. In zs_destroy_pool() we call flush_work(&pool->free_work). However, we have no guarantee that migration isn't happening in the background at that time. Since migration can't directly free pages, it relies on free_work being scheduled to free the pages. But there's nothing preventing an in-progress migrate from queuing the work *after* zs_unregister_migration() has called flush_work(). Which would mean pages still pointing at the inode when we free it. Since we know at destroy time all objects should be free, no new migrations can come in (since zs_page_isolate() fails for fully-free zspages). This means it is sufficient to track a "# isolated zspages" count by class, and have the destroy logic ensure all such pages have drained before proceeding. Keeping that state under the class spinlock keeps the logic straightforward. In this case a memory leak could lead to an eventual crash if compaction hits the leaked page. This crash would only occur if people are changing their zswap backend at runtime (which eventually starts destruction). Link: http://lkml.kernel.org/r/20190809181751.219326-2-henryburns@google.com Fixes: 48b4800a1c6a ("zsmalloc: page migration support") Signed-off-by: Henry Burns Reviewed-by: Sergey Senozhatsky Cc: Henry Burns Cc: Minchan Kim Cc: Shakeel Butt Cc: Jonathan Adams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 69d2063bb112..c2c4f739da8f 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -267,6 +268,10 @@ struct zs_pool { #ifdef CONFIG_COMPACTION struct inode *inode; struct work_struct free_work; + /* A wait queue for when migration races with async_free_zspage() */ + struct wait_queue_head migration_wait; + atomic_long_t isolated_pages; + bool destroying; #endif }; @@ -1894,6 +1899,19 @@ static void putback_zspage_deferred(struct zs_pool *pool, } +static inline void zs_pool_dec_isolated(struct zs_pool *pool) +{ + VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); + atomic_long_dec(&pool->isolated_pages); + /* + * There's no possibility of racing, since wait_for_isolated_drain() + * checks the isolated count under &class->lock after enqueuing + * on migration_wait. + */ + if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) + wake_up_all(&pool->migration_wait); +} + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1963,6 +1981,7 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) */ if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { get_zspage_mapping(zspage, &class_idx, &fullness); + atomic_long_inc(&pool->isolated_pages); remove_zspage(class, zspage, fullness); } @@ -2062,8 +2081,16 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, * Page migration is done so let's putback isolated zspage to * the list if @page is final isolated subpage in the zspage. */ - if (!is_zspage_isolated(zspage)) + if (!is_zspage_isolated(zspage)) { + /* + * We cannot race with zs_destroy_pool() here because we wait + * for isolation to hit zero before we start destroying. + * Also, we ensure that everyone can see pool->destroying before + * we start waiting. + */ putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); + } reset_page(page); put_page(page); @@ -2114,8 +2141,8 @@ static void zs_page_putback(struct page *page) * so let's defer. */ putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); } - spin_unlock(&class->lock); } @@ -2138,8 +2165,36 @@ static int zs_register_migration(struct zs_pool *pool) return 0; } +static bool pool_isolated_are_drained(struct zs_pool *pool) +{ + return atomic_long_read(&pool->isolated_pages) == 0; +} + +/* Function for resolving migration */ +static void wait_for_isolated_drain(struct zs_pool *pool) +{ + + /* + * We're in the process of destroying the pool, so there are no + * active allocations. zs_page_isolate() fails for completely free + * zspages, so we need only wait for the zs_pool's isolated + * count to hit zero. + */ + wait_event(pool->migration_wait, + pool_isolated_are_drained(pool)); +} + static void zs_unregister_migration(struct zs_pool *pool) { + pool->destroying = true; + /* + * We need a memory barrier here to ensure global visibility of + * pool->destroying. Thus pool->isolated pages will either be 0 in which + * case we don't care, or it will be > 0 and pool->destroying will + * ensure that we wake up once isolation hits 0. + */ + smp_mb(); + wait_for_isolated_drain(pool); /* This can block */ flush_work(&pool->free_work); iput(pool->inode); } @@ -2377,6 +2432,8 @@ struct zs_pool *zs_create_pool(const char *name) if (!pool->name) goto err; + init_waitqueue_head(&pool->migration_wait); + if (create_cache(pool)) goto err; From 11f85d4d77afb8f1cb1989f1565b26df21280118 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2019 20:55:54 -0700 Subject: [PATCH 088/101] xfs: fix missing ILOCK unlock when xfs_setattr_nonsize fails due to EDQUOT commit 1fb254aa983bf190cfd685d40c64a480a9bafaee upstream. Benjamin Moody reported to Debian that XFS partially wedges when a chgrp fails on account of being out of disk quota. I ran his reproducer script: # adduser dummy # adduser dummy plugdev # dd if=/dev/zero bs=1M count=100 of=test.img # mkfs.xfs test.img # mount -t xfs -o gquota test.img /mnt # mkdir -p /mnt/dummy # chown -c dummy /mnt/dummy # xfs_quota -xc 'limit -g bsoft=100k bhard=100k plugdev' /mnt (and then as user dummy) $ dd if=/dev/urandom bs=1M count=50 of=/mnt/dummy/foo $ chgrp plugdev /mnt/dummy/foo and saw: ================================================ WARNING: lock held when returning to user space! 5.3.0-rc5 #rc5 Tainted: G W ------------------------------------------------ chgrp/47006 is leaving the kernel with locks still held! 1 lock held by chgrp/47006: #0: 000000006664ea2d (&xfs_nondir_ilock_class){++++}, at: xfs_ilock+0xd2/0x290 [xfs] ...which is clearly caused by xfs_setattr_nonsize failing to unlock the ILOCK after the xfs_qm_vop_chown_reserve call fails. Add the missing unlock. Reported-by: benjamin.moody@gmail.com Fixes: 253f4911f297 ("xfs: better xfs_trans_alloc interface") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Tested-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 74047bd0c1ae..e427ad097e2e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -803,6 +803,7 @@ xfs_setattr_nonsize( out_cancel: xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); From 17c2b7af71f27ed33ce1ba65596301c57ce73f0d Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 24 Jul 2019 06:34:46 +0000 Subject: [PATCH 089/101] xfs: don't trip over uninitialized buffer on extent read of corrupted inode commit 6958d11f77d45db80f7e22a21a74d4d5f44dc667 upstream. We've had rather rare reports of bmap btree block corruption where the bmap root block has a level count of zero. The root cause of the corruption is so far unknown. We do have verifier checks to detect this form of on-disk corruption, but this doesn't cover a memory corruption variant of the problem. The latter is a reasonable possibility because the root block is part of the inode fork and can reside in-core for some time before inode extents are read. If this occurs, it leads to a system crash such as the following: BUG: unable to handle kernel paging request at ffffffff00000221 PF error: [normal kernel read fault] ... RIP: 0010:xfs_trans_brelse+0xf/0x200 [xfs] ... Call Trace: xfs_iread_extents+0x379/0x540 [xfs] xfs_file_iomap_begin_delay+0x11a/0xb40 [xfs] ? xfs_attr_get+0xd1/0x120 [xfs] ? iomap_write_begin.constprop.40+0x2d0/0x2d0 xfs_file_iomap_begin+0x4c4/0x6d0 [xfs] ? __vfs_getxattr+0x53/0x70 ? iomap_write_begin.constprop.40+0x2d0/0x2d0 iomap_apply+0x63/0x130 ? iomap_write_begin.constprop.40+0x2d0/0x2d0 iomap_file_buffered_write+0x62/0x90 ? iomap_write_begin.constprop.40+0x2d0/0x2d0 xfs_file_buffered_aio_write+0xe4/0x3b0 [xfs] __vfs_write+0x150/0x1b0 vfs_write+0xba/0x1c0 ksys_pwrite64+0x64/0xa0 do_syscall_64+0x5a/0x1d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The crash occurs because xfs_iread_extents() attempts to release an uninitialized buffer pointer as the level == 0 value prevented the buffer from ever being allocated or read. Change the level > 0 assert to an explicit error check in xfs_iread_extents() to avoid crashing the kernel in the event of localized, in-core inode corruption. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_bmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 3a496ffe6551..ab2465bc413a 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1178,7 +1178,10 @@ xfs_iread_extents( * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. */ level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); + if (unlikely(level == 0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); From a9912f346bdc6db395c9a13512a3517eb4ad9bf6 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 24 Jul 2019 06:34:47 +0000 Subject: [PATCH 090/101] xfs: Move fs/xfs/xfs_attr.h to fs/xfs/libxfs/xfs_attr.h commit e2421f0b5ff3ce279573036f5cfcb0ce28b422a9 upstream. This patch moves fs/xfs/xfs_attr.h to fs/xfs/libxfs/xfs_attr.h since xfs_attr.c is in libxfs. We will need these later in xfsprogs. Signed-off-by: Allison Henderson Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- fs/xfs/{ => libxfs}/xfs_attr.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename fs/xfs/{ => libxfs}/xfs_attr.h (100%) diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h similarity index 100% rename from fs/xfs/xfs_attr.h rename to fs/xfs/libxfs/xfs_attr.h From b3a248f2307c6d73536851b2b487c538ccf1c106 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 24 Jul 2019 06:34:48 +0000 Subject: [PATCH 091/101] xfs: Add helper function xfs_attr_try_sf_addname commit 4c74a56b9de76bb6b581274b76b52535ad77c2a7 upstream. This patch adds a subroutine xfs_attr_try_sf_addname used by xfs_attr_set. This subrotine will attempt to add the attribute name specified in args in shortform, as well and perform error handling previously done in xfs_attr_set. This patch helps to pre-simplify xfs_attr_set for reviewing purposes and reduce indentation. New function will be added in the next patch. [dgc: moved commit to helper function, too.] Signed-off-by: Allison Henderson Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_attr.c | 53 +++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c6299f82a6e4..c15a1debec90 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -191,6 +191,33 @@ xfs_attr_calc_size( return nblks; } +STATIC int +xfs_attr_try_sf_addname( + struct xfs_inode *dp, + struct xfs_da_args *args) +{ + + struct xfs_mount *mp = dp->i_mount; + int error, error2; + + error = xfs_attr_shortform_addname(args); + if (error == -ENOSPC) + return error; + + /* + * Commit the shortform mods, and we're done. + * NOTE: this is also the error path (EEXIST, etc). + */ + if (!error && (args->flags & ATTR_KERNOTIME) == 0) + xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(args->trans); + + error2 = xfs_trans_commit(args->trans); + return error ? error : error2; +} + int xfs_attr_set( struct xfs_inode *dp, @@ -204,7 +231,7 @@ xfs_attr_set( struct xfs_da_args args; struct xfs_trans_res tres; int rsvd = (flags & ATTR_ROOT) != 0; - int error, err2, local; + int error, local; XFS_STATS_INC(mp, xs_attr_set); @@ -281,30 +308,10 @@ xfs_attr_set( * Try to add the attr to the attribute list in * the inode. */ - error = xfs_attr_shortform_addname(&args); + error = xfs_attr_try_sf_addname(dp, &args); if (error != -ENOSPC) { - /* - * Commit the shortform mods, and we're done. - * NOTE: this is also the error path (EEXIST, etc). - */ - ASSERT(args.trans != NULL); - - /* - * If this is a synchronous mount, make sure that - * the transaction goes to disk before returning - * to the user. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); - - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_trans_ichgtime(args.trans, dp, - XFS_ICHGTIME_CHG); - } - err2 = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); - - return error ? error : err2; + return error; } /* From b21ff6cfcc240e0aee5ac94975dc7f65dfccaf0b Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 24 Jul 2019 06:34:49 +0000 Subject: [PATCH 092/101] xfs: Add attibute set and helper functions commit 2f3cd8091963810d85e6a5dd6ed1247e10e9e6f2 upstream. This patch adds xfs_attr_set_args and xfs_bmap_set_attrforkoff. These sub-routines set the attributes specified in @args. We will use this later for setting parent pointers as a deferred attribute operation. [dgc: remove attr fork init code from xfs_attr_set_args().] [dgc: xfs_attr_try_sf_addname() NULLs args.trans after commit.] [dgc: correct sf add error handling.] Signed-off-by: Allison Henderson Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_attr.c | 151 +++++++++++++++++++++------------------ fs/xfs/libxfs/xfs_attr.h | 1 + fs/xfs/libxfs/xfs_bmap.c | 49 ++++++++----- fs/xfs/libxfs/xfs_bmap.h | 1 + 4 files changed, 115 insertions(+), 87 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c15a1debec90..25431ddba1fa 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -215,9 +215,80 @@ xfs_attr_try_sf_addname( xfs_trans_set_sync(args->trans); error2 = xfs_trans_commit(args->trans); + args->trans = NULL; return error ? error : error2; } +/* + * Set the attribute specified in @args. + */ +int +xfs_attr_set_args( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) +{ + struct xfs_inode *dp = args->dp; + int error; + + /* + * If the attribute list is non-existent or a shortform list, + * upgrade it to a single-leaf-block attribute list. + */ + if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || + (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + dp->i_d.di_anextents == 0)) { + + /* + * Build initial attribute list (if required). + */ + if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) + xfs_attr_shortform_create(args); + + /* + * Try to add the attr to the attribute list in the inode. + */ + error = xfs_attr_try_sf_addname(dp, args); + if (error != -ENOSPC) + return error; + + /* + * It won't fit in the shortform, transform to a leaf block. + * GROT: another possible req'mt for a double-split btree op. + */ + error = xfs_attr_shortform_to_leaf(args, leaf_bp); + if (error) + return error; + + /* + * Prevent the leaf buffer from being unlocked so that a + * concurrent AIL push cannot grab the half-baked leaf + * buffer and run into problems with the write verifier. + */ + xfs_trans_bhold(args->trans, *leaf_bp); + + error = xfs_defer_finish(&args->trans); + if (error) + return error; + + /* + * Commit the leaf transformation. We'll need another + * (linked) transaction to add the new attribute to the + * leaf. + */ + error = xfs_trans_roll_inode(&args->trans, dp); + if (error) + return error; + xfs_trans_bjoin(args->trans, *leaf_bp); + *leaf_bp = NULL; + } + + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) + error = xfs_attr_leaf_addname(args); + else + error = xfs_attr_node_addname(args); + return error; +} + int xfs_attr_set( struct xfs_inode *dp, @@ -282,73 +353,17 @@ xfs_attr_set( error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); - if (error) { - xfs_iunlock(dp, XFS_ILOCK_EXCL); - xfs_trans_cancel(args.trans); - return error; - } + if (error) + goto out_trans_cancel; xfs_trans_ijoin(args.trans, dp, 0); - - /* - * If the attribute list is non-existent or a shortform list, - * upgrade it to a single-leaf-block attribute list. - */ - if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { - - /* - * Build initial attribute list (if required). - */ - if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) - xfs_attr_shortform_create(&args); - - /* - * Try to add the attr to the attribute list in - * the inode. - */ - error = xfs_attr_try_sf_addname(dp, &args); - if (error != -ENOSPC) { - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; - } - - /* - * It won't fit in the shortform, transform to a leaf block. - * GROT: another possible req'mt for a double-split btree op. - */ - error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); - if (error) - goto out; - /* - * Prevent the leaf buffer from being unlocked so that a - * concurrent AIL push cannot grab the half-baked leaf - * buffer and run into problems with the write verifier. - */ - xfs_trans_bhold(args.trans, leaf_bp); - error = xfs_defer_finish(&args.trans); - if (error) - goto out; - - /* - * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf, which - * means that we have to hold & join the leaf buffer here too. - */ - error = xfs_trans_roll_inode(&args.trans, dp); - if (error) - goto out; - xfs_trans_bjoin(args.trans, leaf_bp); - leaf_bp = NULL; - } - - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) - error = xfs_attr_leaf_addname(&args); - else - error = xfs_attr_node_addname(&args); + error = xfs_attr_set_args(&args, &leaf_bp); if (error) - goto out; + goto out_release_leaf; + if (!args.trans) { + /* shortform attribute has already been committed */ + goto out_unlock; + } /* * If this is a synchronous mount, make sure that the @@ -365,17 +380,17 @@ xfs_attr_set( */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); error = xfs_trans_commit(args.trans); +out_unlock: xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; -out: +out_release_leaf: if (leaf_bp) xfs_trans_brelse(args.trans, leaf_bp); +out_trans_cancel: if (args.trans) xfs_trans_cancel(args.trans); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; + goto out_unlock; } /* diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 033ff8c478e2..f608ac8f306f 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -140,6 +140,7 @@ int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, unsigned char *value, int *valuelenp, int flags); int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags); +int xfs_attr_set_args(struct xfs_da_args *args, struct xfs_buf **leaf_bp); int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, int flags, struct attrlist_cursor_kern *cursor); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ab2465bc413a..06a7da8dbda5 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1019,6 +1019,34 @@ xfs_bmap_add_attrfork_local( return -EFSCORRUPTED; } +/* Set an inode attr fork off based on the format */ +int +xfs_bmap_set_attrforkoff( + struct xfs_inode *ip, + int size, + int *version) +{ + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_DEV: + ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; + break; + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); + if (!ip->i_d.di_forkoff) + ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; + else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version) + *version = 2; + break; + default: + ASSERT(0); + return -EINVAL; + } + + return 0; +} + /* * Convert inode from non-attributed to attributed. * Must not be in a transaction, ip must not be locked. @@ -1070,26 +1098,9 @@ xfs_bmap_add_attrfork( xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - switch (ip->i_d.di_format) { - case XFS_DINODE_FMT_DEV: - ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; - break; - case XFS_DINODE_FMT_LOCAL: - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); - if (!ip->i_d.di_forkoff) - ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3; - else if (mp->m_flags & XFS_MOUNT_ATTR2) - version = 2; - break; - default: - ASSERT(0); - error = -EINVAL; + error = xfs_bmap_set_attrforkoff(ip, size, &version); + if (error) goto trans_cancel; - } - ASSERT(ip->i_afp == NULL); ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); ip->i_afp->if_flags = XFS_IFEXTENTS; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index b6e9b639e731..488dc8860fd7 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -183,6 +183,7 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); +int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, struct xfs_owner_info *oinfo, From 83a8e6b2f2e39d4b500ed67e68145751ba9140ed Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 24 Jul 2019 06:34:50 +0000 Subject: [PATCH 093/101] xfs: Add attibute remove and helper functions commit 068f985a9e5ec70fde58d8f679994fdbbd093a36 upstream. This patch adds xfs_attr_remove_args. These sub-routines remove the attributes specified in @args. We will use this later for setting parent pointers as a deferred attribute operation. Signed-off-by: Allison Henderson Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_attr.c | 36 +++++++++++++++++++++++++----------- fs/xfs/libxfs/xfs_attr.h | 1 + 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 25431ddba1fa..844ed87b1900 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -289,6 +289,30 @@ xfs_attr_set_args( return error; } +/* + * Remove the attribute specified in @args. + */ +int +xfs_attr_remove_args( + struct xfs_da_args *args) +{ + struct xfs_inode *dp = args->dp; + int error; + + if (!xfs_inode_hasattr(dp)) { + error = -ENOATTR; + } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); + error = xfs_attr_shortform_remove(args); + } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + error = xfs_attr_leaf_removename(args); + } else { + error = xfs_attr_node_removename(args); + } + + return error; +} + int xfs_attr_set( struct xfs_inode *dp, @@ -445,17 +469,7 @@ xfs_attr_remove( */ xfs_trans_ijoin(args.trans, dp, 0); - if (!xfs_inode_hasattr(dp)) { - error = -ENOATTR; - } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); - error = xfs_attr_shortform_remove(&args); - } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { - error = xfs_attr_leaf_removename(&args); - } else { - error = xfs_attr_node_removename(&args); - } - + error = xfs_attr_remove_args(&args); if (error) goto out; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index f608ac8f306f..bdf52a333f3f 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -142,6 +142,7 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags); int xfs_attr_set_args(struct xfs_da_args *args, struct xfs_buf **leaf_bp); int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); +int xfs_attr_remove_args(struct xfs_da_args *args); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, int flags, struct attrlist_cursor_kern *cursor); From 655bb2c4ace4ef3b34791b3ca4cc45693f2c0ecd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 24 Jul 2019 06:34:51 +0000 Subject: [PATCH 094/101] xfs: always rejoin held resources during defer roll commit 710d707d2fa9cf4c2aa9def129e71e99513466ea upstream. During testing of xfs/141 on a V4 filesystem, I observed some inconsistent behavior with regards to resources that are held (i.e. remain locked) across a defer roll. The transaction roll always gives the defer roll function a new transaction, even if committing the old transaction fails. However, the defer roll function only rejoins the held resources if the transaction commit succeedied. This means that callers of defer roll have to figure out whether the held resources are attached to the transaction being passed back. Worse yet, if the defer roll was part of a defer finish call, we have a third possibility: the defer finish could pass back a dirty transaction with dirty held resources and an error code. The only sane way to handle all of these scenarios is to require that the code that held the resource either cancel the transaction before unlocking and releasing the resources, or use functions that detach resources from a transaction properly (e.g. xfs_trans_brelse) if they need to drop the reference before committing or cancelling the transaction. In order to make this so, change the defer roll code to join held resources to the new transaction unconditionally and fix all the bhold callers to release the held buffers correctly. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster [mcgrof: fixes kz#204223 ] Signed-off-by: Luis Chamberlain Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_attr.c | 35 ++++++++++++----------------------- fs/xfs/libxfs/xfs_attr.h | 2 +- fs/xfs/libxfs/xfs_defer.c | 14 +++++++++----- fs/xfs/xfs_dquot.c | 17 +++++++++-------- 4 files changed, 31 insertions(+), 37 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 844ed87b1900..6410d3e00ce0 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -224,10 +224,10 @@ xfs_attr_try_sf_addname( */ int xfs_attr_set_args( - struct xfs_da_args *args, - struct xfs_buf **leaf_bp) + struct xfs_da_args *args) { struct xfs_inode *dp = args->dp; + struct xfs_buf *leaf_bp = NULL; int error; /* @@ -255,7 +255,7 @@ xfs_attr_set_args( * It won't fit in the shortform, transform to a leaf block. * GROT: another possible req'mt for a double-split btree op. */ - error = xfs_attr_shortform_to_leaf(args, leaf_bp); + error = xfs_attr_shortform_to_leaf(args, &leaf_bp); if (error) return error; @@ -263,23 +263,16 @@ xfs_attr_set_args( * Prevent the leaf buffer from being unlocked so that a * concurrent AIL push cannot grab the half-baked leaf * buffer and run into problems with the write verifier. + * Once we're done rolling the transaction we can release + * the hold and add the attr to the leaf. */ - xfs_trans_bhold(args->trans, *leaf_bp); - + xfs_trans_bhold(args->trans, leaf_bp); error = xfs_defer_finish(&args->trans); - if (error) + xfs_trans_bhold_release(args->trans, leaf_bp); + if (error) { + xfs_trans_brelse(args->trans, leaf_bp); return error; - - /* - * Commit the leaf transformation. We'll need another - * (linked) transaction to add the new attribute to the - * leaf. - */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - return error; - xfs_trans_bjoin(args->trans, *leaf_bp); - *leaf_bp = NULL; + } } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -322,7 +315,6 @@ xfs_attr_set( int flags) { struct xfs_mount *mp = dp->i_mount; - struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_trans_res tres; int rsvd = (flags & ATTR_ROOT) != 0; @@ -381,9 +373,9 @@ xfs_attr_set( goto out_trans_cancel; xfs_trans_ijoin(args.trans, dp, 0); - error = xfs_attr_set_args(&args, &leaf_bp); + error = xfs_attr_set_args(&args); if (error) - goto out_release_leaf; + goto out_trans_cancel; if (!args.trans) { /* shortform attribute has already been committed */ goto out_unlock; @@ -408,9 +400,6 @@ xfs_attr_set( xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; -out_release_leaf: - if (leaf_bp) - xfs_trans_brelse(args.trans, leaf_bp); out_trans_cancel: if (args.trans) xfs_trans_cancel(args.trans); diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index bdf52a333f3f..cc04ee0aacfb 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -140,7 +140,7 @@ int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, unsigned char *value, int *valuelenp, int flags); int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags); -int xfs_attr_set_args(struct xfs_da_args *args, struct xfs_buf **leaf_bp); +int xfs_attr_set_args(struct xfs_da_args *args); int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); int xfs_attr_remove_args(struct xfs_da_args *args); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index e792b167150a..c52beee31836 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -266,13 +266,15 @@ xfs_defer_trans_roll( trace_xfs_defer_trans_roll(tp, _RET_IP_); - /* Roll the transaction. */ + /* + * Roll the transaction. Rolling always given a new transaction (even + * if committing the old one fails!) to hand back to the caller, so we + * join the held resources to the new transaction so that we always + * return with the held resources joined to @tpp, no matter what + * happened. + */ error = xfs_trans_roll(tpp); tp = *tpp; - if (error) { - trace_xfs_defer_trans_roll_error(tp, error); - return error; - } /* Rejoin the joined inodes. */ for (i = 0; i < ipcount; i++) @@ -284,6 +286,8 @@ xfs_defer_trans_roll( xfs_trans_bhold(tp, bplist[i]); } + if (error) + trace_xfs_defer_trans_roll_error(tp, error); return error; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 87e6dd5326d5..a1af984e4913 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -277,7 +277,8 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) /* * Ensure that the given in-core dquot has a buffer on disk backing it, and - * return the buffer. This is called when the bmapi finds a hole. + * return the buffer locked and held. This is called when the bmapi finds a + * hole. */ STATIC int xfs_dquot_disk_alloc( @@ -355,13 +356,14 @@ xfs_dquot_disk_alloc( * If everything succeeds, the caller of this function is returned a * buffer that is locked and held to the transaction. The caller * is responsible for unlocking any buffer passed back, either - * manually or by committing the transaction. + * manually or by committing the transaction. On error, the buffer is + * released and not passed back. */ xfs_trans_bhold(tp, bp); error = xfs_defer_finish(tpp); - tp = *tpp; if (error) { - xfs_buf_relse(bp); + xfs_trans_bhold_release(*tpp, bp); + xfs_trans_brelse(*tpp, bp); return error; } *bpp = bp; @@ -521,7 +523,6 @@ xfs_qm_dqread_alloc( struct xfs_buf **bpp) { struct xfs_trans *tp; - struct xfs_buf *bp; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc, @@ -529,7 +530,7 @@ xfs_qm_dqread_alloc( if (error) goto err; - error = xfs_dquot_disk_alloc(&tp, dqp, &bp); + error = xfs_dquot_disk_alloc(&tp, dqp, bpp); if (error) goto err_cancel; @@ -539,10 +540,10 @@ xfs_qm_dqread_alloc( * Buffer was held to the transaction, so we have to unlock it * manually here because we're not passing it back. */ - xfs_buf_relse(bp); + xfs_buf_relse(*bpp); + *bpp = NULL; goto err; } - *bpp = bp; return 0; err_cancel: From 0d5e34c1e2633e6256826b8ae2f7fe0d6b3b45d1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Aug 2019 12:58:14 +0300 Subject: [PATCH 095/101] dm zoned: fix potential NULL dereference in dmz_do_reclaim() [ Upstream commit e0702d90b79d430b0ccc276ead4f88440bb51352 ] This function is supposed to return error pointers so it matches the dmz_get_rnd_zone_for_reclaim() function. The current code could lead to a NULL dereference in dmz_do_reclaim() Fixes: b234c6d7a703 ("dm zoned: improve error handling in reclaim") Signed-off-by: Dan Carpenter Reviewed-by: Dmitry Fomichev Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-zoned-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 00e7a343eacf..7e8d7fc99410 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1579,7 +1579,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) struct dm_zone *zone; if (list_empty(&zmd->map_seq_list)) - return NULL; + return ERR_PTR(-EBUSY); list_for_each_entry(zone, &zmd->map_seq_list, link) { if (!zone->bzone) @@ -1588,7 +1588,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) return zone; } - return NULL; + return ERR_PTR(-EBUSY); } /* From 32df8a30b73474403a09b6cc686bc409652a2919 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Wed, 21 Aug 2019 10:19:27 +1000 Subject: [PATCH 096/101] powerpc: Allow flush_(inval_)dcache_range to work across ranges >4GB The upstream commit: 22e9c88d486a ("powerpc/64: reuse PPC32 static inline flush_dcache_range()") has a similar effect, but since it is a rewrite of the assembler to C, is too invasive for stable. This patch is a minimal fix to address the issue in assembler. This patch applies cleanly to v5.2, v4.19 & v4.14. When calling flush_(inval_)dcache_range with a size >4GB, we were masking off the upper 32 bits, so we would incorrectly flush a range smaller than intended. This patch replaces the 32 bit shifts with 64 bit ones, so that the full size is accounted for. Signed-off-by: Alastair D'Silva Acked-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/misc_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 262ba9481781..1bf6aaefd26a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -135,7 +135,7 @@ _GLOBAL_TOC(flush_dcache_range) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 0: dcbst 0,r6 @@ -153,7 +153,7 @@ _GLOBAL(flush_inval_dcache_range) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ sync isync From f28023c4eedcf0467c887af6eaf5e7d5bf0bf1c4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Aug 2019 15:20:41 +0100 Subject: [PATCH 097/101] rxrpc: Fix local endpoint refcounting commit 730c5fd42c1e3652a065448fd235cb9fafb2bd10 upstream. The object lifetime management on the rxrpc_local struct is broken in that the rxrpc_local_processor() function is expected to clean up and remove an object - but it may get requeued by packets coming in on the backing UDP socket once it starts running. This may result in the assertion in rxrpc_local_rcu() firing because the memory has been scheduled for RCU destruction whilst still queued: rxrpc: Assertion failed ------------[ cut here ]------------ kernel BUG at net/rxrpc/local_object.c:468! Note that if the processor comes around before the RCU free function, it will just do nothing because ->dead is true. Fix this by adding a separate refcount to count active users of the endpoint that causes the endpoint to be destroyed when it reaches 0. The original refcount can then be used to refcount objects through the work processor and cause the memory to be rcu freed when that reaches 0. Fixes: 4f95dd78a77e ("rxrpc: Rework local endpoint management") Reported-by: syzbot+1e0edc4b8b7494c28450@syzkaller.appspotmail.com Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/af_rxrpc.c | 4 +- net/rxrpc/ar-internal.h | 5 ++- net/rxrpc/input.c | 16 ++++++-- net/rxrpc/local_object.c | 86 +++++++++++++++++++++++++--------------- 4 files changed, 72 insertions(+), 39 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index d76e5e58905d..7319d3ca30e9 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -195,7 +195,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); - rxrpc_put_local(local); + rxrpc_unuse_local(local); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -908,7 +908,7 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_queue_work(&rxnet->service_conn_reaper); rxrpc_queue_work(&rxnet->client_conn_reaper); - rxrpc_put_local(rx->local); + rxrpc_unuse_local(rx->local); rx->local = NULL; key_put(rx->key); rx->key = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a4c341828b72..dfd9eab77cc8 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -258,7 +258,8 @@ struct rxrpc_security { */ struct rxrpc_local { struct rcu_head rcu; - atomic_t usage; + atomic_t active_users; /* Number of users of the local endpoint */ + atomic_t usage; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ @@ -998,6 +999,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); void rxrpc_put_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); +void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d591f54cb91f..7965600ee5de 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1106,8 +1106,12 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, { _enter("%p,%p", local, skb); - skb_queue_tail(&local->event_queue, skb); - rxrpc_queue_local(local); + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->event_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + } } /* @@ -1117,8 +1121,12 @@ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { CHECK_SLAB_OKAY(&local->usage); - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + } } /* diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 10317dbdab5f..2182ebfc7df4 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -83,6 +83,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { atomic_set(&local->usage, 1); + atomic_set(&local->active_users, 1); local->rxnet = rxnet; INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); @@ -270,11 +271,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, * bind the transport socket may still fail if we're attempting * to use a local address that the dying object is still using. */ - if (!rxrpc_get_local_maybe(local)) { - cursor = cursor->next; - list_del_init(&local->link); + if (!rxrpc_use_local(local)) break; - } age = "old"; goto found; @@ -288,7 +286,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, if (ret < 0) goto sock_error; - list_add_tail(&local->link, cursor); + if (cursor != &rxnet->local_endpoints) + list_replace(cursor, &local->link); + else + list_add_tail(&local->link, cursor); age = "new"; found: @@ -346,7 +347,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) } /* - * Queue a local endpoint. + * Queue a local endpoint unless it has become unreferenced and pass the + * caller's reference to the work item. */ void rxrpc_queue_local(struct rxrpc_local *local) { @@ -355,15 +357,8 @@ void rxrpc_queue_local(struct rxrpc_local *local) if (rxrpc_queue_work(&local->processor)) trace_rxrpc_local(local, rxrpc_local_queued, atomic_read(&local->usage), here); -} - -/* - * A local endpoint reached its end of life. - */ -static void __rxrpc_put_local(struct rxrpc_local *local) -{ - _enter("%d", local->debug_id); - rxrpc_queue_work(&local->processor); + else + rxrpc_put_local(local); } /* @@ -379,10 +374,45 @@ void rxrpc_put_local(struct rxrpc_local *local) trace_rxrpc_local(local, rxrpc_local_put, n, here); if (n == 0) - __rxrpc_put_local(local); + call_rcu(&local->rcu, rxrpc_local_rcu); } } +/* + * Start using a local endpoint. + */ +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) +{ + unsigned int au; + + local = rxrpc_get_local_maybe(local); + if (!local) + return NULL; + + au = atomic_fetch_add_unless(&local->active_users, 1, 0); + if (au == 0) { + rxrpc_put_local(local); + return NULL; + } + + return local; +} + +/* + * Cease using a local endpoint. Once the number of active users reaches 0, we + * start the closure of the transport in the work processor. + */ +void rxrpc_unuse_local(struct rxrpc_local *local) +{ + unsigned int au; + + au = atomic_dec_return(&local->active_users); + if (au == 0) + rxrpc_queue_local(local); + else + rxrpc_put_local(local); +} + /* * Destroy a local endpoint's socket and then hand the record to RCU to dispose * of. @@ -397,16 +427,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) _enter("%d", local->debug_id); - /* We can get a race between an incoming call packet queueing the - * processor again and the work processor starting the destruction - * process which will shut down the UDP socket. - */ - if (local->dead) { - _leave(" [already dead]"); - return; - } - local->dead = true; - mutex_lock(&rxnet->local_mutex); list_del_init(&local->link); mutex_unlock(&rxnet->local_mutex); @@ -426,13 +446,11 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) */ rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); - - _debug("rcu local %d", local->debug_id); - call_rcu(&local->rcu, rxrpc_local_rcu); } /* - * Process events on an endpoint + * Process events on an endpoint. The work item carries a ref which + * we must release. */ static void rxrpc_local_processor(struct work_struct *work) { @@ -445,8 +463,10 @@ static void rxrpc_local_processor(struct work_struct *work) do { again = false; - if (atomic_read(&local->usage) == 0) - return rxrpc_local_destroyer(local); + if (atomic_read(&local->active_users) == 0) { + rxrpc_local_destroyer(local); + break; + } if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); @@ -458,6 +478,8 @@ static void rxrpc_local_processor(struct work_struct *work) again = true; } } while (again); + + rxrpc_put_local(local); } /* From a05354cbb82248469f907712587992c52fd1c254 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Aug 2019 22:26:36 +0100 Subject: [PATCH 098/101] rxrpc: Fix read-after-free in rxrpc_queue_local() commit 06d9532fa6b34f12a6d75711162d47c17c1add72 upstream. rxrpc_queue_local() attempts to queue the local endpoint it is given and then, if successful, prints a trace line. The trace line includes the current usage count - but we're not allowed to look at the local endpoint at this point as we passed our ref on it to the workqueue. Fix this by reading the usage count before queuing the work item. Also fix the reading of local->debug_id for trace lines, which must be done with the same consideration as reading the usage count. Fixes: 09d2bf595db4 ("rxrpc: Add a tracepoint to track rxrpc_local refcounting") Reported-by: syzbot+78e71c5bab4f76a6a719@syzkaller.appspotmail.com Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/local_object.c | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 147546e0c11b..815dcfa64743 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -500,10 +500,10 @@ rxrpc_tx_points; #define E_(a, b) { a, b } TRACE_EVENT(rxrpc_local, - TP_PROTO(struct rxrpc_local *local, enum rxrpc_local_trace op, + TP_PROTO(unsigned int local_debug_id, enum rxrpc_local_trace op, int usage, const void *where), - TP_ARGS(local, op, usage, where), + TP_ARGS(local_debug_id, op, usage, where), TP_STRUCT__entry( __field(unsigned int, local ) @@ -513,7 +513,7 @@ TRACE_EVENT(rxrpc_local, ), TP_fast_assign( - __entry->local = local->debug_id; + __entry->local = local_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 2182ebfc7df4..34ec96e5898e 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -97,7 +97,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; - trace_rxrpc_local(local, rxrpc_local_new, 1, NULL); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL); } _leave(" = %p", local); @@ -325,7 +325,7 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) int n; n = atomic_inc_return(&local->usage); - trace_rxrpc_local(local, rxrpc_local_got, n, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here); return local; } @@ -339,7 +339,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) if (local) { int n = atomic_fetch_add_unless(&local->usage, 1, 0); if (n > 0) - trace_rxrpc_local(local, rxrpc_local_got, n + 1, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, + n + 1, here); else local = NULL; } @@ -347,16 +348,16 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) } /* - * Queue a local endpoint unless it has become unreferenced and pass the - * caller's reference to the work item. + * Queue a local endpoint and pass the caller's reference to the work item. */ void rxrpc_queue_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + unsigned int debug_id = local->debug_id; + int n = atomic_read(&local->usage); if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(local, rxrpc_local_queued, - atomic_read(&local->usage), here); + trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here); else rxrpc_put_local(local); } @@ -371,7 +372,7 @@ void rxrpc_put_local(struct rxrpc_local *local) if (local) { n = atomic_dec_return(&local->usage); - trace_rxrpc_local(local, rxrpc_local_put, n, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); if (n == 0) call_rcu(&local->rcu, rxrpc_local_rcu); @@ -458,7 +459,7 @@ static void rxrpc_local_processor(struct work_struct *work) container_of(work, struct rxrpc_local, processor); bool again; - trace_rxrpc_local(local, rxrpc_local_processing, + trace_rxrpc_local(local->debug_id, rxrpc_local_processing, atomic_read(&local->usage), NULL); do { From ce3f9e194d25812263c1bacac8c6bcd1f99df899 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 12 Aug 2019 23:30:06 +0100 Subject: [PATCH 099/101] rxrpc: Fix local endpoint replacement [ Upstream commit b00df840fb4004b7087940ac5f68801562d0d2de ] When a local endpoint (struct rxrpc_local) ceases to be in use by any AF_RXRPC sockets, it starts the process of being destroyed, but this doesn't cause it to be removed from the namespace endpoint list immediately as tearing it down isn't trivial and can't be done in softirq context, so it gets deferred. If a new socket comes along that wants to bind to the same endpoint, a new rxrpc_local object will be allocated and rxrpc_lookup_local() will use list_replace() to substitute the new one for the old. Then, when the dying object gets to rxrpc_local_destroyer(), it is removed unconditionally from whatever list it is on by calling list_del_init(). However, list_replace() doesn't reset the pointers in the replaced list_head and so the list_del_init() will likely corrupt the local endpoints list. Fix this by using list_replace_init() instead. Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Reported-by: syzbot+193e29e9387ea5837f1d@syzkaller.appspotmail.com Signed-off-by: David Howells Signed-off-by: Sasha Levin --- net/rxrpc/local_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 34ec96e5898e..27f4bbe85e79 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -287,7 +287,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto sock_error; if (cursor != &rxnet->local_endpoints) - list_replace(cursor, &local->link); + list_replace_init(cursor, &local->link); else list_add_tail(&local->link, cursor); age = "new"; From 6d47174198ac797203b8bf0355a30bb7c6dcf030 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Aug 2019 22:47:47 +0100 Subject: [PATCH 100/101] rxrpc: Fix local refcounting [ Upstream commit 68553f1a6f746bf860bce3eb42d78c26a717d9c0 ] Fix rxrpc_unuse_local() to handle a NULL local pointer as it can be called on an unbound socket on which rx->local is not yet set. The following reproduced (includes omitted): int main(void) { socket(AF_RXRPC, SOCK_DGRAM, AF_INET); return 0; } causes the following oops to occur: BUG: kernel NULL pointer dereference, address: 0000000000000010 ... RIP: 0010:rxrpc_unuse_local+0x8/0x1b ... Call Trace: rxrpc_release+0x2b5/0x338 __sock_release+0x37/0xa1 sock_close+0x14/0x17 __fput+0x115/0x1e9 task_work_run+0x72/0x98 do_exit+0x51b/0xa7a ? __context_tracking_exit+0x4e/0x10e do_group_exit+0xab/0xab __x64_sys_exit_group+0x14/0x17 do_syscall_64+0x89/0x1d4 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: syzbot+20dee719a2e090427b5f@syzkaller.appspotmail.com Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Signed-off-by: David Howells cc: Jeffrey Altman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/local_object.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 27f4bbe85e79..c752ad487067 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -407,11 +407,13 @@ void rxrpc_unuse_local(struct rxrpc_local *local) { unsigned int au; - au = atomic_dec_return(&local->active_users); - if (au == 0) - rxrpc_queue_local(local); - else - rxrpc_put_local(local); + if (local) { + au = atomic_dec_return(&local->active_users); + if (au == 0) + rxrpc_queue_local(local); + else + rxrpc_put_local(local); + } } /* From 97ab07e11fbf55c86c3758e07ab295028bf17f94 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 29 Aug 2019 08:29:00 +0200 Subject: [PATCH 101/101] Linux 4.19.69 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6f164b04d953..677341239449 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 68 +SUBLEVEL = 69 EXTRAVERSION = NAME = "People's Front"