From fd110f42e3679af082c427b7a2eeb942d5af470f Mon Sep 17 00:00:00 2001 From: qizhong cheng Date: Mon, 11 Dec 2023 17:49:23 +0800 Subject: [PATCH 01/33] PCI: mediatek: Clear interrupt status before dispatching handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4e11c29873a8a296a20f99b3e03095e65ebf897d ] We found a failure when using the iperf tool during WiFi performance testing, where some MSIs were received while clearing the interrupt status, and these MSIs cannot be serviced. The interrupt status can be cleared even if the MSI status remains pending. As such, given the edge-triggered interrupt type, its status should be cleared before being dispatched to the handler of the underling device. [kwilczynski: commit log, code comment wording] Link: https://lore.kernel.org/linux-pci/20231211094923.31967-1-jianjun.wang@mediatek.com Fixes: 43e6409db64d ("PCI: mediatek: Add MSI support for MT2712 and MT7622") Signed-off-by: qizhong cheng Signed-off-by: Jianjun Wang Signed-off-by: Krzysztof Wilczyński [bhelgaas: rewrap comment] Signed-off-by: Bjorn Helgaas Reviewed-by: AngeloGioacchino Del Regno Cc: Signed-off-by: Sasha Levin --- drivers/pci/controller/pcie-mediatek.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c index 066e9e00de11..0d5be37660aa 100644 --- a/drivers/pci/controller/pcie-mediatek.c +++ b/drivers/pci/controller/pcie-mediatek.c @@ -605,14 +605,20 @@ static void mtk_pcie_intr_handler(struct irq_desc *desc) if (status & MSI_STATUS){ unsigned long imsi_status; + /* + * The interrupt status can be cleared even if the + * MSI status remains pending. As such, given the + * edge-triggered interrupt type, its status should + * be cleared before being dispatched to the + * handler of the underlying device. + */ + writel(MSI_STATUS, port->base + PCIE_INT_STATUS); while ((imsi_status = readl(port->base + PCIE_IMSI_STATUS))) { for_each_set_bit(bit, &imsi_status, MTK_MSI_IRQS_NUM) { virq = irq_find_mapping(port->inner_domain, bit); generic_handle_irq(virq); } } - /* Clear MSI interrupt status */ - writel(MSI_STATUS, port->base + PCIE_INT_STATUS); } } From cde2b87517dcf1e2aa887ad8a327b022e663dcea Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 30 Jan 2020 22:15:28 -0800 Subject: [PATCH 02/33] include/linux/units.h: add helpers for kelvin to/from Celsius conversion [ Upstream commit 23331e4893614deb555c65cdf115c8a28ed32471 ] Patch series "add header file for kelvin to/from Celsius conversion helpers", v4. There are several helper macros to convert kelvin to/from Celsius in for thermal drivers. These are useful for any other drivers or subsystems, but it's odd to include just for the helpers. This adds a new that provides the equivalent inline functions for any drivers or subsystems, and switches all the users of conversion helpers in to use helpers. This patch (of 12): There are several helper macros to convert kelvin to/from Celsius in for thermal drivers. These are useful for any other drivers or subsystems, but it's odd to include just for the helpers. This adds a new that provides the equivalent inline functions for any drivers or subsystems. It is intended to replace the helpers in . Link: http://lkml.kernel.org/r/1576386975-7941-2-git-send-email-akinobu.mita@gmail.com Signed-off-by: Akinobu Mita Reviewed-by: Andy Shevchenko Cc: Sujith Thomas Cc: Darren Hart Cc: Zhang Rui Cc: Daniel Lezcano Cc: Amit Kucheria Cc: Jean Delvare Cc: Guenter Roeck Cc: Keith Busch Cc: Jens Axboe Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Kalle Valo Cc: Stanislaw Gruszka Cc: Johannes Berg Cc: Emmanuel Grumbach Cc: Luca Coelho Cc: Jonathan Cameron Cc: Hartmut Knaack Cc: Lars-Peter Clausen Cc: Peter Meerwald-Stadler Cc: Andy Shevchenko Cc: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Stable-dep-of: 3ef79cd14122 ("serial: sc16is7xx: set safe default SPI clock frequency") Signed-off-by: Sasha Levin --- include/linux/units.h | 84 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 include/linux/units.h diff --git a/include/linux/units.h b/include/linux/units.h new file mode 100644 index 000000000000..aaf716364ec3 --- /dev/null +++ b/include/linux/units.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNITS_H +#define _LINUX_UNITS_H + +#include + +#define ABSOLUTE_ZERO_MILLICELSIUS -273150 + +static inline long milli_kelvin_to_millicelsius(long t) +{ + return t + ABSOLUTE_ZERO_MILLICELSIUS; +} + +static inline long millicelsius_to_milli_kelvin(long t) +{ + return t - ABSOLUTE_ZERO_MILLICELSIUS; +} + +#define MILLIDEGREE_PER_DEGREE 1000 +#define MILLIDEGREE_PER_DECIDEGREE 100 + +static inline long kelvin_to_millicelsius(long t) +{ + return milli_kelvin_to_millicelsius(t * MILLIDEGREE_PER_DEGREE); +} + +static inline long millicelsius_to_kelvin(long t) +{ + t = millicelsius_to_milli_kelvin(t); + + return DIV_ROUND_CLOSEST(t, MILLIDEGREE_PER_DEGREE); +} + +static inline long deci_kelvin_to_celsius(long t) +{ + t = milli_kelvin_to_millicelsius(t * MILLIDEGREE_PER_DECIDEGREE); + + return DIV_ROUND_CLOSEST(t, MILLIDEGREE_PER_DEGREE); +} + +static inline long celsius_to_deci_kelvin(long t) +{ + t = millicelsius_to_milli_kelvin(t * MILLIDEGREE_PER_DEGREE); + + return DIV_ROUND_CLOSEST(t, MILLIDEGREE_PER_DECIDEGREE); +} + +/** + * deci_kelvin_to_millicelsius_with_offset - convert Kelvin to Celsius + * @t: temperature value in decidegrees Kelvin + * @offset: difference between Kelvin and Celsius in millidegrees + * + * Return: temperature value in millidegrees Celsius + */ +static inline long deci_kelvin_to_millicelsius_with_offset(long t, long offset) +{ + return t * MILLIDEGREE_PER_DECIDEGREE - offset; +} + +static inline long deci_kelvin_to_millicelsius(long t) +{ + return milli_kelvin_to_millicelsius(t * MILLIDEGREE_PER_DECIDEGREE); +} + +static inline long millicelsius_to_deci_kelvin(long t) +{ + t = millicelsius_to_milli_kelvin(t); + + return DIV_ROUND_CLOSEST(t, MILLIDEGREE_PER_DECIDEGREE); +} + +static inline long kelvin_to_celsius(long t) +{ + return t + DIV_ROUND_CLOSEST(ABSOLUTE_ZERO_MILLICELSIUS, + MILLIDEGREE_PER_DEGREE); +} + +static inline long celsius_to_kelvin(long t) +{ + return t - DIV_ROUND_CLOSEST(ABSOLUTE_ZERO_MILLICELSIUS, + MILLIDEGREE_PER_DEGREE); +} + +#endif /* _LINUX_UNITS_H */ From 0e7f574162e2f15f4b63f9892906f6a9afe4429f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 8 Dec 2020 17:41:42 +0100 Subject: [PATCH 03/33] units: Add Watt units [ Upstream commit 2ee5f8f05949735fa2f4c463a5e13fcb3660c719 ] As there are the temperature units, let's add the Watt macros definition. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki Stable-dep-of: 3ef79cd14122 ("serial: sc16is7xx: set safe default SPI clock frequency") Signed-off-by: Sasha Levin --- include/linux/units.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/units.h b/include/linux/units.h index aaf716364ec3..92c234e71cab 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -4,6 +4,10 @@ #include +#define MILLIWATT_PER_WATT 1000L +#define MICROWATT_PER_MILLIWATT 1000L +#define MICROWATT_PER_WATT 1000000L + #define ABSOLUTE_ZERO_MILLICELSIUS -273150 static inline long milli_kelvin_to_millicelsius(long t) From 08333e4c4f3ffe6f9d916cc111d9f3429a6aa6c2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:57:44 -0700 Subject: [PATCH 04/33] units: change from 'L' to 'UL' [ Upstream commit c9221919a2d2df5741ab074dfec5bdfc6f1e043b ] Patch series "Add Hz macros", v3. There are multiple definitions of the HZ_PER_MHZ or HZ_PER_KHZ in the different drivers. Instead of duplicating this definition again and again, add one in the units.h header to be reused in all the place the redefiniton occurs. At the same time, change the type of the Watts, as they can not be negative. This patch (of 10): The users of the macros are safe to be assigned with an unsigned instead of signed as the variables using them are themselves unsigned. Link: https://lkml.kernel.org/r/20210816114732.1834145-1-daniel.lezcano@linaro.org Link: https://lkml.kernel.org/r/20210816114732.1834145-2-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Cc: Andy Shevchenko Cc: Jonathan Cameron Cc: Christian Eggers Cc: Lukasz Luba Cc: MyungJoo Ham Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Peter Meerwald Cc: Zhang Rui Cc: Guenter Roeck Cc: Miquel Raynal Cc: Maxime Coquelin Cc: "Rafael J. Wysocki" Cc: Daniel Lezcano Cc: Chanwoo Choi Cc: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Stable-dep-of: 3ef79cd14122 ("serial: sc16is7xx: set safe default SPI clock frequency") Signed-off-by: Sasha Levin --- include/linux/units.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/units.h b/include/linux/units.h index 92c234e71cab..4a23e39acc7b 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -4,9 +4,9 @@ #include -#define MILLIWATT_PER_WATT 1000L -#define MICROWATT_PER_MILLIWATT 1000L -#define MICROWATT_PER_WATT 1000000L +#define MILLIWATT_PER_WATT 1000UL +#define MICROWATT_PER_MILLIWATT 1000UL +#define MICROWATT_PER_WATT 1000000UL #define ABSOLUTE_ZERO_MILLICELSIUS -273150 From fe79b37c6a59c623c9e64e53d5d9a0fffcfe977f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Sep 2021 19:57:48 -0700 Subject: [PATCH 05/33] units: add the HZ macros [ Upstream commit e2c77032fcbe515194107994d12cd72ddb77b022 ] The macros for the unit conversion for frequency are duplicated in different places. Provide these macros in the 'units' header, so they can be reused. Link: https://lkml.kernel.org/r/20210816114732.1834145-3-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Reviewed-by: Christian Eggers Reviewed-by: Andy Shevchenko Cc: Chanwoo Choi Cc: Guenter Roeck Cc: Jonathan Cameron Cc: Jonathan Cameron Cc: Kyungmin Park Cc: Lars-Peter Clausen Cc: Lukasz Luba Cc: Maxime Coquelin Cc: Miquel Raynal Cc: MyungJoo Ham Cc: Peter Meerwald Cc: "Rafael J. Wysocki" Cc: Zhang Rui Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Stable-dep-of: 3ef79cd14122 ("serial: sc16is7xx: set safe default SPI clock frequency") Signed-off-by: Sasha Levin --- include/linux/units.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/units.h b/include/linux/units.h index 4a23e39acc7b..a0af6d2ef4e5 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -4,6 +4,10 @@ #include +#define HZ_PER_KHZ 1000UL +#define KHZ_PER_MHZ 1000UL +#define HZ_PER_MHZ 1000000UL + #define MILLIWATT_PER_WATT 1000UL #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL From 5733959d6770324020e30dd9313cbeac0aec07ef Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 21 Dec 2023 18:18:10 -0500 Subject: [PATCH 06/33] serial: sc16is7xx: set safe default SPI clock frequency [ Upstream commit 3ef79cd1412236d884ab0c46b4d1921380807b48 ] 15 MHz is supported only by 76x variants. If the SPI clock frequency is not specified, use a safe default clock value of 4 MHz that is supported by all variants. Also use HZ_PER_MHZ macro to improve readability. Fixes: 2c837a8a8f9f ("sc16is7xx: spi interface is added") Cc: Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231221231823.2327894-4-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/sc16is7xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index c65f9194abb0..07898a46fd96 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #define SC16IS7XX_NAME "sc16is7xx" @@ -1408,7 +1409,7 @@ static int sc16is7xx_spi_probe(struct spi_device *spi) spi->bits_per_word = 8; /* only supports mode 0 on SC16IS762 */ spi->mode = spi->mode ? : SPI_MODE_0; - spi->max_speed_hz = spi->max_speed_hz ? : 15000000; + spi->max_speed_hz = spi->max_speed_hz ? : 4 * HZ_PER_MHZ; ret = spi_setup(spi); if (ret) return ret; From b0465a4897047ece1e4275fefc88a66ee41c5379 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 13 Jul 2020 16:43:21 +0200 Subject: [PATCH 07/33] driver core: add device probe log helper [ Upstream commit a787e5400a1ceeb0ef92d71ec43aeb35b1fa1334 ] During probe every time driver gets resource it should usually check for error printk some message if it is not -EPROBE_DEFER and return the error. This pattern is simple but requires adding few lines after any resource acquisition code, as a result it is often omitted or implemented only partially. dev_err_probe helps to replace such code sequences with simple call, so code: if (err != -EPROBE_DEFER) dev_err(dev, ...); return err; becomes: return dev_err_probe(dev, err, ...); Signed-off-by: Andrzej Hajda Reviewed-by: Rafael J. Wysocki Reviewed-by: Mark Brown Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200713144324.23654-2-a.hajda@samsung.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 6d710b769c1f ("serial: sc16is7xx: add check for unsupported SPI modes during probe") Signed-off-by: Sasha Levin --- drivers/base/core.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/device.h | 3 +++ 2 files changed, 45 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 6e380ad9d08a..b66647277d52 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3334,6 +3334,48 @@ define_dev_printk_level(_dev_info, KERN_INFO); #endif +/** + * dev_err_probe - probe error check and log helper + * @dev: the pointer to the struct device + * @err: error value to test + * @fmt: printf-style format string + * @...: arguments as specified in the format string + * + * This helper implements common pattern present in probe functions for error + * checking: print debug or error message depending if the error value is + * -EPROBE_DEFER and propagate error upwards. + * It replaces code sequence: + * if (err != -EPROBE_DEFER) + * dev_err(dev, ...); + * else + * dev_dbg(dev, ...); + * return err; + * with + * return dev_err_probe(dev, err, ...); + * + * Returns @err. + * + */ +int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (err != -EPROBE_DEFER) + dev_err(dev, "error %d: %pV", err, &vaf); + else + dev_dbg(dev, "error %d: %pV", err, &vaf); + + va_end(args); + + return err; +} +EXPORT_SYMBOL_GPL(dev_err_probe); + static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) { return fwnode && !IS_ERR(fwnode->secondary); diff --git a/include/linux/device.h b/include/linux/device.h index bccd367c11de..0714d6e5d500 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1581,6 +1581,9 @@ do { \ WARN_ONCE(condition, "%s %s: " format, \ dev_driver_string(dev), dev_name(dev), ## arg) +extern __printf(3, 4) +int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); + /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) From 93d63ccd20b55ae704d2c89664375e6f98552ee6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 27 Oct 2020 10:57:23 +0100 Subject: [PATCH 08/33] spi: introduce SPI_MODE_X_MASK macro [ Upstream commit 029b42d8519cef70c4fb5fcaccd08f1053ed2bf0 ] Provide a macro to filter all SPI_MODE_0,1,2,3 mode in one run. The latest SPI framework will parse the devicetree in following call sequence: of_register_spi_device() -> of_spi_parse_dt() So, driver do not need to pars the devicetree and will get prepared flags in the probe. On one hand it is good far most drivers. On other hand some drivers need to filter flags provide by SPI framework and apply know to work flags. This drivers may use SPI_MODE_X_MASK to filter MODE flags and set own, known flags: spi->flags &= ~SPI_MODE_X_MASK; spi->flags |= SPI_MODE_0; Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20201027095724.18654-2-o.rempel@pengutronix.de Signed-off-by: Mark Brown Stable-dep-of: 6d710b769c1f ("serial: sc16is7xx: add check for unsupported SPI modes during probe") Signed-off-by: Sasha Levin --- include/linux/spi/spi.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 16158fe097a8..23a232d6db69 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -153,6 +153,7 @@ struct spi_device { #define SPI_MODE_1 (0|SPI_CPHA) #define SPI_MODE_2 (SPI_CPOL|0) #define SPI_MODE_3 (SPI_CPOL|SPI_CPHA) +#define SPI_MODE_X_MASK (SPI_CPOL|SPI_CPHA) #define SPI_CS_HIGH 0x04 /* chipselect active high? */ #define SPI_LSB_FIRST 0x08 /* per-word bits-on-wire */ #define SPI_3WIRE 0x10 /* SI/SO signals shared */ From cccdd04643a7d21d0ca5d8205b15ec0ea1be1c6b Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 21 Dec 2023 18:18:09 -0500 Subject: [PATCH 09/33] serial: sc16is7xx: add check for unsupported SPI modes during probe [ Upstream commit 6d710b769c1f5f0d55c9ad9bb49b7dce009ec103 ] The original comment is confusing because it implies that variants other than the SC16IS762 supports other SPI modes beside SPI_MODE_0. Extract from datasheet: The SC16IS762 differs from the SC16IS752 in that it supports SPI clock speeds up to 15 Mbit/s instead of the 4 Mbit/s supported by the SC16IS752... In all other aspects, the SC16IS762 is functionally and electrically the same as the SC16IS752. The same is also true of the SC16IS760 variant versus the SC16IS740 and SC16IS750 variants. For all variants, only SPI mode 0 is supported. Change comment and abort probing if the specified SPI mode is not SPI_MODE_0. Fixes: 2c837a8a8f9f ("sc16is7xx: spi interface is added") Cc: Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231221231823.2327894-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/sc16is7xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 07898a46fd96..800cb94e4b91 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1407,7 +1407,10 @@ static int sc16is7xx_spi_probe(struct spi_device *spi) /* Setup SPI bus */ spi->bits_per_word = 8; - /* only supports mode 0 on SC16IS762 */ + /* For all variants, only mode 0 is supported */ + if ((spi->mode & SPI_MODE_X_MASK) != SPI_MODE_0) + return dev_err_probe(&spi->dev, -EINVAL, "Unsupported SPI mode\n"); + spi->mode = spi->mode ? : SPI_MODE_0; spi->max_speed_hz = spi->max_speed_hz ? : 4 * HZ_PER_MHZ; ret = spi_setup(spi); From 5b6a7f323b533e5ab07e93633ad9644b41b6df42 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 13 Dec 2023 16:16:35 +1100 Subject: [PATCH 10/33] ext4: allow for the last group to be marked as trimmed commit 7c784d624819acbeefb0018bac89e632467cca5a upstream. The ext4 filesystem tracks the trim status of blocks at the group level. When an entire group has been trimmed then it is marked as such and subsequent trim invocations with the same minimum trim size will not be attempted on that group unless it is marked as able to be trimmed again such as when a block is freed. Currently the last group can't be marked as trimmed due to incorrect logic in ext4_last_grp_cluster(). ext4_last_grp_cluster() is supposed to return the zero based index of the last cluster in a group. This is then used by ext4_try_to_trim_range() to determine if the trim operation spans the entire group and as such if the trim status of the group should be recorded. ext4_last_grp_cluster() takes a 0 based group index, thus the valid values for grp are 0..(ext4_get_groups_count - 1). Any group index less than (ext4_get_groups_count - 1) is not the last group and must have EXT4_CLUSTERS_PER_GROUP(sb) clusters. For the last group we need to calculate the number of clusters based on the number of blocks in the group. Finally subtract 1 from the number of clusters as zero based indexing is expected. Rearrange the function slightly to make it clear what we are calculating and returning. Reproducer: // Create file system where the last group has fewer blocks than // blocks per group $ mkfs.ext4 -b 4096 -g 8192 /dev/nvme0n1 8191 $ mount /dev/nvme0n1 /mnt Before Patch: $ fstrim -v /mnt /mnt: 25.9 MiB (27156480 bytes) trimmed // Group not marked as trimmed so second invocation still discards blocks $ fstrim -v /mnt /mnt: 25.9 MiB (27156480 bytes) trimmed After Patch: fstrim -v /mnt /mnt: 25.9 MiB (27156480 bytes) trimmed // Group marked as trimmed so second invocation DOESN'T discard any blocks fstrim -v /mnt /mnt: 0 B (0 bytes) trimmed Fixes: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Cc: # 4.19+ Signed-off-by: Suraj Jitindar Singh Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231213051635.37731-1-surajjs@amazon.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8875fac9f958..0572ae09c729 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5192,11 +5192,16 @@ __acquires(bitlock) static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, ext4_group_t grp) { - if (grp < ext4_get_groups_count(sb)) - return EXT4_CLUSTERS_PER_GROUP(sb) - 1; - return (ext4_blocks_count(EXT4_SB(sb)->s_es) - - ext4_group_first_block_no(sb, grp) - 1) >> - EXT4_CLUSTER_BITS(sb); + unsigned long nr_clusters_in_group; + + if (grp < (ext4_get_groups_count(sb) - 1)) + nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb); + else + nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) - + ext4_group_first_block_no(sb, grp)) + >> EXT4_CLUSTER_BITS(sb); + + return nr_clusters_in_group - 1; } static bool ext4_trim_interrupted(void) From 15a67115d487ea5cb8213915a4f75f58adb87cbc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 7 Dec 2023 18:36:57 +0800 Subject: [PATCH 11/33] crypto: api - Disallow identical driver names commit 27016f75f5ed47e2d8e0ca75a8ff1f40bc1a5e27 upstream. Disallow registration of two algorithms with identical driver names. Cc: Reported-by: Ovidiu Panait Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index 346557a3fc0b..a1ceca922841 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -231,6 +231,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) } if (!strcmp(q->cra_driver_name, alg->cra_name) || + !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } From 489506a2a0cbbfc7065d4d18ec6bb9baa3818c62 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Thu, 16 Nov 2023 08:56:09 +0800 Subject: [PATCH 12/33] PM: hibernate: Enforce ordering during image compression/decompression commit 71cd7e80cfde548959952eac7063aeaea1f2e1c6 upstream. An S4 (suspend to disk) test on the LoongArch 3A6000 platform sometimes fails with the following error messaged in the dmesg log: Invalid LZO compressed length That happens because when compressing/decompressing the image, the synchronization between the control thread and the compress/decompress/crc thread is based on a relaxed ordering interface, which is unreliable, and the following situation may occur: CPU 0 CPU 1 save_image_lzo lzo_compress_threadfn atomic_set(&d->stop, 1); atomic_read(&data[thr].stop) data[thr].cmp = data[thr].cmp_len; WRITE data[thr].cmp_len Then CPU0 gets a stale cmp_len and writes it to disk. During resume from S4, wrong cmp_len is loaded. To maintain data consistency between the two threads, use the acquire/release variants of atomic set and read operations. Fixes: 081a9d043c98 ("PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image") Cc: All applicable Signed-off-by: Hongchen Zhang Co-developed-by: Weihao Li Signed-off-by: Weihao Li [ rjw: Subject rewrite and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/swap.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 9db7f2f93fae..a818ad6ade9c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -596,11 +596,11 @@ static int crc32_threadfn(void *data) unsigned i; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -609,7 +609,7 @@ static int crc32_threadfn(void *data) for (i = 0; i < d->run_threads; i++) *d->crc32 = crc32_le(*d->crc32, d->unc[i], *d->unc_len[i]); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -639,12 +639,12 @@ static int lzo_compress_threadfn(void *data) struct cmp_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -653,7 +653,7 @@ static int lzo_compress_threadfn(void *data) d->ret = lzo1x_1_compress(d->unc, d->unc_len, d->cmp + LZO_HEADER, &d->cmp_len, d->wrk); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -791,7 +791,7 @@ static int save_image_lzo(struct swap_map_handle *handle, data[thr].unc_len = off; - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -799,12 +799,12 @@ static int save_image_lzo(struct swap_map_handle *handle, break; crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -843,7 +843,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } @@ -1124,12 +1124,12 @@ static int lzo_decompress_threadfn(void *data) struct dec_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -1142,7 +1142,7 @@ static int lzo_decompress_threadfn(void *data) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -1330,7 +1330,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); crc->run_threads = 0; } @@ -1366,7 +1366,7 @@ static int load_image_lzo(struct swap_map_handle *handle, pg = 0; } - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -1385,7 +1385,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -1416,7 +1416,7 @@ static int load_image_lzo(struct swap_map_handle *handle, ret = snapshot_write_next(snapshot); if (ret <= 0) { crc->run_threads = thr + 1; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); goto out_finish; } @@ -1424,13 +1424,13 @@ static int load_image_lzo(struct swap_map_handle *handle, } crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); } out_finish: if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } stop = ktime_get(); From eafd83b92f6c044007a3591cbd476bcf90455990 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 2 Dec 2023 09:01:54 +0800 Subject: [PATCH 13/33] hwrng: core - Fix page fault dead lock on mmap-ed hwrng commit 78aafb3884f6bc6636efcc1760c891c8500b9922 upstream. There is a dead-lock in the hwrng device read path. This triggers when the user reads from /dev/hwrng into memory also mmap-ed from /dev/hwrng. The resulting page fault triggers a recursive read which then dead-locks. Fix this by using a stack buffer when calling copy_to_user. Reported-by: Edward Adam Davis Reported-by: syzbot+c52ab18308964d248092@syzkaller.appspotmail.com Fixes: 9996508b3353 ("hwrng: core - Replace u32 in driver API with byte array") Cc: Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/core.c | 36 +++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index c9757fa2d308..d932dd95b04e 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -24,10 +24,13 @@ #include #include #include +#include #include #define RNG_MODULE_NAME "hw_random" +#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) + static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; @@ -59,7 +62,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static size_t rng_buffer_size(void) { - return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; + return RNG_BUFFER_SIZE; } static void add_early_randomness(struct hwrng *rng) @@ -202,6 +205,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { + u8 buffer[RNG_BUFFER_SIZE]; ssize_t ret = 0; int err = 0; int bytes_read, len; @@ -229,34 +233,37 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; - } - data_avail = bytes_read; - } - - if (!data_avail) { - if (filp->f_flags & O_NONBLOCK) { + } else if (bytes_read == 0 && + (filp->f_flags & O_NONBLOCK)) { err = -EAGAIN; goto out_unlock_reading; } - } else { - len = data_avail; + + data_avail = bytes_read; + } + + len = data_avail; + if (len) { if (len > size) len = size; data_avail -= len; - if (copy_to_user(buf + ret, rng_buffer + data_avail, - len)) { + memcpy(buffer, rng_buffer + data_avail, len); + } + mutex_unlock(&reading_mutex); + put_rng(rng); + + if (len) { + if (copy_to_user(buf + ret, buffer, len)) { err = -EFAULT; - goto out_unlock_reading; + goto out; } size -= len; ret += len; } - mutex_unlock(&reading_mutex); - put_rng(rng); if (need_resched()) schedule_timeout_interruptible(1); @@ -267,6 +274,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: + memzero_explicit(buffer, sizeof(buffer)); return ret ? : err; out_unlock_reading: From 4ce844d0f50f789cc70de7bd02511df0f40c64b6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 26 Jan 2024 10:36:31 -0700 Subject: [PATCH 14/33] powerpc: Use always instead of always-y in for crtsavres.o This commit is for linux-4.19.y only, it has no direct upstream equivalent. Prior to commit 5f2fb52fac15 ("kbuild: rename hostprogs-y/always to hostprogs/always-y"), always-y did not exist, making the backport of mainline commit 1b1e38002648 ("powerpc: add crtsavres.o to always-y instead of extra-y") to linux-4.19.y as commit b7b85ec5ec15 ("powerpc: add crtsavres.o to always-y instead of extra-y") incorrect, breaking the build with linkers that need crtsavres.o: ld.lld: error: cannot open arch/powerpc/lib/crtsavres.o: No such file or directory Backporting the aforementioned kbuild commit is not suitable for stable due to its size and number of conflicts, so transform the always-y usage to an equivalent form using always, which resolves the build issues. Fixes: b7b85ec5ec15 ("powerpc: add crtsavres.o to always-y instead of extra-y") Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 6f1e57182876..f0aa6fc8c6b2 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -21,8 +21,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o # 64-bit linker creates .sfpr on demand for final link (vmlinux), # so it is only needed for modules, and only for older linkers which # do not support --save-restore-funcs -ifeq ($(call ld-ifversion, -lt, 225000000, y),y) -always-$(CONFIG_PPC64) += crtsavres.o +ifeq ($(call ld-ifversion, -lt, 225000000, y)$(CONFIG_PPC64),yy) +always += crtsavres.o endif obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ From 229ce47cbfdc7d3a9415eb676abbfb77d676cb08 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Fri, 15 Dec 2023 10:00:49 +0800 Subject: [PATCH 15/33] rpmsg: virtio: Free driver_override when rpmsg_remove() commit d5362c37e1f8a40096452fc201c30e705750e687 upstream. Free driver_override when rpmsg_remove(), otherwise the following memory leak will occur: unreferenced object 0xffff0000d55d7080 (size 128): comm "kworker/u8:2", pid 56, jiffies 4294893188 (age 214.272s) hex dump (first 32 bytes): 72 70 6d 73 67 5f 6e 73 00 00 00 00 00 00 00 00 rpmsg_ns........ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000009c94c9c1>] __kmem_cache_alloc_node+0x1f8/0x320 [<000000002300d89b>] __kmalloc_node_track_caller+0x44/0x70 [<00000000228a60c3>] kstrndup+0x4c/0x90 [<0000000077158695>] driver_set_override+0xd0/0x164 [<000000003e9c4ea5>] rpmsg_register_device_override+0x98/0x170 [<000000001c0c89a8>] rpmsg_ns_register_device+0x24/0x30 [<000000008bbf8fa2>] rpmsg_probe+0x2e0/0x3ec [<00000000e65a68df>] virtio_dev_probe+0x1c0/0x280 [<00000000443331cc>] really_probe+0xbc/0x2dc [<00000000391064b1>] __driver_probe_device+0x78/0xe0 [<00000000a41c9a5b>] driver_probe_device+0xd8/0x160 [<000000009c3bd5df>] __device_attach_driver+0xb8/0x140 [<0000000043cd7614>] bus_for_each_drv+0x7c/0xd4 [<000000003b929a36>] __device_attach+0x9c/0x19c [<00000000a94e0ba8>] device_initial_probe+0x14/0x20 [<000000003c999637>] bus_probe_device+0xa0/0xac Signed-off-by: Xiaolei Wang Fixes: b0b03b811963 ("rpmsg: Release rpmsg devices in backends") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231215020049.78750-1-xiaolei.wang@windriver.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/virtio_rpmsg_bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 664f957012cd..aab990339051 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -381,6 +381,7 @@ static void virtio_rpmsg_release_device(struct device *dev) struct rpmsg_device *rpdev = to_rpmsg_device(dev); struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev); + kfree(rpdev->driver_override); kfree(vch); } From 771df0145297a1a9f1e7f799da43f8b0f8850e7c Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Thu, 25 Jan 2024 19:06:54 +0100 Subject: [PATCH 16/33] x86/CPU/AMD: Fix disabling XSAVES on AMD family 0x17 due to erratum The stable kernel version backport of the patch disabling XSAVES on AMD Zen family 0x17 applied this change to the wrong function (init_amd_k6()), one which isn't called for Zen CPUs. Move the erratum to the init_amd_zn() function instead. Add an explicit family 0x17 check to the erratum so nothing will break if someone naively makes this kernel version call init_amd_zn() also for family 0x19 in the future (as the current upstream code does). Fixes: f028a7db9824 ("x86/CPU/AMD: Disable XSAVES on AMD family 0x17") Signed-off-by: Maciej S. Szmigiero Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 84667781c41d..5b75a4ff6802 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -271,15 +271,6 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } #endif - /* - * Work around Erratum 1386. The XSAVES instruction malfunctions in - * certain circumstances on Zen1/2 uarch, and not all parts have had - * updated microcode at the time of writing (March 2023). - * - * Affected parts all have no supervisor XSAVE states, meaning that - * the XSAVEC instruction (which works fine) is equivalent. - */ - clear_cpu_cap(c, X86_FEATURE_XSAVES); } static void init_amd_k7(struct cpuinfo_x86 *c) @@ -979,6 +970,17 @@ static void init_amd_zn(struct cpuinfo_x86 *c) if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) set_cpu_cap(c, X86_FEATURE_BTC_NO); } + + /* + * Work around Erratum 1386. The XSAVES instruction malfunctions in + * certain circumstances on Zen1/2 uarch, and not all parts have had + * updated microcode at the time of writing (March 2023). + * + * Affected parts all have no supervisor XSAVE states, meaning that + * the XSAVEC instruction (which works fine) is equivalent. + */ + if (c->x86 == 0x17) + clear_cpu_cap(c, X86_FEATURE_XSAVES); } static bool cpu_has_zenbleed_microcode(void) From 63cc5eb9b4102474423c4d6563fdf2ebbedf437f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 3 Jan 2024 21:02:16 +0100 Subject: [PATCH 17/33] parisc/firmware: Fix F-extend for PDC addresses commit 735ae74f73e55c191d48689bd11ff4a06ea0508f upstream. When running with narrow firmware (64-bit kernel using a 32-bit firmware), extend PDC addresses into the 0xfffffff0.00000000 region instead of the 0xf0f0f0f0.00000000 region. This fixes the power button on the C3700 machine in qemu (64-bit CPU with 32-bit firmware), and my assumption is that the previous code was really never used (because most 64-bit machines have a 64-bit firmware), or it just worked on very old machines because they may only decode 40-bit of virtual addresses. Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/firmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 906b7c882587..82f264621845 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -127,10 +127,10 @@ static unsigned long f_extend(unsigned long address) #ifdef CONFIG_64BIT if(unlikely(parisc_narrow_firmware)) { if((address & 0xff000000) == 0xf0000000) - return 0xf0f0f0f000000000UL | (u32)address; + return (0xfffffff0UL << 32) | (u32)address; if((address & 0xf0000000) == 0xf0000000) - return 0xffffffff00000000UL | (u32)address; + return (0xffffffffUL << 32) | (u32)address; } #endif return address; From d31978bfec1d251a75d4a038e564ef2ff9d8be40 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Aug 2020 13:44:59 +0300 Subject: [PATCH 18/33] driver core: Annotate dev_err_probe() with __must_check commit e1f82a0dcf388d98bcc7ad195c03bd812405e6b2 upstream. We have got already new users of this API which interpret it differently and miss the opportunity to optimize their code. In order to avoid similar cases in the future, annotate dev_err_probe() with __must_check. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200826104459.81979-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 0714d6e5d500..2e5926792f4e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1582,7 +1582,7 @@ do { \ dev_driver_string(dev), dev_name(dev), ## arg) extern __printf(3, 4) -int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +int __must_check dev_err_probe(const struct device *dev, int err, const char *fmt, ...); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ From 98fcd3b663835bc7518d65d053419c640f3515a3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 18 Jan 2024 06:19:57 +1000 Subject: [PATCH 19/33] nouveau/vmm: don't set addr on the fail path to avoid warning commit cacea81390fd8c8c85404e5eb2adeb83d87a912e upstream. nvif_vmm_put gets called if addr is set, but if the allocation fails we don't need to call put, otherwise we get a warning like [523232.435671] ------------[ cut here ]------------ [523232.435674] WARNING: CPU: 8 PID: 1505697 at drivers/gpu/drm/nouveau/nvif/vmm.c:68 nvif_vmm_put+0x72/0x80 [nouveau] [523232.435795] Modules linked in: uinput rfcomm snd_seq_dummy snd_hrtimer nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink qrtr bnep sunrpc binfmt_misc intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common iwlmvm nfit libnvdimm vfat fat x86_pkg_temp_thermal intel_powerclamp mac80211 snd_soc_avs snd_soc_hda_codec coretemp snd_hda_ext_core snd_soc_core snd_hda_codec_realtek kvm_intel snd_hda_codec_hdmi snd_compress snd_hda_codec_generic ac97_bus snd_pcm_dmaengine snd_hda_intel libarc4 snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec kvm iwlwifi snd_hda_core btusb snd_hwdep btrtl snd_seq btintel irqbypass btbcm rapl snd_seq_device eeepc_wmi btmtk intel_cstate iTCO_wdt cfg80211 snd_pcm asus_wmi bluetooth intel_pmc_bxt iTCO_vendor_support snd_timer ledtrig_audio pktcdvd snd mei_me [523232.435828] sparse_keymap intel_uncore i2c_i801 platform_profile wmi_bmof mei pcspkr ioatdma soundcore i2c_smbus rfkill idma64 dca joydev acpi_tad loop zram nouveau drm_ttm_helper ttm video drm_exec drm_gpuvm gpu_sched crct10dif_pclmul i2c_algo_bit nvme crc32_pclmul crc32c_intel drm_display_helper polyval_clmulni nvme_core polyval_generic e1000e mxm_wmi cec ghash_clmulni_intel r8169 sha512_ssse3 nvme_common wmi pinctrl_sunrisepoint uas usb_storage ip6_tables ip_tables fuse [523232.435849] CPU: 8 PID: 1505697 Comm: gnome-shell Tainted: G W 6.6.0-rc7-nvk-uapi+ #12 [523232.435851] Hardware name: System manufacturer System Product Name/ROG STRIX X299-E GAMING II, BIOS 1301 09/24/2021 [523232.435852] RIP: 0010:nvif_vmm_put+0x72/0x80 [nouveau] [523232.435934] Code: 00 00 48 89 e2 be 02 00 00 00 48 c7 04 24 00 00 00 00 48 89 44 24 08 e8 fc bf ff ff 85 c0 75 0a 48 c7 43 08 00 00 00 00 eb b3 <0f> 0b eb f2 e8 f5 c9 b2 e6 0f 1f 44 00 00 90 90 90 90 90 90 90 90 [523232.435936] RSP: 0018:ffffc900077ffbd8 EFLAGS: 00010282 [523232.435937] RAX: 00000000fffffffe RBX: ffffc900077ffc00 RCX: 0000000000000010 [523232.435938] RDX: 0000000000000010 RSI: ffffc900077ffb38 RDI: ffffc900077ffbd8 [523232.435940] RBP: ffff888e1c4f2140 R08: 0000000000000000 R09: 0000000000000000 [523232.435940] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888503811800 [523232.435941] R13: ffffc900077ffca0 R14: ffff888e1c4f2140 R15: ffff88810317e1e0 [523232.435942] FS: 00007f933a769640(0000) GS:ffff88905fa00000(0000) knlGS:0000000000000000 [523232.435943] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [523232.435944] CR2: 00007f930bef7000 CR3: 00000005d0322001 CR4: 00000000003706e0 [523232.435945] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [523232.435946] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [523232.435964] Call Trace: [523232.435965] [523232.435966] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436051] ? __warn+0x81/0x130 [523232.436055] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436138] ? report_bug+0x171/0x1a0 [523232.436142] ? handle_bug+0x3c/0x80 [523232.436144] ? exc_invalid_op+0x17/0x70 [523232.436145] ? asm_exc_invalid_op+0x1a/0x20 [523232.436149] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436230] ? nvif_vmm_put+0x64/0x80 [nouveau] [523232.436342] nouveau_vma_del+0x80/0xd0 [nouveau] [523232.436506] nouveau_vma_new+0x1a0/0x210 [nouveau] [523232.436671] nouveau_gem_object_open+0x1d0/0x1f0 [nouveau] [523232.436835] drm_gem_handle_create_tail+0xd1/0x180 [523232.436840] drm_prime_fd_to_handle_ioctl+0x12e/0x200 [523232.436844] ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10 [523232.436847] drm_ioctl_kernel+0xd3/0x180 [523232.436849] drm_ioctl+0x26d/0x4b0 [523232.436851] ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10 [523232.436855] nouveau_drm_ioctl+0x5a/0xb0 [nouveau] [523232.437032] __x64_sys_ioctl+0x94/0xd0 [523232.437036] do_syscall_64+0x5d/0x90 [523232.437040] ? syscall_exit_to_user_mode+0x2b/0x40 [523232.437044] ? do_syscall_64+0x6c/0x90 [523232.437046] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Reported-by: Faith Ekstrand Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240117213852.295565-1-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_vmm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 2032c3e4f6e5..fe8b88c6e2f9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -107,6 +107,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, } else { ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, mem->mem.size, &tmp); + if (ret) + goto done; + vma->addr = tmp.addr; } From b715d543d42e8e4ddd645193410cd4511fc46d6c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Sep 2020 09:37:40 +0200 Subject: [PATCH 20/33] Revert "driver core: Annotate dev_err_probe() with __must_check" commit f601e8f37c2c1c52f2923fffc48204a7f7dc023d upstream. This reverts commit e1f82a0dcf388d98bcc7ad195c03bd812405e6b2 as it's already starting to cause build warnings in linux-next for things that are "obviously correct". It's up to driver authors do "do the right thing" here with this function, and if they don't want to call it as the last line of a function, that's up to them, otherwise code that looks like: ret = dev_err_probe(..., ret, ...); does look really "odd". Reported-by: Stephen Rothwell Reported-by: Krzysztof Kozlowski Fixes: e1f82a0dcf38 ("driver core: Annotate dev_err_probe() with __must_check") Cc: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 2e5926792f4e..0714d6e5d500 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1582,7 +1582,7 @@ do { \ dev_driver_string(dev), dev_name(dev), ## arg) extern __printf(3, 4) -int __must_check dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ From d2d0b95ca1b5fefa3deed444a803c9f809db66cf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Aug 2023 15:41:00 +0100 Subject: [PATCH 21/33] block: Remove special-casing of compound pages commit 1b151e2435fc3a9b10c8946c6aebe9f3e1938c55 upstream. The special casing was originally added in pre-git history; reproducing the commit log here: > commit a318a92567d77 > Author: Andrew Morton > Date: Sun Sep 21 01:42:22 2003 -0700 > > [PATCH] Speed up direct-io hugetlbpage handling > > This patch short-circuits all the direct-io page dirtying logic for > higher-order pages. Without this, we pointlessly bounce BIOs up to > keventd all the time. In the last twenty years, compound pages have become used for more than just hugetlb. Rewrite these functions to operate on folios instead of pages and remove the special case for hugetlbfs; I don't think it's needed any more (and if it is, we can put it back in as a call to folio_test_hugetlb()). This was found by inspection; as far as I can tell, this bug can lead to pages used as the destination of a direct I/O read not being marked as dirty. If those pages are then reclaimed by the MM without being dirtied for some other reason, they won't be written out. Then when they're faulted back in, they will not contain the data they should. It'll take a pretty unusual setup to produce this problem with several races all going the wrong way. This problem predates the folio work; it could for example have been triggered by mmaping a THP in tmpfs and using that as the target of an O_DIRECT read. Fixes: 800d8c63b2e98 ("shmem: add huge pages support") Cc: Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/bio.c b/block/bio.c index 7858b2d23916..476a88e11715 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1592,8 +1592,7 @@ void bio_set_pages_dirty(struct bio *bio) int i; bio_for_each_segment_all(bvec, bio, i) { - if (!PageCompound(bvec->bv_page)) - set_page_dirty_lock(bvec->bv_page); + set_page_dirty_lock(bvec->bv_page); } } EXPORT_SYMBOL_GPL(bio_set_pages_dirty); @@ -1652,7 +1651,7 @@ void bio_check_pages_dirty(struct bio *bio) int i; bio_for_each_segment_all(bvec, bio, i) { - if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page)) + if (!PageDirty(bvec->bv_page)) goto defer; } From cf07cb793264fd3c459918dda7e839d6a279493c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 28 Aug 2020 18:14:35 +0200 Subject: [PATCH 22/33] driver code: print symbolic error code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 693a8e936590f93451e6f5a3d748616f5a59c80b upstream. dev_err_probe() prepends the message with an error code. Let's make it more readable by translating the code to a more recognisable symbol. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/ea3f973e4708919573026fdce52c264db147626d.1598630856.git.mirq-linux@rere.qmqm.pl Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index b66647277d52..8a53e346e7aa 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3366,9 +3366,9 @@ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) vaf.va = &args; if (err != -EPROBE_DEFER) - dev_err(dev, "error %d: %pV", err, &vaf); + dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); else - dev_dbg(dev, "error %d: %pV", err, &vaf); + dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); va_end(args); From 4d61ff79b439fe9cd5eaa3363a25853f230e2026 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 9 Sep 2020 11:53:43 +0200 Subject: [PATCH 23/33] drivers: core: fix kernel-doc markup for dev_err_probe() commit 074b3aad307de6126fbac1fff4996d1034b48fee upstream. There are two literal blocks there. Fix the markups, in order to produce the right html output and solve those warnings: ./drivers/base/core.c:4218: WARNING: Unexpected indentation. ./drivers/base/core.c:4222: WARNING: Definition list ends without a blank line; unexpected unindent. ./drivers/base/core.c:4223: WARNING: Block quote ends without a blank line; unexpected unindent. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 8a53e346e7aa..5fd2b887cc04 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3344,13 +3344,15 @@ define_dev_printk_level(_dev_info, KERN_INFO); * This helper implements common pattern present in probe functions for error * checking: print debug or error message depending if the error value is * -EPROBE_DEFER and propagate error upwards. - * It replaces code sequence: + * It replaces code sequence:: * if (err != -EPROBE_DEFER) * dev_err(dev, ...); * else * dev_dbg(dev, ...); * return err; - * with + * + * with:: + * * return dev_err_probe(dev, err, ...); * * Returns @err. From 27aea64838914c6122db5b8bd4bed865c9736f22 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 18 Jan 2024 12:32:10 +0800 Subject: [PATCH 24/33] net/smc: fix illegal rmb_desc access in SMC-D connection dump [ Upstream commit dbc153fd3c142909e564bb256da087e13fbf239c ] A crash was found when dumping SMC-D connections. It can be reproduced by following steps: - run nginx/wrk test: smc_run nginx smc_run wrk -t 16 -c 1000 -d -H 'Connection: Close' - continuously dump SMC-D connections in parallel: watch -n 1 'smcss -D' BUG: kernel NULL pointer dereference, address: 0000000000000030 CPU: 2 PID: 7204 Comm: smcss Kdump: loaded Tainted: G E 6.7.0+ #55 RIP: 0010:__smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x66/0x150 ? exc_page_fault+0x69/0x140 ? asm_exc_page_fault+0x26/0x30 ? __smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] ? __kmalloc_node_track_caller+0x35d/0x430 ? __alloc_skb+0x77/0x170 smc_diag_dump_proto+0xd0/0xf0 [smc_diag] smc_diag_dump+0x26/0x60 [smc_diag] netlink_dump+0x19f/0x320 __netlink_dump_start+0x1dc/0x300 smc_diag_handler_dump+0x6a/0x80 [smc_diag] ? __pfx_smc_diag_dump+0x10/0x10 [smc_diag] sock_diag_rcv_msg+0x121/0x140 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x5a/0x110 sock_diag_rcv+0x28/0x40 netlink_unicast+0x22a/0x330 netlink_sendmsg+0x1f8/0x420 __sock_sendmsg+0xb0/0xc0 ____sys_sendmsg+0x24e/0x300 ? copy_msghdr_from_user+0x62/0x80 ___sys_sendmsg+0x7c/0xd0 ? __do_fault+0x34/0x160 ? do_read_fault+0x5f/0x100 ? do_fault+0xb0/0x110 ? __handle_mm_fault+0x2b0/0x6c0 __sys_sendmsg+0x4d/0x80 do_syscall_64+0x69/0x180 entry_SYSCALL_64_after_hwframe+0x6e/0x76 It is possible that the connection is in process of being established when we dump it. Assumed that the connection has been registered in a link group by smc_conn_create() but the rmb_desc has not yet been initialized by smc_buf_create(), thus causing the illegal access to conn->rmb_desc. So fix it by checking before dump. Fixes: 4b1b7d3b30a6 ("net/smc: add SMC-D diag support") Signed-off-by: Wen Gu Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/smc/smc_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 6c4a7a5938b7..3ab14678afff 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -167,7 +167,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc->conn.lgr && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; From 91759822dd336c20f817e6fd59cccee3952599f7 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 18 Jan 2024 21:03:06 +0800 Subject: [PATCH 25/33] vlan: skip nested type that is not IFLA_VLAN_QOS_MAPPING [ Upstream commit 6c21660fe221a15c789dee2bc2fd95516bc5aeaf ] In the vlan_changelink function, a loop is used to parse the nested attributes IFLA_VLAN_EGRESS_QOS and IFLA_VLAN_INGRESS_QOS in order to obtain the struct ifla_vlan_qos_mapping. These two nested attributes are checked in the vlan_validate_qos_map function, which calls nla_validate_nested_deprecated with the vlan_map_policy. However, this deprecated validator applies a LIBERAL strictness, allowing the presence of an attribute with the type IFLA_VLAN_QOS_UNSPEC. Consequently, the loop in vlan_changelink may parse an attribute of type IFLA_VLAN_QOS_UNSPEC and believe it carries a payload of struct ifla_vlan_qos_mapping, which is not necessarily true. To address this issue and ensure compatibility, this patch introduces two type checks that skip attributes whose type is not IFLA_VLAN_QOS_MAPPING. Fixes: 07b5b17e157b ("[VLAN]: Use rtnl_link API") Signed-off-by: Lin Ma Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240118130306.1644001-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/8021q/vlan_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 74042b9d7f73..231eae2f1685 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -120,12 +120,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) From 84e9d10419f6f4f3f3cd8f9aaf44a48719aa4b1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2024 18:36:25 +0000 Subject: [PATCH 26/33] llc: make llc_ui_sendmsg() more robust against bonding changes [ Upstream commit dad555c816a50c6a6a8a86be1f9177673918c647 ] syzbot was able to trick llc_ui_sendmsg(), allocating an skb with no headroom, but subsequently trying to push 14 bytes of Ethernet header [1] Like some others, llc_ui_sendmsg() releases the socket lock before calling sock_alloc_send_skb(). Then it acquires it again, but does not redo all the sanity checks that were performed. This fix: - Uses LL_RESERVED_SPACE() to reserve space. - Check all conditions again after socket lock is held again. - Do not account Ethernet header for mtu limitation. [1] skbuff: skb_under_panic: text:ffff800088baa334 len:1514 put:14 head:ffff0000c9c37000 data:ffff0000c9c36ff2 tail:0x5dc end:0x6c0 dev:bond0 kernel BUG at net/core/skbuff.c:193 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 6875 Comm: syz-executor.0 Not tainted 6.7.0-rc8-syzkaller-00101-g0802e17d9aca-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic net/core/skbuff.c:189 [inline] pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 lr : skb_panic net/core/skbuff.c:189 [inline] lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 sp : ffff800096f97000 x29: ffff800096f97010 x28: ffff80008cc8d668 x27: dfff800000000000 x26: ffff0000cb970c90 x25: 00000000000005dc x24: ffff0000c9c36ff2 x23: ffff0000c9c37000 x22: 00000000000005ea x21: 00000000000006c0 x20: 000000000000000e x19: ffff800088baa334 x18: 1fffe000368261ce x17: ffff80008e4ed000 x16: ffff80008a8310f8 x15: 0000000000000001 x14: 1ffff00012df2d58 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000001 x10: 0000000000ff0100 x9 : e28a51f1087e8400 x8 : e28a51f1087e8400 x7 : ffff80008028f8d0 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000001 x3 : ffff800082b78714 x2 : 0000000000000001 x1 : 0000000100000000 x0 : 0000000000000089 Call trace: skb_panic net/core/skbuff.c:189 [inline] skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 skb_push+0xf0/0x108 net/core/skbuff.c:2451 eth_header+0x44/0x1f8 net/ethernet/eth.c:83 dev_hard_header include/linux/netdevice.h:3188 [inline] llc_mac_hdr_init+0x110/0x17c net/llc/llc_output.c:33 llc_sap_action_send_xid_c+0x170/0x344 net/llc/llc_s_ac.c:85 llc_exec_sap_trans_actions net/llc/llc_sap.c:153 [inline] llc_sap_next_state net/llc/llc_sap.c:182 [inline] llc_sap_state_process+0x1ec/0x774 net/llc/llc_sap.c:209 llc_build_and_send_xid_pkt+0x12c/0x1c0 net/llc/llc_sap.c:270 llc_ui_sendmsg+0x7bc/0xb1c net/llc/af_llc.c:997 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x194/0x274 net/socket.c:767 splice_to_socket+0x7cc/0xd58 fs/splice.c:881 do_splice_from fs/splice.c:933 [inline] direct_splice_actor+0xe4/0x1c0 fs/splice.c:1142 splice_direct_to_actor+0x2a0/0x7e4 fs/splice.c:1088 do_splice_direct+0x20c/0x348 fs/splice.c:1194 do_sendfile+0x4bc/0xc70 fs/read_write.c:1254 __do_sys_sendfile64 fs/read_write.c:1322 [inline] __se_sys_sendfile64 fs/read_write.c:1308 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1308 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x158 arch/arm64/kernel/entry-common.c:678 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595 Code: aa1803e6 aa1903e7 a90023f5 94792f6a (d4210000) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+2a7024e9502df538e8ef@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240118183625.4007013-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/llc/af_llc.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5cba9199c3c9..a5c104ea477c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -926,14 +926,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { + DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; + int rc = -EINVAL, copied = 0, hdrlen, hh_len; struct sk_buff *skb = NULL; + struct net_device *dev; size_t size = 0; - int rc = -EINVAL, copied = 0, hdrlen; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); @@ -953,22 +954,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (rc) goto out; } - hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); + dev = llc->dev; + hh_len = LL_RESERVED_SPACE(dev); + hdrlen = llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > llc->dev->mtu) - size = llc->dev->mtu; + size = min_t(size_t, size, READ_ONCE(dev->mtu)); copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); - skb = sock_alloc_send_skb(sk, size, noblock, &rc); + skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc); lock_sock(sk); if (!skb) goto out; - skb->dev = llc->dev; + if (sock_flag(sk, SOCK_ZAPPED) || + llc->dev != dev || + hdrlen != llc_ui_header_len(sk, addr) || + hh_len != LL_RESERVED_SPACE(dev) || + size > READ_ONCE(dev->mtu)) + goto out; + skb->dev = dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); - skb_reserve(skb, hdrlen); + skb_reserve(skb, hh_len + hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; From 165ad1e22779685c3ed3dd349c6c4c632309cc62 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 18 Jan 2024 17:55:15 -0800 Subject: [PATCH 27/33] llc: Drop support for ETH_P_TR_802_2. [ Upstream commit e3f9bed9bee261e3347131764e42aeedf1ffea61 ] syzbot reported an uninit-value bug below. [0] llc supports ETH_P_802_2 (0x0004) and used to support ETH_P_TR_802_2 (0x0011), and syzbot abused the latter to trigger the bug. write$tun(r0, &(0x7f0000000040)={@val={0x0, 0x11}, @val, @mpls={[], @llc={@snap={0xaa, 0x1, ')', "90e5dd"}}}}, 0x16) llc_conn_handler() initialises local variables {saddr,daddr}.mac based on skb in llc_pdu_decode_sa()/llc_pdu_decode_da() and passes them to __llc_lookup(). However, the initialisation is done only when skb->protocol is htons(ETH_P_802_2), otherwise, __llc_lookup_established() and __llc_lookup_listener() will read garbage. The missing initialisation existed prior to commit 211ed865108e ("net: delete all instances of special processing for token ring"). It removed the part to kick out the token ring stuff but forgot to close the door allowing ETH_P_TR_802_2 packets to sneak into llc_rcv(). Let's remove llc_tr_packet_type and complete the deprecation. [0]: BUG: KMSAN: uninit-value in __llc_lookup_established+0xe9d/0xf90 __llc_lookup_established+0xe9d/0xf90 __llc_lookup net/llc/llc_conn.c:611 [inline] llc_conn_handler+0x4bd/0x1360 net/llc/llc_conn.c:791 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 __netif_receive_skb_one_core net/core/dev.c:5527 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5641 netif_receive_skb_internal net/core/dev.c:5727 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5786 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x53af/0x66d0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2020 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x1490 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Local variable daddr created at: llc_conn_handler+0x53/0x1360 net/llc/llc_conn.c:783 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 CPU: 1 PID: 5004 Comm: syz-executor994 Not tainted 6.6.0-syzkaller-14500-g1c41041124bd #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Fixes: 211ed865108e ("net: delete all instances of special processing for token ring") Reported-by: syzbot+b5ad66046b913bc04c6f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b5ad66046b913bc04c6f Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119015515.61898-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/llc_pdu.h | 6 ++---- net/llc/llc_core.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 49aa79c7b278..581cd37aa98b 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); + memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** @@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); + memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 64d4bef04e73..4900a27b5176 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = { .func = llc_rcv, }; -static struct packet_type llc_tr_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TR_802_2), - .func = llc_rcv, -}; - static int __init llc_init(void) { dev_add_pack(&llc_packet_type); - dev_add_pack(&llc_tr_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); - dev_remove_pack(&llc_tr_packet_type); } module_init(llc_init); From 344350bfa3b4b37d7c3d5a00536e6fbf0e953fbf Mon Sep 17 00:00:00 2001 From: Sharath Srinivasan Date: Fri, 19 Jan 2024 17:48:39 -0800 Subject: [PATCH 28/33] net/rds: Fix UBSAN: array-index-out-of-bounds in rds_cmsg_recv [ Upstream commit 13e788deb7348cc88df34bed736c3b3b9927ea52 ] Syzcaller UBSAN crash occurs in rds_cmsg_recv(), which reads inc->i_rx_lat_trace[j + 1] with index 4 (3 + 1), but with array size of 4 (RDS_RX_MAX_TRACES). Here 'j' is assigned from rs->rs_rx_trace[i] and in-turn from trace.rx_trace_pos[i] in rds_recv_track_latency(), with both arrays sized 3 (RDS_MSG_RX_DGRAM_TRACE_MAX). So fix the off-by-one bounds check in rds_recv_track_latency() to prevent a potential crash in rds_cmsg_recv(). Found by syzcaller: ================================================================= UBSAN: array-index-out-of-bounds in net/rds/recv.c:585:39 index 4 is out of range for type 'u64 [4]' CPU: 1 PID: 8058 Comm: syz-executor228 Not tainted 6.6.0-gd2f51b3516da #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x136/0x150 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:217 [inline] __ubsan_handle_out_of_bounds+0xd5/0x130 lib/ubsan.c:348 rds_cmsg_recv+0x60d/0x700 net/rds/recv.c:585 rds_recvmsg+0x3fb/0x1610 net/rds/recv.c:716 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0xe2/0x160 net/socket.c:1066 __sys_recvfrom+0x1b6/0x2f0 net/socket.c:2246 __do_sys_recvfrom net/socket.c:2264 [inline] __se_sys_recvfrom net/socket.c:2260 [inline] __x64_sys_recvfrom+0xe0/0x1b0 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b ================================================================== Fixes: 3289025aedc0 ("RDS: add receive message trace used by application") Reported-by: Chenyuan Yang Closes: https://lore.kernel.org/linux-rdma/CALGdzuoVdq-wtQ4Az9iottBqC5cv9ZhcE5q8N7LfYFvkRsOVcw@mail.gmail.com/ Signed-off-by: Sharath Srinivasan Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/af_rds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index cd7e01ea8144..5eb71d276706 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -385,7 +385,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { - if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { + if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } From 5022b331c041e8c54b9a6a3251579bd1e8c0fc0b Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 22 Jan 2024 16:09:28 +0100 Subject: [PATCH 29/33] tracing: Ensure visibility when inserting an element into tracing_map [ Upstream commit 2b44760609e9eaafc9d234a6883d042fc21132a7 ] Running the following two commands in parallel on a multi-processor AArch64 machine can sporadically produce an unexpected warning about duplicate histogram entries: $ while true; do echo hist:key=id.syscall:val=hitcount > \ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist sleep 0.001 done $ stress-ng --sysbadaddr $(nproc) The warning looks as follows: [ 2911.172474] ------------[ cut here ]------------ [ 2911.173111] Duplicates detected: 1 [ 2911.173574] WARNING: CPU: 2 PID: 12247 at kernel/trace/tracing_map.c:983 tracing_map_sort_entries+0x3e0/0x408 [ 2911.174702] Modules linked in: iscsi_ibft(E) iscsi_boot_sysfs(E) rfkill(E) af_packet(E) nls_iso8859_1(E) nls_cp437(E) vfat(E) fat(E) ena(E) tiny_power_button(E) qemu_fw_cfg(E) button(E) fuse(E) efi_pstore(E) ip_tables(E) x_tables(E) xfs(E) libcrc32c(E) aes_ce_blk(E) aes_ce_cipher(E) crct10dif_ce(E) polyval_ce(E) polyval_generic(E) ghash_ce(E) gf128mul(E) sm4_ce_gcm(E) sm4_ce_ccm(E) sm4_ce(E) sm4_ce_cipher(E) sm4(E) sm3_ce(E) sm3(E) sha3_ce(E) sha512_ce(E) sha512_arm64(E) sha2_ce(E) sha256_arm64(E) nvme(E) sha1_ce(E) nvme_core(E) nvme_auth(E) t10_pi(E) sg(E) scsi_mod(E) scsi_common(E) efivarfs(E) [ 2911.174738] Unloaded tainted modules: cppc_cpufreq(E):1 [ 2911.180985] CPU: 2 PID: 12247 Comm: cat Kdump: loaded Tainted: G E 6.7.0-default #2 1b58bbb22c97e4399dc09f92d309344f69c44a01 [ 2911.182398] Hardware name: Amazon EC2 c7g.8xlarge/, BIOS 1.0 11/1/2018 [ 2911.183208] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 2911.184038] pc : tracing_map_sort_entries+0x3e0/0x408 [ 2911.184667] lr : tracing_map_sort_entries+0x3e0/0x408 [ 2911.185310] sp : ffff8000a1513900 [ 2911.185750] x29: ffff8000a1513900 x28: ffff0003f272fe80 x27: 0000000000000001 [ 2911.186600] x26: ffff0003f272fe80 x25: 0000000000000030 x24: 0000000000000008 [ 2911.187458] x23: ffff0003c5788000 x22: ffff0003c16710c8 x21: ffff80008017f180 [ 2911.188310] x20: ffff80008017f000 x19: ffff80008017f180 x18: ffffffffffffffff [ 2911.189160] x17: 0000000000000000 x16: 0000000000000000 x15: ffff8000a15134b8 [ 2911.190015] x14: 0000000000000000 x13: 205d373432323154 x12: 5b5d313131333731 [ 2911.190844] x11: 00000000fffeffff x10: 00000000fffeffff x9 : ffffd1b78274a13c [ 2911.191716] x8 : 000000000017ffe8 x7 : c0000000fffeffff x6 : 000000000057ffa8 [ 2911.192554] x5 : ffff0012f6c24ec0 x4 : 0000000000000000 x3 : ffff2e5b72b5d000 [ 2911.193404] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0003ff254480 [ 2911.194259] Call trace: [ 2911.194626] tracing_map_sort_entries+0x3e0/0x408 [ 2911.195220] hist_show+0x124/0x800 [ 2911.195692] seq_read_iter+0x1d4/0x4e8 [ 2911.196193] seq_read+0xe8/0x138 [ 2911.196638] vfs_read+0xc8/0x300 [ 2911.197078] ksys_read+0x70/0x108 [ 2911.197534] __arm64_sys_read+0x24/0x38 [ 2911.198046] invoke_syscall+0x78/0x108 [ 2911.198553] el0_svc_common.constprop.0+0xd0/0xf8 [ 2911.199157] do_el0_svc+0x28/0x40 [ 2911.199613] el0_svc+0x40/0x178 [ 2911.200048] el0t_64_sync_handler+0x13c/0x158 [ 2911.200621] el0t_64_sync+0x1a8/0x1b0 [ 2911.201115] ---[ end trace 0000000000000000 ]--- The problem appears to be caused by CPU reordering of writes issued from __tracing_map_insert(). The check for the presence of an element with a given key in this function is: val = READ_ONCE(entry->val); if (val && keys_match(key, val->key, map->key_size)) ... The write of a new entry is: elt = get_free_elt(map); memcpy(elt->key, key, map->key_size); entry->val = elt; The "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;" stores may become visible in the reversed order on another CPU. This second CPU might then incorrectly determine that a new key doesn't match an already present val->key and subsequently insert a new element, resulting in a duplicate. Fix the problem by adding a write barrier between "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;", and for good measure, also use WRITE_ONCE(entry->val, elt) for publishing the element. The sequence pairs with the mentioned "READ_ONCE(entry->val);" and the "val->key" check which has an address dependency. The barrier is placed on a path executed when adding an element for a new key. Subsequent updates targeting the same key remain unaffected. From the user's perspective, the issue was introduced by commit c193707dde77 ("tracing: Remove code which merges duplicates"), which followed commit cbf4100efb8f ("tracing: Add support to detect and avoid duplicates"). The previous code operated differently; it inherently expected potential races which result in duplicates but merged them later when they occurred. Link: https://lore.kernel.org/linux-trace-kernel/20240122150928.27725-1-petr.pavlu@suse.com Fixes: c193707dde77 ("tracing: Remove code which merges duplicates") Signed-off-by: Petr Pavlu Acked-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/tracing_map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 83c2a0598c64..33c463967bb3 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; From 52c46caf283bfa3016a5e41363df93c02037f788 Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Fri, 19 Jan 2024 11:01:33 -0800 Subject: [PATCH 30/33] tcp: Add memory barrier to tcp_push() [ Upstream commit 7267e8dcad6b2f9fce05a6a06335d7040acbc2b6 ] On CPUs with weak memory models, reads and updates performed by tcp_push to the sk variables can get reordered leaving the socket throttled when it should not. The tasklet running tcp_wfree() may also not observe the memory updates in time and will skip flushing any packets throttled by tcp_push(), delaying the sending. This can pathologically cause 40ms extra latency due to bad interactions with delayed acks. Adding a memory barrier in tcp_push removes the bug, similarly to the previous commit bf06200e732d ("tcp: tsq: fix nonagle handling"). smp_mb__after_atomic() is used to not incur in unnecessary overhead on x86 since not affected. Patch has been tested using an AWS c7g.2xlarge instance with Ubuntu 22.04 and Apache Tomcat 9.0.83 running the basic servlet below: import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; public class HelloWorldServlet extends HttpServlet { @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=utf-8"); OutputStreamWriter osw = new OutputStreamWriter(response.getOutputStream(),"UTF-8"); String s = "a".repeat(3096); osw.write(s,0,s.length()); osw.flush(); } } Load was applied using wrk2 (https://github.com/kinvolk/wrk2) from an AWS c6i.8xlarge instance. Before the patch an additional 40ms latency from P99.99+ values is observed while, with the patch, the extra latency disappears. No patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.91ms 75.000% 1.13ms 90.000% 1.46ms 99.000% 1.74ms 99.900% 1.89ms 99.990% 41.95ms <<< 40+ ms extra latency 99.999% 48.32ms 100.000% 48.96ms With patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.90ms 75.000% 1.13ms 90.000% 1.45ms 99.000% 1.72ms 99.900% 1.83ms 99.990% 2.11ms <<< no 40+ ms extra latency 99.999% 2.53ms 100.000% 2.62ms Patch has been also tested on x86 (m7i.2xlarge instance) which it is not affected by this issue and the patch doesn't introduce any additional delay. Fixes: 7aa5470c2c09 ("tcp: tsq: move tsq_flags close to sk_wmem_alloc") Signed-off-by: Salvatore Dipietro Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119190133.43698-1-dipiets@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 00648a478c6a..712186336997 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -733,6 +733,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); + smp_mb__after_atomic(); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. From 8ea171e43782a4f4d785ee9a3218860129a121a9 Mon Sep 17 00:00:00 2001 From: Peiyong Wang Date: Tue, 5 Mar 2024 01:46:22 +0000 Subject: [PATCH 31/33] BACKPORT: net: core: enable SO_BINDTODEVICE for non-root users Currently, SO_BINDTODEVICE requires CAP_NET_RAW. This change allows a non-root user to bind a socket to an interface if it is not already bound. This is useful to allow an application to bind itself to a specific VRF for outgoing or incoming connections. Currently, an application wanting to manage connections through several VRF need to be privileged. Previously, IP_UNICAST_IF and IPV6_UNICAST_IF were added for Wine (76e21053b5bf3 and c4062dfc425e9) specifically for use by non-root processes. However, they are restricted to sendmsg() and not usable with TCP. Allowing SO_BINDTODEVICE would allow TCP clients to get the same privilege. As for TCP servers, outside the VRF use case, SO_BINDTODEVICE would only further restrict connections a server could accept. When an application is restricted to a VRF (with `ip vrf exec`), the socket is bound to an interface at creation and therefore, a non-privileged call to SO_BINDTODEVICE to escape the VRF fails. When an application bound a socket to SO_BINDTODEVICE and transmit it to a non-privileged process through a Unix socket, a tentative to change the bound device also fails. Before: >>> import socket >>> s=socket.socket(socket.AF_INET, socket.SOCK_STREAM) >>> s.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, b"dummy0") Traceback (most recent call last): File "", line 1, in PermissionError: [Errno 1] Operation not permitted After: >>> import socket >>> s=socket.socket(socket.AF_INET, socket.SOCK_STREAM) >>> s.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, b"dummy0") >>> s.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, b"dummy0") Traceback (most recent call last): File "", line 1, in PermissionError: [Errno 1] Operation not permitted Bug: 323792489 Signed-off-by: Vincent Bernat Reviewed-by: David Ahern Signed-off-by: David S. Miller (cherry picked from commit c427bfec18f2190b8f4718785ee8ed2db4f84ee6) Change-Id: Ie3f4c536b78da12dd961dc681c6dfb0cdf1a06b7 Signed-off-by: Peiyong Wang --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index e1379df5d8d0..4b143f3ae1d9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -531,7 +531,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, /* Sorry... */ ret = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_RAW)) + if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW)) goto out; ret = -EINVAL; From 02965842883d4b8f8ecb5c4a9ac134f32054ed7e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Jan 2024 12:16:03 +0000 Subject: [PATCH 32/33] Reapply "cred: switch to using atomic_long_t" This reverts commit b2ea47a79d30760d6346e00530c7eef96b25eb6b as it is needed in Android systems. It still breaks the ABI, but that is no longer an issue on this branch. Bug: 317347552 Change-Id: I9c7f8901c5aabf6e0378b5218f587d6c7335522f Signed-off-by: Greg Kroah-Hartman --- include/linux/cred.h | 6 ++-- kernel/cred.c | 66 ++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index 4b081e4911c8..62509a19ea08 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -114,7 +114,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_t usage; + atomic_long_t usage; #ifdef CONFIG_DEBUG_CREDENTIALS atomic_t subscribers; /* number of processes subscribed */ void *put_addr; @@ -231,7 +231,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_inc(&cred->usage); + atomic_long_inc(&cred->usage); return cred; } @@ -275,7 +275,7 @@ static inline void put_cred(const struct cred *_cred) if (cred) { validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) + if (atomic_long_dec_and_test(&(cred)->usage)) __put_cred(cred); } } diff --git a/kernel/cred.c b/kernel/cred.c index a9f0f8b21d8c..8c58f0f63af2 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -101,17 +101,17 @@ static void put_cred_rcu(struct rcu_head *rcu) #ifdef CONFIG_DEBUG_CREDENTIALS if (cred->magic != CRED_MAGIC_DEAD || - atomic_read(&cred->usage) != 0 || + atomic_long_read(&cred->usage) != 0 || read_cred_subscribers(cred) != 0) panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %d, subscr %d\n", + " mag %x, put %p, usage %ld, subscr %d\n", cred, cred->magic, cred->put_addr, - atomic_read(&cred->usage), + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); #else - if (atomic_read(&cred->usage) != 0) - panic("CRED: put_cred_rcu() sees %p with usage %d\n", - cred, atomic_read(&cred->usage)); + if (atomic_long_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %ld\n", + cred, atomic_long_read(&cred->usage)); #endif security_cred_free(cred); @@ -134,11 +134,11 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%d,%d})", cred, - atomic_read(&cred->usage), + kdebug("__put_cred(%p{%ld,%d})", cred, + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); - BUG_ON(atomic_read(&cred->usage) != 0); + BUG_ON(atomic_long_read(&cred->usage) != 0); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(cred) != 0); cred->magic = CRED_MAGIC_DEAD; @@ -161,8 +161,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *cred; - kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), + kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); cred = (struct cred *) tsk->real_cred; @@ -197,7 +197,7 @@ const struct cred *get_task_cred(struct task_struct *task) do { cred = __task_cred((task)); BUG_ON(!cred); - } while (!atomic_inc_not_zero(&((struct cred *)cred)->usage)); + } while (!atomic_long_inc_not_zero(&((struct cred *)cred)->usage)); rcu_read_unlock(); return cred; @@ -215,7 +215,7 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; #endif @@ -262,7 +262,7 @@ struct cred *prepare_creds(void) memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); @@ -338,8 +338,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->real_cred = get_cred(p->cred); get_cred(p->cred); alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%d,%d})", - p->cred, atomic_read(&p->cred->usage), + kdebug("share_creds(%p{%ld,%d})", + p->cred, atomic_long_read(&p->cred->usage), read_cred_subscribers(p->cred)); atomic_inc(&p->cred->user->processes); return 0; @@ -429,8 +429,8 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("commit_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); BUG_ON(task->cred != old); @@ -439,7 +439,7 @@ int commit_creds(struct cred *new) validate_creds(old); validate_creds(new); #endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -512,14 +512,14 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("abort_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(new) != 0); #endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } EXPORT_SYMBOL(abort_creds); @@ -535,8 +535,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("override_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); validate_creds(old); @@ -558,8 +558,8 @@ const struct cred *override_creds(const struct cred *new) rcu_assign_pointer(current->cred, new); alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%d,%d}", old, - atomic_read(&old->usage), + kdebug("override_creds() = %p{%ld,%d}", old, + atomic_long_read(&old->usage), read_cred_subscribers(old)); return old; } @@ -576,8 +576,8 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%d,%d})", old, - atomic_read(&old->usage), + kdebug("revert_creds(%p{%ld,%d})", old, + atomic_long_read(&old->usage), read_cred_subscribers(old)); validate_creds(old); @@ -637,7 +637,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) *new = *old; new->non_rcu = 0; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); @@ -760,8 +760,8 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, cred == tsk->cred ? "[eff]" : ""); printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n", cred->magic, cred->put_addr); - printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n", - atomic_read(&cred->usage), + printk(KERN_ERR "CRED: ->usage=%ld, subscr=%d\n", + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n", from_kuid_munged(&init_user_ns, cred->uid), @@ -833,9 +833,9 @@ EXPORT_SYMBOL(__validate_process_creds); */ void validate_creds_for_do_exit(struct task_struct *tsk) { - kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})", + kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), + atomic_long_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); __validate_process_creds(tsk, __FILE__, __LINE__); From 08d0950cd1e0165da7971753e38b0ee575aeb9d4 Mon Sep 17 00:00:00 2001 From: Giuliano Procida Date: Fri, 15 Mar 2024 12:38:56 +0000 Subject: [PATCH 33/33] Revert "Reapply "cred: switch to using atomic_long_t"" This reverts commit 02965842883d4b8f8ecb5c4a9ac134f32054ed7e. Reason for revert: suspect in b/329612465 failures Change-Id: Ie523860ae1c8eb91f19f42ef496a5ce3897b921d --- include/linux/cred.h | 6 ++-- kernel/cred.c | 66 ++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index 62509a19ea08..4b081e4911c8 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -114,7 +114,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_long_t usage; + atomic_t usage; #ifdef CONFIG_DEBUG_CREDENTIALS atomic_t subscribers; /* number of processes subscribed */ void *put_addr; @@ -231,7 +231,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_long_inc(&cred->usage); + atomic_inc(&cred->usage); return cred; } @@ -275,7 +275,7 @@ static inline void put_cred(const struct cred *_cred) if (cred) { validate_creds(cred); - if (atomic_long_dec_and_test(&(cred)->usage)) + if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } } diff --git a/kernel/cred.c b/kernel/cred.c index 8c58f0f63af2..a9f0f8b21d8c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -101,17 +101,17 @@ static void put_cred_rcu(struct rcu_head *rcu) #ifdef CONFIG_DEBUG_CREDENTIALS if (cred->magic != CRED_MAGIC_DEAD || - atomic_long_read(&cred->usage) != 0 || + atomic_read(&cred->usage) != 0 || read_cred_subscribers(cred) != 0) panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %ld, subscr %d\n", + " mag %x, put %p, usage %d, subscr %d\n", cred, cred->magic, cred->put_addr, - atomic_long_read(&cred->usage), + atomic_read(&cred->usage), read_cred_subscribers(cred)); #else - if (atomic_long_read(&cred->usage) != 0) - panic("CRED: put_cred_rcu() sees %p with usage %ld\n", - cred, atomic_long_read(&cred->usage)); + if (atomic_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %d\n", + cred, atomic_read(&cred->usage)); #endif security_cred_free(cred); @@ -134,11 +134,11 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%ld,%d})", cred, - atomic_long_read(&cred->usage), + kdebug("__put_cred(%p{%d,%d})", cred, + atomic_read(&cred->usage), read_cred_subscribers(cred)); - BUG_ON(atomic_long_read(&cred->usage) != 0); + BUG_ON(atomic_read(&cred->usage) != 0); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(cred) != 0); cred->magic = CRED_MAGIC_DEAD; @@ -161,8 +161,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *cred; - kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), + kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); cred = (struct cred *) tsk->real_cred; @@ -197,7 +197,7 @@ const struct cred *get_task_cred(struct task_struct *task) do { cred = __task_cred((task)); BUG_ON(!cred); - } while (!atomic_long_inc_not_zero(&((struct cred *)cred)->usage)); + } while (!atomic_inc_not_zero(&((struct cred *)cred)->usage)); rcu_read_unlock(); return cred; @@ -215,7 +215,7 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; - atomic_long_set(&new->usage, 1); + atomic_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; #endif @@ -262,7 +262,7 @@ struct cred *prepare_creds(void) memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; - atomic_long_set(&new->usage, 1); + atomic_set(&new->usage, 1); set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); @@ -338,8 +338,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->real_cred = get_cred(p->cred); get_cred(p->cred); alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%ld,%d})", - p->cred, atomic_long_read(&p->cred->usage), + kdebug("share_creds(%p{%d,%d})", + p->cred, atomic_read(&p->cred->usage), read_cred_subscribers(p->cred)); atomic_inc(&p->cred->user->processes); return 0; @@ -429,8 +429,8 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), + kdebug("commit_creds(%p{%d,%d})", new, + atomic_read(&new->usage), read_cred_subscribers(new)); BUG_ON(task->cred != old); @@ -439,7 +439,7 @@ int commit_creds(struct cred *new) validate_creds(old); validate_creds(new); #endif - BUG_ON(atomic_long_read(&new->usage) < 1); + BUG_ON(atomic_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -512,14 +512,14 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), + kdebug("abort_creds(%p{%d,%d})", new, + atomic_read(&new->usage), read_cred_subscribers(new)); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(new) != 0); #endif - BUG_ON(atomic_long_read(&new->usage) < 1); + BUG_ON(atomic_read(&new->usage) < 1); put_cred(new); } EXPORT_SYMBOL(abort_creds); @@ -535,8 +535,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), + kdebug("override_creds(%p{%d,%d})", new, + atomic_read(&new->usage), read_cred_subscribers(new)); validate_creds(old); @@ -558,8 +558,8 @@ const struct cred *override_creds(const struct cred *new) rcu_assign_pointer(current->cred, new); alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%ld,%d}", old, - atomic_long_read(&old->usage), + kdebug("override_creds() = %p{%d,%d}", old, + atomic_read(&old->usage), read_cred_subscribers(old)); return old; } @@ -576,8 +576,8 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%ld,%d})", old, - atomic_long_read(&old->usage), + kdebug("revert_creds(%p{%d,%d})", old, + atomic_read(&old->usage), read_cred_subscribers(old)); validate_creds(old); @@ -637,7 +637,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) *new = *old; new->non_rcu = 0; - atomic_long_set(&new->usage, 1); + atomic_set(&new->usage, 1); set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); @@ -760,8 +760,8 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, cred == tsk->cred ? "[eff]" : ""); printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n", cred->magic, cred->put_addr); - printk(KERN_ERR "CRED: ->usage=%ld, subscr=%d\n", - atomic_long_read(&cred->usage), + printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n", + atomic_read(&cred->usage), read_cred_subscribers(cred)); printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n", from_kuid_munged(&init_user_ns, cred->uid), @@ -833,9 +833,9 @@ EXPORT_SYMBOL(__validate_process_creds); */ void validate_creds_for_do_exit(struct task_struct *tsk) { - kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", + kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})", tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), + atomic_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); __validate_process_creds(tsk, __FILE__, __LINE__);