From a8140078096adc7475baef9594b933331408cb00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:43:58 -0400 Subject: [PATCH 01/94] NFS/pNFS: Report EINVAL errors from connect() to the server [ Upstream commit dd7d7ee3ba2a70d12d02defb478790cf57d5b87b ] With IPv6, connect() can occasionally return EINVAL if a route is unavailable. If this happens during I/O to a data server, we want to report it using LAYOUTERROR as an inability to connect. Fixes: dd52128afdde ("NFSv4.1/pnfs Ensure flexfiles reports all connection related errors") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index fee421da2197..bb10f2b21cc1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1189,6 +1189,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -EPFNOSUPPORT: case -EPROTONOSUPPORT: case -EOPNOTSUPP: + case -EINVAL: case -ECONNREFUSED: case -ECONNRESET: case -EHOSTDOWN: From 36b4cf8da3433090f8cdc89a4a8987bad695ab32 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Dec 2021 08:20:47 +0100 Subject: [PATCH 02/94] ata: ahci: Drop pointless VPRINTK() calls and convert the remaining ones [ Upstream commit 93c7711494f47f9c829321e2a8711671b02f6e4c ] Drop pointless VPRINTK() calls for entering and existing interrupt routines and convert the remaining calls to dev_dbg(). Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal Stable-dep-of: 737dd811a3db ("ata: libahci: clear pending interrupt status") Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 4 +--- drivers/ata/ahci_xgene.c | 4 ---- drivers/ata/libahci.c | 18 ++++-------------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 13fb983b3413..2e4bcf2d0a51 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -694,7 +694,7 @@ static void ahci_pci_init_controller(struct ata_host *host) /* clear port IRQ */ tmp = readl(port_mmio + PORT_IRQ_STAT); - VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp); + dev_dbg(&pdev->dev, "PORT_IRQ_STAT 0x%x\n", tmp); if (tmp) writel(tmp, port_mmio + PORT_IRQ_STAT); } @@ -1504,7 +1504,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) u32 irq_stat, irq_masked; unsigned int handled = 1; - VPRINTK("ENTER\n"); hpriv = host->private_data; mmio = hpriv->mmio; irq_stat = readl(mmio + HOST_IRQ_STAT); @@ -1521,7 +1520,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) irq_stat = readl(mmio + HOST_IRQ_STAT); spin_unlock(&host->lock); } while (irq_stat); - VPRINTK("EXIT\n"); return IRQ_RETVAL(handled); } diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 7e157e1bf65e..04ad6a225014 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -601,8 +601,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance) void __iomem *mmio; u32 irq_stat, irq_masked; - VPRINTK("ENTER\n"); - hpriv = host->private_data; mmio = hpriv->mmio; @@ -625,8 +623,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance) spin_unlock(&host->lock); - VPRINTK("EXIT\n"); - return IRQ_RETVAL(rc); } diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f1153e7ba3b3..00eef92e91e5 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1226,12 +1226,12 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, /* clear SError */ tmp = readl(port_mmio + PORT_SCR_ERR); - VPRINTK("PORT_SCR_ERR 0x%x\n", tmp); + dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); writel(tmp, port_mmio + PORT_SCR_ERR); /* clear port IRQ */ tmp = readl(port_mmio + PORT_IRQ_STAT); - VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp); + dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); if (tmp) writel(tmp, port_mmio + PORT_IRQ_STAT); @@ -1262,10 +1262,10 @@ void ahci_init_controller(struct ata_host *host) } tmp = readl(mmio + HOST_CTL); - VPRINTK("HOST_CTL 0x%x\n", tmp); + dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp); writel(tmp | HOST_IRQ_EN, mmio + HOST_CTL); tmp = readl(mmio + HOST_CTL); - VPRINTK("HOST_CTL 0x%x\n", tmp); + dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp); } EXPORT_SYMBOL_GPL(ahci_init_controller); @@ -1916,8 +1916,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance) void __iomem *port_mmio = ahci_port_base(ap); u32 status; - VPRINTK("ENTER\n"); - status = readl(port_mmio + PORT_IRQ_STAT); writel(status, port_mmio + PORT_IRQ_STAT); @@ -1925,8 +1923,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance) ahci_handle_port_interrupt(ap, port_mmio, status); spin_unlock(ap->lock); - VPRINTK("EXIT\n"); - return IRQ_HANDLED; } @@ -1943,9 +1939,7 @@ u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked) ap = host->ports[i]; if (ap) { ahci_port_intr(ap); - VPRINTK("port %u\n", i); } else { - VPRINTK("port %u (no irq)\n", i); if (ata_ratelimit()) dev_warn(host->dev, "interrupt on disabled port %u\n", i); @@ -1966,8 +1960,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance) void __iomem *mmio; u32 irq_stat, irq_masked; - VPRINTK("ENTER\n"); - hpriv = host->private_data; mmio = hpriv->mmio; @@ -1995,8 +1987,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance) spin_unlock(&host->lock); - VPRINTK("EXIT\n"); - return IRQ_RETVAL(rc); } From ec6e1bc8eed3f16ec9713a2ddb2b3bc6a5a655a2 Mon Sep 17 00:00:00 2001 From: Szuying Chen Date: Thu, 7 Sep 2023 16:17:10 +0800 Subject: [PATCH 03/94] ata: libahci: clear pending interrupt status [ Upstream commit 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 ] When a CRC error occurs, the HBA asserts an interrupt to indicate an interface fatal error (PxIS.IFS). The ISR clears PxIE and PxIS, then does error recovery. If the adapter receives another SDB FIS with an error (PxIS.TFES) from the device before the start of the EH recovery process, the interrupt signaling the new SDB cannot be serviced as PxIE was cleared already. This in turn results in the HBA inability to issue any command during the error recovery process after setting PxCMD.ST to 1 because PxIS.TFES is still set. According to AHCI 1.3.1 specifications section 6.2.2, fatal errors notified by setting PxIS.HBFS, PxIS.HBDS, PxIS.IFS or PxIS.TFES will cause the HBA to enter the ERR:Fatal state. In this state, the HBA shall not issue any new commands. To avoid this situation, introduce the function ahci_port_clear_pending_irq() to clear pending interrupts before executing a COMRESET. This follows the AHCI 1.3.1 - section 6.2.2.2 specification. Signed-off-by: Szuying Chen Fixes: e0bfd149973d ("[PATCH] ahci: stop engine during hard reset") Cc: stable@vger.kernel.org Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libahci.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 00eef92e91e5..b93fad6939da 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1210,6 +1210,26 @@ static ssize_t ahci_activity_show(struct ata_device *dev, char *buf) return sprintf(buf, "%d\n", emp->blink_policy); } +static void ahci_port_clear_pending_irq(struct ata_port *ap) +{ + struct ahci_host_priv *hpriv = ap->host->private_data; + void __iomem *port_mmio = ahci_port_base(ap); + u32 tmp; + + /* clear SError */ + tmp = readl(port_mmio + PORT_SCR_ERR); + dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp); + writel(tmp, port_mmio + PORT_SCR_ERR); + + /* clear port IRQ */ + tmp = readl(port_mmio + PORT_IRQ_STAT); + dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp); + if (tmp) + writel(tmp, port_mmio + PORT_IRQ_STAT); + + writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT); +} + static void ahci_port_init(struct device *dev, struct ata_port *ap, int port_no, void __iomem *mmio, void __iomem *port_mmio) @@ -1224,18 +1244,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, if (rc) dev_warn(dev, "%s (%d)\n", emsg, rc); - /* clear SError */ - tmp = readl(port_mmio + PORT_SCR_ERR); - dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); - writel(tmp, port_mmio + PORT_SCR_ERR); - - /* clear port IRQ */ - tmp = readl(port_mmio + PORT_IRQ_STAT); - dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); - if (tmp) - writel(tmp, port_mmio + PORT_IRQ_STAT); - - writel(1 << port_no, mmio + HOST_IRQ_STAT); + ahci_port_clear_pending_irq(ap); /* mark esata ports */ tmp = readl(port_mmio + PORT_CMD); @@ -1565,6 +1574,8 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, tf.command = ATA_BUSY; ata_tf_to_fis(&tf, 0, 0, d2h_fis); + ahci_port_clear_pending_irq(ap); + rc = sata_link_hardreset(link, timing, deadline, online, ahci_check_ready); From a477402de0e320363699f307810cb315986c4a81 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Sep 2023 19:04:45 +0200 Subject: [PATCH 04/94] netfilter: nf_tables: disallow element removal on anonymous sets [ Upstream commit 23a3bfd4ba7acd36abf52b78605f61b21bdac216 ] Anonymous sets need to be populated once at creation and then they are bound to rule since 938154b93be8 ("netfilter: nf_tables: reject unbound anonymous set before commit phase"), otherwise transaction reports EINVAL. Userspace does not need to delete elements of anonymous sets that are not yet bound, reject this with EOPNOTSUPP. From flush command path, skip anonymous sets, they are expected to be bound already. Otherwise, EINVAL is hit at the end of this transaction for unbound sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0ff8f1006c6b..3e3044116289 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -993,8 +993,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -4902,8 +4901,10 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && - (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { From 97ac63b13904e0f7c18d4a8eee7efd4c50832d70 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 26 May 2021 20:27:19 -0700 Subject: [PATCH 05/94] selftests/tls: Add {} to avoid static checker warning [ Upstream commit f50688b47c5858d2ff315d020332bf4cb6710837 ] This silences a static checker warning due to the unusual macro construction of EXPECT_*() by adding explicit {}s around the enclosing while loop. Reported-by: Dan Carpenter Fixes: 7f657d5bf507 ("selftests: tls: add selftests for TLS sockets") Signed-off-by: Kees Cook Signed-off-by: Shuah Khan Stable-dep-of: c326ca98446e ("selftests: tls: swap the TX and RX sockets in some tests") Signed-off-by: Sasha Levin --- tools/testing/selftests/net/tls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 7549d39ccaff..43bb9eadf03e 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -202,8 +202,9 @@ TEST_F(tls, sendmsg_large) EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len); } - while (recvs++ < sends) + while (recvs++ < sends) { EXPECT_NE(recv(self->fd, mem, send_len, 0), -1); + } free(mem); } From 83ff1953aca93b7bde599500834be12c860d9e83 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 12 Sep 2023 16:16:25 +0200 Subject: [PATCH 06/94] selftests: tls: swap the TX and RX sockets in some tests [ Upstream commit c326ca98446e0ae4fee43a40acf79412b74cfedb ] tls.sendmsg_large and tls.sendmsg_multiple are trying to send through the self->cfd socket (only configured with TLS_RX) and to receive through the self->fd socket (only configured with TLS_TX), so they're not using kTLS at all. Swap the sockets. Fixes: 7f657d5bf507 ("selftests: tls: add selftests for TLS sockets") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/tls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 43bb9eadf03e..92adfe4df4e6 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -199,11 +199,11 @@ TEST_F(tls, sendmsg_large) msg.msg_iov = &vec; msg.msg_iovlen = 1; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); } while (recvs++ < sends) { - EXPECT_NE(recv(self->fd, mem, send_len, 0), -1); + EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1); } free(mem); @@ -232,9 +232,9 @@ TEST_F(tls, sendmsg_multiple) msg.msg_iov = vec; msg.msg_iovlen = iov_len; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len); buf = malloc(total_len); - EXPECT_NE(recv(self->fd, buf, total_len, 0), -1); + EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1); for (i = 0; i < iov_len; i++) { EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp, strlen(test_strs[i])), From a2cf7bd75b3992e8df68dd5fdc6499b67d45f6e0 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 14 Sep 2023 22:12:57 -0700 Subject: [PATCH 07/94] ipv4: fix null-deref in ipv4_link_failure [ Upstream commit 0113d9c9d1ccc07f5a3710dac4aa24b6d711278c ] Currently, we assume the skb is associated with a device before calling __ip_options_compile, which is not always the case if it is re-routed by ipvs. When skb->dev is NULL, dev_net(skb->dev) will become null-dereference. This patch adds a check for the edge case and switch to use the net_device from the rtable when skb->dev is NULL. Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Suggested-by: David Ahern Signed-off-by: Kyle Zeng Cc: Stephen Suryaputra Cc: Vadim Fedorenko Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 57e2316529d0..9753d07bfc0b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1215,6 +1215,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_send_dest_unreach(struct sk_buff *skb) { + struct net_device *dev; struct ip_options opt; int res; @@ -1232,7 +1233,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb) opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; + res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) From 76a3e6009929776c2e0bb96f87b391d321c5fc08 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 25 Aug 2023 11:26:01 +0530 Subject: [PATCH 08/94] powerpc/perf/hv-24x7: Update domain value check [ Upstream commit 4ff3ba4db5943cac1045e3e4a3c0463ea10f6930 ] Valid domain value is in range 1 to HV_PERF_DOMAIN_MAX. Current code has check for domain value greater than or equal to HV_PERF_DOMAIN_MAX. But the check for domain value 0 is missing. Fix this issue by adding check for domain value 0. Before: # ./perf stat -v -e hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ sleep 1 Using CPUID 00800200 Control descriptor is not initialized Error: The sys_perf_event_open() syscall returned with 5 (Input/output error) for event (hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/). /bin/dmesg | grep -i perf may provide additional information. Result from dmesg: [ 37.819387] hv-24x7: hcall failed: [0 0x60040000 0x100 0] => ret 0xfffffffffffffffc (-4) detail=0x2000000 failing ix=0 After: # ./perf stat -v -e hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ sleep 1 Using CPUID 00800200 Control descriptor is not initialized Warning: hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ event is not supported by the kernel. failed to read counter hv_24x7/CPM_ADJUNCT_INST,domain=0,core=1/ Fixes: ebd4a5a3ebd9 ("powerpc/perf/hv-24x7: Minor improvements") Reported-by: Krishan Gopal Sarawast Signed-off-by: Kajol Jain Tested-by: Disha Goel Signed-off-by: Michael Ellerman Link: https://msgid.link/20230825055601.360083-1-kjain@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/perf/hv-24x7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 2bb798918483..e6eb2b4cf97e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1326,7 +1326,7 @@ static int h_24x7_event_init(struct perf_event *event) } domain = event_get_domain(event); - if (domain >= HV_PERF_DOMAIN_MAX) { + if (domain == 0 || domain >= HV_PERF_DOMAIN_MAX) { pr_devel("invalid domain %d\n", domain); return -EINVAL; } From 57fb8b599ec66c3dfa5467473994e663036db82a Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 18 Sep 2023 15:48:40 +0800 Subject: [PATCH 09/94] net: hns3: add 5ms delay before clear firmware reset irq source [ Upstream commit 0770063096d5da4a8e467b6e73c1646a75589628 ] Currently the reset process in hns3 and firmware watchdog init process is asynchronous. we think firmware watchdog initialization is completed before hns3 clear the firmware interrupt source. However, firmware initialization may not complete early. so we add delay before hns3 clear firmware interrupt source and 5 ms delay is enough to avoid second firmware reset interrupt. Fixes: c1a81619d73a ("net: hns3: Add mailbox interrupt handling to PF driver") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2c334b56fd42..d668d25ae7e7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2517,8 +2517,13 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) static void hclge_clear_event_cause(struct hclge_dev *hdev, u32 event_type, u32 regclr) { +#define HCLGE_IMP_RESET_DELAY 5 + switch (event_type) { case HCLGE_VECTOR0_EVENT_RST: + if (regclr == BIT(HCLGE_VECTOR0_IMPRESET_INT_B)) + mdelay(HCLGE_IMP_RESET_DELAY); + hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, regclr); break; case HCLGE_VECTOR0_EVENT_MBX: From 2b601fcacd30ea9b3f777490e7c788b548389991 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2022 08:53:55 +0000 Subject: [PATCH 10/94] net: add atomic_long_t to net_device_stats fields [ Upstream commit 6c1c5097781f563b70a81683ea6fdac21637573b ] Long standing KCSAN issues are caused by data-race around some dev->stats changes. Most performance critical paths already use per-cpu variables, or per-queue ones. It is reasonable (and more correct) to use atomic operations for the slow paths. This patch adds an union for each field of net_device_stats, so that we can convert paths that are not yet protected by a spinlock or a mutex. netdev_stats_to_stats64() no longer has an #if BITS_PER_LONG==64 Note that the memcpy() we were using on 64bit arches had no provision to avoid load-tearing, while atomic_long_read() is providing the needed protection at no cost. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Stable-dep-of: 44bdb313da57 ("net: bridge: use DEV_STATS_INC()") Signed-off-by: Sasha Levin --- include/linux/netdevice.h | 58 +++++++++++++++++++++++---------------- include/net/dst.h | 5 ++-- net/core/dev.c | 14 ++-------- 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e9df3854420..e977118111f6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -162,31 +162,38 @@ static inline bool dev_xmit_complete(int rc) * (unsigned long) so they can be read and written atomically. */ +#define NET_DEV_STAT(FIELD) \ + union { \ + unsigned long FIELD; \ + atomic_long_t __##FIELD; \ + } + struct net_device_stats { - unsigned long rx_packets; - unsigned long tx_packets; - unsigned long rx_bytes; - unsigned long tx_bytes; - unsigned long rx_errors; - unsigned long tx_errors; - unsigned long rx_dropped; - unsigned long tx_dropped; - unsigned long multicast; - unsigned long collisions; - unsigned long rx_length_errors; - unsigned long rx_over_errors; - unsigned long rx_crc_errors; - unsigned long rx_frame_errors; - unsigned long rx_fifo_errors; - unsigned long rx_missed_errors; - unsigned long tx_aborted_errors; - unsigned long tx_carrier_errors; - unsigned long tx_fifo_errors; - unsigned long tx_heartbeat_errors; - unsigned long tx_window_errors; - unsigned long rx_compressed; - unsigned long tx_compressed; + NET_DEV_STAT(rx_packets); + NET_DEV_STAT(tx_packets); + NET_DEV_STAT(rx_bytes); + NET_DEV_STAT(tx_bytes); + NET_DEV_STAT(rx_errors); + NET_DEV_STAT(tx_errors); + NET_DEV_STAT(rx_dropped); + NET_DEV_STAT(tx_dropped); + NET_DEV_STAT(multicast); + NET_DEV_STAT(collisions); + NET_DEV_STAT(rx_length_errors); + NET_DEV_STAT(rx_over_errors); + NET_DEV_STAT(rx_crc_errors); + NET_DEV_STAT(rx_frame_errors); + NET_DEV_STAT(rx_fifo_errors); + NET_DEV_STAT(rx_missed_errors); + NET_DEV_STAT(tx_aborted_errors); + NET_DEV_STAT(tx_carrier_errors); + NET_DEV_STAT(tx_fifo_errors); + NET_DEV_STAT(tx_heartbeat_errors); + NET_DEV_STAT(tx_window_errors); + NET_DEV_STAT(rx_compressed); + NET_DEV_STAT(tx_compressed); }; +#undef NET_DEV_STAT #include @@ -4842,4 +4849,9 @@ do { \ #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) +/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */ +#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) +#define DEV_STATS_ADD(DEV, FIELD, VAL) \ + atomic_long_add((VAL), &(DEV)->stats.__##FIELD) + #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/net/dst.h b/include/net/dst.h index 50258a813137..97267997601f 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -362,9 +362,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, struct net *net) { - /* TODO : stats should be SMP safe */ - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + DEV_STATS_INC(dev, rx_packets); + DEV_STATS_ADD(dev, rx_bytes, skb->len); __skb_tunnel_rx(skb, dev, net); } diff --git a/net/core/dev.c b/net/core/dev.c index a9c8660a2570..3bf40c288c03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9035,24 +9035,16 @@ void netdev_run_todo(void) void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats) { -#if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*netdev_stats)); - /* zero out counters that only exist in rtnl_link_stats64 */ - memset((char *)stats64 + sizeof(*netdev_stats), 0, - sizeof(*stats64) - sizeof(*netdev_stats)); -#else - size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); - const unsigned long *src = (const unsigned long *)netdev_stats; + size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t); + const atomic_long_t *src = (atomic_long_t *)netdev_stats; u64 *dst = (u64 *)stats64; BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = src[i]; + dst[i] = atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); -#endif } EXPORT_SYMBOL(netdev_stats_to_stats64); From d2346e6beb699909ca455d9d20c4e577ce900839 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2023 09:13:51 +0000 Subject: [PATCH 11/94] net: bridge: use DEV_STATS_INC() [ Upstream commit 44bdb313da57322c9b3c108eb66981c6ec6509f4 ] syzbot/KCSAN reported data-races in br_handle_frame_finish() [1] This function can run from multiple cpus without mutual exclusion. Adopt SMP safe DEV_STATS_INC() to update dev->stats fields. Handles updates to dev->stats.tx_dropped while we are at it. [1] BUG: KCSAN: data-race in br_handle_frame_finish / br_handle_frame_finish read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 1: br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189 br_nf_hook_thresh+0x1ed/0x220 br_nf_pre_routing_finish_ipv6+0x50f/0x540 NF_HOOK include/linux/netfilter.h:304 [inline] br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178 br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508 nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline] nf_hook_bridge_pre net/bridge/br_input.c:272 [inline] br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417 __netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417 __netif_receive_skb_one_core net/core/dev.c:5521 [inline] __netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637 process_backlog+0x21f/0x380 net/core/dev.c:5965 __napi_poll+0x60/0x3b0 net/core/dev.c:6527 napi_poll net/core/dev.c:6594 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6727 __do_softirq+0xc1/0x265 kernel/softirq.c:553 run_ksoftirqd+0x17/0x20 kernel/softirq.c:921 smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 0: br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189 br_nf_hook_thresh+0x1ed/0x220 br_nf_pre_routing_finish_ipv6+0x50f/0x540 NF_HOOK include/linux/netfilter.h:304 [inline] br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178 br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508 nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline] nf_hook_bridge_pre net/bridge/br_input.c:272 [inline] br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417 __netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417 __netif_receive_skb_one_core net/core/dev.c:5521 [inline] __netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637 process_backlog+0x21f/0x380 net/core/dev.c:5965 __napi_poll+0x60/0x3b0 net/core/dev.c:6527 napi_poll net/core/dev.c:6594 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6727 __do_softirq+0xc1/0x265 kernel/softirq.c:553 do_softirq+0x5e/0x90 kernel/softirq.c:454 __local_bh_enable_ip+0x64/0x70 kernel/softirq.c:381 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:167 [inline] _raw_spin_unlock_bh+0x36/0x40 kernel/locking/spinlock.c:210 spin_unlock_bh include/linux/spinlock.h:396 [inline] batadv_tt_local_purge+0x1a8/0x1f0 net/batman-adv/translation-table.c:1356 batadv_tt_purge+0x2b/0x630 net/batman-adv/translation-table.c:3560 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 value changed: 0x00000000000d7190 -> 0x00000000000d7191 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 14848 Comm: kworker/u4:11 Not tainted 6.6.0-rc1-syzkaller-00236-gad8a69f361b9 #0 Fixes: 1c29fc4989bc ("[BRIDGE]: keep track of received multicast packets") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Cc: Nikolay Aleksandrov Cc: bridge@lists.linux-foundation.org Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20230918091351.1356153-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/bridge/br_forward.c | 4 ++-- net/bridge/br_input.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 48ddc60b4fbd..c07a47d65c39 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -122,7 +122,7 @@ static int deliver_clone(const struct net_bridge_port *prev, skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return -ENOMEM; } @@ -261,7 +261,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, skb = skb_copy(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 14c2fdc268ea..f3938337ff87 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -146,12 +146,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if ((mdst && mdst->host_joined) || br_multicast_is_router(br)) { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } mcast_hit = true; } else { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } break; case BR_PKT_UNICAST: From a7fb47b9711101d2405b0eb1276fb1f9b9b270c7 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 18 Sep 2023 20:30:11 +0800 Subject: [PATCH 12/94] team: fix null-ptr-deref when team device type is changed [ Upstream commit 492032760127251e5540a5716a70996bacf2a3fd ] Get a null-ptr-deref bug as follows with reproducer [1]. BUG: kernel NULL pointer dereference, address: 0000000000000228 ... RIP: 0010:vlan_dev_hard_header+0x35/0x140 [8021q] ... Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x82/0x150 ? exc_page_fault+0x69/0x150 ? asm_exc_page_fault+0x26/0x30 ? vlan_dev_hard_header+0x35/0x140 [8021q] ? vlan_dev_hard_header+0x8e/0x140 [8021q] neigh_connected_output+0xb2/0x100 ip6_finish_output2+0x1cb/0x520 ? nf_hook_slow+0x43/0xc0 ? ip6_mtu+0x46/0x80 ip6_finish_output+0x2a/0xb0 mld_sendpack+0x18f/0x250 mld_ifc_work+0x39/0x160 process_one_work+0x1e6/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 [1] $ teamd -t team0 -d -c '{"runner": {"name": "loadbalance"}}' $ ip link add name t-dummy type dummy $ ip link add link t-dummy name t-dummy.100 type vlan id 100 $ ip link add name t-nlmon type nlmon $ ip link set t-nlmon master team0 $ ip link set t-nlmon nomaster $ ip link set t-dummy up $ ip link set team0 up $ ip link set t-dummy.100 down $ ip link set t-dummy.100 master team0 When enslave a vlan device to team device and team device type is changed from non-ether to ether, header_ops of team device is changed to vlan_header_ops. That is incorrect and will trigger null-ptr-deref for vlan->real_dev in vlan_dev_hard_header() because team device is not a vlan device. Cache eth_header_ops in team_setup(), then assign cached header_ops to header_ops of team net device when its type is changed from non-ether to ether to fix the bug. Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Suggested-by: Hangbin Liu Reviewed-by: Hangbin Liu Signed-off-by: Ziyang Xuan Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230918123011.1884401-1-william.xuanziyang@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/team/team.c | 10 +++++++++- include/linux/if_team.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 8b5e1ec6aabf..08f9530fd5b1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2095,7 +2095,12 @@ static const struct ethtool_ops team_ethtool_ops = { static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { - dev->header_ops = port_dev->header_ops; + struct team *team = netdev_priv(dev); + + if (port_dev->type == ARPHRD_ETHER) + dev->header_ops = team->header_ops_cache; + else + dev->header_ops = port_dev->header_ops; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; @@ -2142,8 +2147,11 @@ static int team_dev_type_check_change(struct net_device *dev, static void team_setup(struct net_device *dev) { + struct team *team = netdev_priv(dev); + ether_setup(dev); dev->max_mtu = ETH_MAX_MTU; + team->header_ops_cache = dev->header_ops; dev->netdev_ops = &team_netdev_ops; dev->ethtool_ops = &team_ethtool_ops; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index ac42da56f7a2..fd32538ae705 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -196,6 +196,8 @@ struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; + const struct header_ops *header_ops_cache; + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* From 8be96f43cc12cb9fabf8bd9962a266c175ca92fb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Sep 2023 08:13:21 +0200 Subject: [PATCH 13/94] gpio: tb10x: Fix an error handling path in tb10x_gpio_probe() [ Upstream commit b547b5e52a0587e6b25ea520bf2f9e03d00cbcb6 ] If an error occurs after a successful irq_domain_add_linear() call, it should be undone by a corresponding irq_domain_remove(), as already done in the remove function. Fixes: c6ce2b6bffe5 ("gpio: add TB10x GPIO driver") Signed-off-by: Christophe JAILLET Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-tb10x.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c index a12cd0b5c972..1d80bae86ec9 100644 --- a/drivers/gpio/gpio-tb10x.c +++ b/drivers/gpio/gpio-tb10x.c @@ -246,7 +246,7 @@ static int tb10x_gpio_probe(struct platform_device *pdev) handle_edge_irq, IRQ_NOREQUEST, IRQ_NOPROBE, IRQ_GC_INIT_MASK_CACHE); if (ret) - return ret; + goto err_remove_domain; gc = tb10x_gpio->domain->gc->gc[0]; gc->reg_base = tb10x_gpio->base; @@ -260,6 +260,10 @@ static int tb10x_gpio_probe(struct platform_device *pdev) } return 0; + +err_remove_domain: + irq_domain_remove(tb10x_gpio->domain); + return ret; } static int tb10x_gpio_remove(struct platform_device *pdev) From 6e6ebca74ec87622fc442691e24774b00ce1d5ce Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Thu, 3 Mar 2022 20:39:14 +0800 Subject: [PATCH 14/94] i2c: mux: demux-pinctrl: check the return value of devm_kstrdup() [ Upstream commit 7c0195fa9a9e263df204963f88a22b21688ffb66 ] devm_kstrdup() returns pointer to allocated string on success, NULL on failure. So it is better to check the return value of it. Fixes: e35478eac030 ("i2c: mux: demux-pinctrl: run properly with multiple instances") Signed-off-by: Xiaoke Wang Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 1b99d0b928a0..092ebc08549f 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -244,6 +244,10 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) props[i].name = devm_kstrdup(&pdev->dev, "status", GFP_KERNEL); props[i].value = devm_kstrdup(&pdev->dev, "ok", GFP_KERNEL); + if (!props[i].name || !props[i].value) { + err = -ENOMEM; + goto err_rollback; + } props[i].length = 3; of_changeset_init(&priv->chan[i].chgset); From 8e29477d3f232f67756329ee6241901157725f74 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Wed, 12 Jul 2023 11:56:51 -0700 Subject: [PATCH 15/94] Input: i8042 - add quirk for TUXEDO Gemini 17 Gen1/Clevo PD70PN [ Upstream commit eb09074bdb05ffd6bfe77f8b4a41b76ef78c997b ] The touchpad of this device is both connected via PS/2 and i2c. This causes strange behavior when both driver fight for control. The easy fix is to prevent the PS/2 driver from accessing the mouse port as the full feature set of the touchpad is only supported in the i2c interface anyway. The strange behavior in this case is, that when an external screen is connected and the notebook is closed, the pointer on the external screen is moving to the lower right corner. When the notebook is opened again, this movement stops, but the touchpad clicks are unresponsive afterwards until reboot. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230607173331.851192-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index da2bf8259330..0cf9a3787326 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -1188,6 +1188,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + /* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */ + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PD5x_7xPNP_PNR_PNN_PNT"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170SM"), From f892a5b5bc10db973b4059b3b9e809cf3b598328 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 21 Dec 2018 09:33:44 -0800 Subject: [PATCH 16/94] scsi: qla2xxx: Add protection mask module parameters [ Upstream commit 7855d2ba1172d716d96a628af7c5bafa5725ac57 ] Allow user to selectively enable/disable DIF/DIX protection capabilities mask. Signed-off-by: Martin K. Petersen Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Stable-dep-of: e9105c4b7a92 ("scsi: qla2xxx: Remove unsupported ql2xenabledif option") Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_os.c | 36 +++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4580774b2c3e..27514d0abe84 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -277,6 +277,20 @@ MODULE_PARM_DESC(qla2xuseresexchforels, "Reserve 1/2 of emergency exchanges for ELS.\n" " 0 (default): disabled"); +int ql2xprotmask; +module_param(ql2xprotmask, int, 0644); +MODULE_PARM_DESC(ql2xprotmask, + "Override DIF/DIX protection capabilities mask\n" + "Default is 0 which sets protection mask based on " + "capabilities reported by HBA firmware.\n"); + +int ql2xprotguard; +module_param(ql2xprotguard, int, 0644); +MODULE_PARM_DESC(ql2xprotguard, "Override choice of DIX checksum\n" + " 0 -- Let HBA firmware decide\n" + " 1 -- Force T10 CRC\n" + " 2 -- Force IP checksum\n"); + /* * SCSI host template entry points */ @@ -3293,13 +3307,16 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) "Registering for DIF/DIX type 1 and 3 protection.\n"); if (ql2xenabledif == 1) prot = SHOST_DIX_TYPE0_PROTECTION; - scsi_host_set_prot(host, - prot | SHOST_DIF_TYPE1_PROTECTION - | SHOST_DIF_TYPE2_PROTECTION - | SHOST_DIF_TYPE3_PROTECTION - | SHOST_DIX_TYPE1_PROTECTION - | SHOST_DIX_TYPE2_PROTECTION - | SHOST_DIX_TYPE3_PROTECTION); + if (ql2xprotmask) + scsi_host_set_prot(host, ql2xprotmask); + else + scsi_host_set_prot(host, + prot | SHOST_DIF_TYPE1_PROTECTION + | SHOST_DIF_TYPE2_PROTECTION + | SHOST_DIF_TYPE3_PROTECTION + | SHOST_DIX_TYPE1_PROTECTION + | SHOST_DIX_TYPE2_PROTECTION + | SHOST_DIX_TYPE3_PROTECTION); guard = SHOST_DIX_GUARD_CRC; @@ -3307,7 +3324,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) (ql2xenabledif > 1 || IS_PI_DIFB_DIX0_CAPABLE(ha))) guard |= SHOST_DIX_GUARD_IP; - scsi_host_set_guard(host, guard); + if (ql2xprotguard) + scsi_host_set_guard(host, ql2xprotguard); + else + scsi_host_set_guard(host, guard); } else base_vha->flags.difdix_supported = 0; } From 75d6b6babbea0ed42cc0a60a5f8d30301b1b17fa Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Mon, 21 Aug 2023 18:30:42 +0530 Subject: [PATCH 17/94] scsi: qla2xxx: Remove unsupported ql2xenabledif option [ Upstream commit e9105c4b7a9208a21a9bda133707624f12ddabc2 ] User accidently passed module parameter ql2xenabledif=1 which is unsupported. However, driver still initialized which lead to guard tag errors during device discovery. Remove unsupported ql2xenabledif=1 option and validate the user input. Cc: stable@vger.kernel.org Signed-off-by: Manish Rangankar Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230821130045.34850-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_attr.c | 2 -- drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_os.c | 9 +++++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 6c9095d0aa0f..9a25e92ef1ab 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2088,8 +2088,6 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable) vha->flags.difdix_supported = 1; ql_dbg(ql_dbg_user, vha, 0x7082, "Registered for DIF/DIX type 1 and 3 protection.\n"); - if (ql2xenabledif == 1) - prot = SHOST_DIX_TYPE0_PROTECTION; scsi_host_set_prot(vha->host, prot | SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 36871760a5d3..fcbadd41856c 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -22,7 +22,7 @@ * | Queue Command and IO tracing | 0x3074 | 0x300b | * | | | 0x3027-0x3028 | * | | | 0x303d-0x3041 | - * | | | 0x302d,0x3033 | + * | | | 0x302e,0x3033 | * | | | 0x3036,0x3038 | * | | | 0x303a | * | DPC Thread | 0x4023 | 0x4002,0x4013 | diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 27514d0abe84..36dca08166f2 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3069,6 +3069,13 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->max_id = ha->max_fibre_devices; host->cmd_per_lun = 3; host->unique_id = host->host_no; + + if (ql2xenabledif && ql2xenabledif != 2) { + ql_log(ql_log_warn, base_vha, 0x302d, + "Invalid value for ql2xenabledif, resetting it to default (2)\n"); + ql2xenabledif = 2; + } + if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) host->max_cmd_len = 32; else @@ -3305,8 +3312,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) base_vha->flags.difdix_supported = 1; ql_dbg(ql_dbg_init, base_vha, 0x00f1, "Registering for DIF/DIX type 1 and 3 protection.\n"); - if (ql2xenabledif == 1) - prot = SHOST_DIX_TYPE0_PROTECTION; if (ql2xprotmask) scsi_host_set_prot(host, ql2xprotmask); else From c718a19fa2d7c4b8cc8f213cdc2a851a7c26ae3f Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Tue, 7 May 2019 10:05:36 -0700 Subject: [PATCH 18/94] scsi: megaraid_sas: Load balance completions across all MSI-X [ Upstream commit 1d15d9098ad12b0021ac5a6b851f26d1ab021e5a ] Driver will use "reply descriptor post queues" in round robin fashion when the combined MSI-X mode is not enabled. With this IO completions are distributed and load balanced across all the available reply descriptor post queues equally. This is enabled only if combined MSI-X mode is not enabled in firmware. This improves performance and also fixes soft lockups. When load balancing is enabled, IRQ affinity from driver needs to be disabled. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Stable-dep-of: 0b0747d507bf ("scsi: megaraid_sas: Fix deadlock on firmware crashdump") Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas.h | 3 +++ drivers/scsi/megaraid/megaraid_sas_base.c | 22 +++++++++++++++++---- drivers/scsi/megaraid/megaraid_sas_fusion.c | 18 +++++++++++++---- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 67d356d84717..80ae48b65b60 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2193,6 +2193,7 @@ struct megasas_instance { u32 secure_jbod_support; u32 support_morethan256jbod; /* FW support for more than 256 PD/JBOD */ bool use_seqnum_jbod_fp; /* Added for PD sequence */ + bool smp_affinity_enable; spinlock_t crashdump_lock; struct megasas_register_set __iomem *reg_set; @@ -2210,6 +2211,7 @@ struct megasas_instance { u16 ldio_threshold; u16 cur_can_queue; u32 max_sectors_per_req; + bool msix_load_balance; struct megasas_aen_event *ev; struct megasas_cmd **cmd_list; @@ -2246,6 +2248,7 @@ struct megasas_instance { atomic_t sge_holes_type1; atomic_t sge_holes_type2; atomic_t sge_holes_type3; + atomic64_t total_io_count; struct megasas_instance_template *instancet; struct tasklet_struct isr_tasklet; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 8d1df03386b4..2b6b6d3deba8 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5101,6 +5101,7 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe) &instance->irq_context[j]); /* Retry irq register for IO_APIC*/ instance->msix_vectors = 0; + instance->msix_load_balance = false; if (is_probe) { pci_free_irq_vectors(instance->pdev); return megasas_setup_irqs_ioapic(instance); @@ -5109,6 +5110,7 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe) } } } + return 0; } @@ -5364,6 +5366,12 @@ static int megasas_init_fw(struct megasas_instance *instance) if (rdpq_enable) instance->is_rdpq = (scratch_pad_2 & MR_RDPQ_MODE_OFFSET) ? 1 : 0; + + if (!instance->msix_combined) { + instance->msix_load_balance = true; + instance->smp_affinity_enable = false; + } + fw_msix_count = instance->msix_vectors; /* Save 1-15 reply post index address to local memory * Index 0 is already saved from reg offset @@ -5382,17 +5390,20 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->msix_vectors); } else /* MFI adapters */ instance->msix_vectors = 1; + /* Don't bother allocating more MSI-X vectors than cpus */ instance->msix_vectors = min(instance->msix_vectors, (unsigned int)num_online_cpus()); - if (smp_affinity_enable) + if (instance->smp_affinity_enable) irq_flags |= PCI_IRQ_AFFINITY; i = pci_alloc_irq_vectors(instance->pdev, 1, instance->msix_vectors, irq_flags); - if (i > 0) + if (i > 0) { instance->msix_vectors = i; - else + } else { instance->msix_vectors = 0; + instance->msix_load_balance = false; + } } /* * MSI-X host index 0 is common for all adapter. @@ -6447,6 +6458,7 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) INIT_LIST_HEAD(&instance->internal_reset_pending_q); atomic_set(&instance->fw_outstanding, 0); + atomic64_set(&instance->total_io_count, 0); init_waitqueue_head(&instance->int_cmd_wait_q); init_waitqueue_head(&instance->abort_cmd_wait_q); @@ -6469,6 +6481,8 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) instance->last_time = 0; instance->disableOnlineCtrlReset = 1; instance->UnevenSpanSupport = 0; + instance->smp_affinity_enable = smp_affinity_enable ? true : false; + instance->msix_load_balance = false; if (instance->adapter_type != MFI_SERIES) { INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq); @@ -6818,7 +6832,7 @@ megasas_resume(struct pci_dev *pdev) /* Now re-enable MSI-X */ if (instance->msix_vectors) { irq_flags = PCI_IRQ_MSIX; - if (smp_affinity_enable) + if (instance->smp_affinity_enable) irq_flags |= PCI_IRQ_AFFINITY; } rval = pci_alloc_irq_vectors(instance->pdev, 1, diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index b400167f9ad4..294e1a3a6adf 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2641,8 +2641,13 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, fp_possible = (io_info.fpOkForIo > 0) ? true : false; } - cmd->request_desc->SCSIIO.MSIxIndex = - instance->reply_map[raw_smp_processor_id()]; + if (instance->msix_load_balance) + cmd->request_desc->SCSIIO.MSIxIndex = + (mega_mod64(atomic64_add_return(1, &instance->total_io_count), + instance->msix_vectors)); + else + cmd->request_desc->SCSIIO.MSIxIndex = + instance->reply_map[raw_smp_processor_id()]; praid_context = &io_request->RaidContext; @@ -2969,8 +2974,13 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, cmd->request_desc->SCSIIO.DevHandle = io_request->DevHandle; - cmd->request_desc->SCSIIO.MSIxIndex = - instance->reply_map[raw_smp_processor_id()]; + if (instance->msix_load_balance) + cmd->request_desc->SCSIIO.MSIxIndex = + (mega_mod64(atomic64_add_return(1, &instance->total_io_count), + instance->msix_vectors)); + else + cmd->request_desc->SCSIIO.MSIxIndex = + instance->reply_map[raw_smp_processor_id()]; if (!fp_possible) { /* system pd firmware path */ From a96d7847367b586b3f39c59edde5dc4e2cdaeb08 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 28 Aug 2023 15:10:18 -0700 Subject: [PATCH 19/94] scsi: megaraid_sas: Fix deadlock on firmware crashdump [ Upstream commit 0b0747d507bffb827e40fc0f9fb5883fffc23477 ] The following processes run into a deadlock. CPU 41 was waiting for CPU 29 to handle a CSD request while holding spinlock "crashdump_lock", but CPU 29 was hung by that spinlock with IRQs disabled. PID: 17360 TASK: ffff95c1090c5c40 CPU: 41 COMMAND: "mrdiagd" !# 0 [ffffb80edbf37b58] __read_once_size at ffffffff9b871a40 include/linux/compiler.h:185:0 !# 1 [ffffb80edbf37b58] atomic_read at ffffffff9b871a40 arch/x86/include/asm/atomic.h:27:0 !# 2 [ffffb80edbf37b58] dump_stack at ffffffff9b871a40 lib/dump_stack.c:54:0 # 3 [ffffb80edbf37b78] csd_lock_wait_toolong at ffffffff9b131ad5 kernel/smp.c:364:0 # 4 [ffffb80edbf37b78] __csd_lock_wait at ffffffff9b131ad5 kernel/smp.c:384:0 # 5 [ffffb80edbf37bf8] csd_lock_wait at ffffffff9b13267a kernel/smp.c:394:0 # 6 [ffffb80edbf37bf8] smp_call_function_many at ffffffff9b13267a kernel/smp.c:843:0 # 7 [ffffb80edbf37c50] smp_call_function at ffffffff9b13279d kernel/smp.c:867:0 # 8 [ffffb80edbf37c50] on_each_cpu at ffffffff9b13279d kernel/smp.c:976:0 # 9 [ffffb80edbf37c78] flush_tlb_kernel_range at ffffffff9b085c4b arch/x86/mm/tlb.c:742:0 #10 [ffffb80edbf37cb8] __purge_vmap_area_lazy at ffffffff9b23a1e0 mm/vmalloc.c:701:0 #11 [ffffb80edbf37ce0] try_purge_vmap_area_lazy at ffffffff9b23a2cc mm/vmalloc.c:722:0 #12 [ffffb80edbf37ce0] free_vmap_area_noflush at ffffffff9b23a2cc mm/vmalloc.c:754:0 #13 [ffffb80edbf37cf8] free_unmap_vmap_area at ffffffff9b23bb3b mm/vmalloc.c:764:0 #14 [ffffb80edbf37cf8] remove_vm_area at ffffffff9b23bb3b mm/vmalloc.c:1509:0 #15 [ffffb80edbf37d18] __vunmap at ffffffff9b23bb8a mm/vmalloc.c:1537:0 #16 [ffffb80edbf37d40] vfree at ffffffff9b23bc85 mm/vmalloc.c:1612:0 #17 [ffffb80edbf37d58] megasas_free_host_crash_buffer [megaraid_sas] at ffffffffc020b7f2 drivers/scsi/megaraid/megaraid_sas_fusion.c:3932:0 #18 [ffffb80edbf37d80] fw_crash_state_store [megaraid_sas] at ffffffffc01f804d drivers/scsi/megaraid/megaraid_sas_base.c:3291:0 #19 [ffffb80edbf37dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 #20 [ffffb80edbf37dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 #21 [ffffb80edbf37de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 #22 [ffffb80edbf37e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0 #23 [ffffb80edbf37ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 #24 [ffffb80edbf37ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 #25 [ffffb80edbf37ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0 #26 [ffffb80edbf37f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 #27 [ffffb80edbf37f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 PID: 17355 TASK: ffff95c1090c3d80 CPU: 29 COMMAND: "mrdiagd" !# 0 [ffffb80f2d3c7d30] __read_once_size at ffffffff9b0f2ab0 include/linux/compiler.h:185:0 !# 1 [ffffb80f2d3c7d30] native_queued_spin_lock_slowpath at ffffffff9b0f2ab0 kernel/locking/qspinlock.c:368:0 # 2 [ffffb80f2d3c7d58] pv_queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/paravirt.h:674:0 # 3 [ffffb80f2d3c7d58] queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/qspinlock.h:53:0 # 4 [ffffb80f2d3c7d68] queued_spin_lock at ffffffff9b8961a6 include/asm-generic/qspinlock.h:90:0 # 5 [ffffb80f2d3c7d68] do_raw_spin_lock_flags at ffffffff9b8961a6 include/linux/spinlock.h:173:0 # 6 [ffffb80f2d3c7d68] __raw_spin_lock_irqsave at ffffffff9b8961a6 include/linux/spinlock_api_smp.h:122:0 # 7 [ffffb80f2d3c7d68] _raw_spin_lock_irqsave at ffffffff9b8961a6 kernel/locking/spinlock.c:160:0 # 8 [ffffb80f2d3c7d88] fw_crash_buffer_store [megaraid_sas] at ffffffffc01f8129 drivers/scsi/megaraid/megaraid_sas_base.c:3205:0 # 9 [ffffb80f2d3c7dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 #10 [ffffb80f2d3c7dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 #11 [ffffb80f2d3c7de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 #12 [ffffb80f2d3c7e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0 #13 [ffffb80f2d3c7ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 #14 [ffffb80f2d3c7ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 #15 [ffffb80f2d3c7ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0 #16 [ffffb80f2d3c7f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 #17 [ffffb80f2d3c7f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 The lock is used to synchronize different sysfs operations, it doesn't protect any resource that will be touched by an interrupt. Consequently it's not required to disable IRQs. Replace the spinlock with a mutex to fix the deadlock. Signed-off-by: Junxiao Bi Link: https://lore.kernel.org/r/20230828221018.19471-1-junxiao.bi@oracle.com Reviewed-by: Mike Christie Cc: stable@vger.kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas.h | 2 +- drivers/scsi/megaraid/megaraid_sas_base.c | 21 +++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 80ae48b65b60..0d7cca9365aa 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2194,7 +2194,7 @@ struct megasas_instance { u32 support_morethan256jbod; /* FW support for more than 256 PD/JBOD */ bool use_seqnum_jbod_fp; /* Added for PD sequence */ bool smp_affinity_enable; - spinlock_t crashdump_lock; + struct mutex crashdump_lock; struct megasas_register_set __iomem *reg_set; u32 __iomem *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY]; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 2b6b6d3deba8..bdfa36712fcc 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3004,14 +3004,13 @@ megasas_fw_crash_buffer_store(struct device *cdev, struct megasas_instance *instance = (struct megasas_instance *) shost->hostdata; int val = 0; - unsigned long flags; if (kstrtoint(buf, 0, &val) != 0) return -EINVAL; - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); instance->fw_crash_buffer_offset = val; - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return strlen(buf); } @@ -3027,17 +3026,16 @@ megasas_fw_crash_buffer_show(struct device *cdev, unsigned long dmachunk = CRASH_DMA_BUF_SIZE; unsigned long chunk_left_bytes; unsigned long src_addr; - unsigned long flags; u32 buff_offset; - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); buff_offset = instance->fw_crash_buffer_offset; if (!instance->crash_dump_buf || !((instance->fw_crash_state == AVAILABLE) || (instance->fw_crash_state == COPYING))) { dev_err(&instance->pdev->dev, "Firmware crash dump is not available\n"); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return -EINVAL; } @@ -3046,7 +3044,7 @@ megasas_fw_crash_buffer_show(struct device *cdev, if (buff_offset > (instance->fw_crash_buffer_size * dmachunk)) { dev_err(&instance->pdev->dev, "Firmware crash dump offset is out of range\n"); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return 0; } @@ -3058,7 +3056,7 @@ megasas_fw_crash_buffer_show(struct device *cdev, src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] + (buff_offset % dmachunk); memcpy(buf, (void *)src_addr, size); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); return size; } @@ -3083,7 +3081,6 @@ megasas_fw_crash_state_store(struct device *cdev, struct megasas_instance *instance = (struct megasas_instance *) shost->hostdata; int val = 0; - unsigned long flags; if (kstrtoint(buf, 0, &val) != 0) return -EINVAL; @@ -3097,9 +3094,9 @@ megasas_fw_crash_state_store(struct device *cdev, instance->fw_crash_state = val; if ((val == COPIED) || (val == COPY_ERROR)) { - spin_lock_irqsave(&instance->crashdump_lock, flags); + mutex_lock(&instance->crashdump_lock); megasas_free_host_crash_buffer(instance); - spin_unlock_irqrestore(&instance->crashdump_lock, flags); + mutex_unlock(&instance->crashdump_lock); if (val == COPY_ERROR) dev_info(&instance->pdev->dev, "application failed to " "copy Firmware crash dump\n"); @@ -6463,7 +6460,7 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) init_waitqueue_head(&instance->int_cmd_wait_q); init_waitqueue_head(&instance->abort_cmd_wait_q); - spin_lock_init(&instance->crashdump_lock); + mutex_init(&instance->crashdump_lock); spin_lock_init(&instance->mfi_pool_lock); spin_lock_init(&instance->hba_lock); spin_lock_init(&instance->stream_lock); From 77e6ea703ced87c0738626f6fe6ff1537d89262c Mon Sep 17 00:00:00 2001 From: Wang Jianchao Date: Sat, 24 Jul 2021 15:41:20 +0800 Subject: [PATCH 20/94] ext4: remove the 'group' parameter of ext4_trim_extent [ Upstream commit bd2eea8d0a6b6a9aca22f20bf74f73b71d8808af ] Get rid of the 'group' parameter of ext4_trim_extent as we can get it from the 'e4b'. Reviewed-by: Andreas Dilger Signed-off-by: Wang Jianchao Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210724074124.25731-2-jianchao.wan9@gmail.com Signed-off-by: Theodore Ts'o Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7692c12b8528..7b8109483175 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5149,19 +5149,19 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, * @sb: super block for the file system * @start: starting block of the free extent in the alloc. group * @count: number of blocks to TRIM - * @group: alloc. group we are working with * @e4b: ext4 buddy for the group * * Trim "count" blocks starting at "start" in the "group". To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ -static int ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) +static int ext4_trim_extent(struct super_block *sb, + int start, int count, struct ext4_buddy *e4b) __releases(bitlock) __acquires(bitlock) { struct ext4_free_extent ex; + ext4_group_t group = e4b->bd_group; int ret = 0; trace_ext4_trim_extent(sb, group, start, count); @@ -5237,8 +5237,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, next = mb_find_next_bit(bitmap, max + 1, start); if ((next - start) >= minblocks) { - ret = ext4_trim_extent(sb, start, - next - start, group, &e4b); + ret = ext4_trim_extent(sb, start, next - start, &e4b); if (ret && ret != -EOPNOTSUPP) break; ret = 0; From 4a050f812100857cb7e0474d01a478334831f7eb Mon Sep 17 00:00:00 2001 From: Wang Jianchao Date: Sat, 24 Jul 2021 15:41:21 +0800 Subject: [PATCH 21/94] ext4: add new helper interface ext4_try_to_trim_range() [ Upstream commit 6920b3913235f517728bb69abe9b39047a987113 ] There is no functional change in this patch but just split the codes, which serachs free block and does trim, into a new function ext4_try_to_trim_range. This is preparing for the following async backgroup discard. Reviewed-by: Andreas Dilger Signed-off-by: Wang Jianchao Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210724074124.25731-3-jianchao.wan9@gmail.com Signed-off-by: Theodore Ts'o Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 102 ++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7b8109483175..51def652098b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5184,6 +5184,54 @@ __acquires(bitlock) return ret; } +static int ext4_try_to_trim_range(struct super_block *sb, + struct ext4_buddy *e4b, ext4_grpblk_t start, + ext4_grpblk_t max, ext4_grpblk_t minblocks) +{ + ext4_grpblk_t next, count, free_count; + void *bitmap; + int ret = 0; + + bitmap = e4b->bd_bitmap; + start = (e4b->bd_info->bb_first_free > start) ? + e4b->bd_info->bb_first_free : start; + count = 0; + free_count = 0; + + while (start <= max) { + start = mb_find_next_zero_bit(bitmap, max + 1, start); + if (start > max) + break; + next = mb_find_next_bit(bitmap, max + 1, start); + + if ((next - start) >= minblocks) { + ret = ext4_trim_extent(sb, start, next - start, e4b); + if (ret && ret != -EOPNOTSUPP) + break; + ret = 0; + count += next - start; + } + free_count += next - start; + start = next + 1; + + if (fatal_signal_pending(current)) { + count = -ERESTARTSYS; + break; + } + + if (need_resched()) { + ext4_unlock_group(sb, e4b->bd_group); + cond_resched(); + ext4_lock_group(sb, e4b->bd_group); + } + + if ((e4b->bd_info->bb_free - free_count) < minblocks) + break; + } + + return count; +} + /** * ext4_trim_all_free -- function to trim all free space in alloc. group * @sb: super block for file system @@ -5207,10 +5255,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) { - void *bitmap; - ext4_grpblk_t next, count = 0, free_count = 0; struct ext4_buddy e4b; - int ret = 0; + int ret; trace_ext4_trim_all_free(sb, group, start, max); @@ -5220,57 +5266,23 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ret, group); return ret; } - bitmap = e4b.bd_bitmap; ext4_lock_group(sb, group); - if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && - minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) - goto out; - start = (e4b.bd_info->bb_first_free > start) ? - e4b.bd_info->bb_first_free : start; - - while (start <= max) { - start = mb_find_next_zero_bit(bitmap, max + 1, start); - if (start > max) - break; - next = mb_find_next_bit(bitmap, max + 1, start); - - if ((next - start) >= minblocks) { - ret = ext4_trim_extent(sb, start, next - start, &e4b); - if (ret && ret != -EOPNOTSUPP) - break; - ret = 0; - count += next - start; - } - free_count += next - start; - start = next + 1; - - if (fatal_signal_pending(current)) { - count = -ERESTARTSYS; - break; - } - - if (need_resched()) { - ext4_unlock_group(sb, group); - cond_resched(); - ext4_lock_group(sb, group); - } - - if ((e4b.bd_info->bb_free - free_count) < minblocks) - break; + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || + minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) { + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); + if (ret >= 0) + EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); + } else { + ret = 0; } - if (!ret) { - ret = count; - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); - } -out: ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); ext4_debug("trimmed %d blocks in the group %d\n", - count, group); + ret, group); return ret; } From 15bb69eaaeb24543ade36ffd07e06f690a94a263 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 20 Aug 2021 14:08:53 +0200 Subject: [PATCH 22/94] ext4: scope ret locally in ext4_try_to_trim_range() [ Upstream commit afcc4e32f606dbfb47aa7309172c89174b86e74c ] As commit 6920b3913235 ("ext4: add new helper interface ext4_try_to_trim_range()") moves some code into the separate function ext4_try_to_trim_range(), the use of the variable ret within that function is more limited and can be adjusted as well. Scope the use of the variable ret locally and drop dead assignments. No functional change. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210820120853.23134-1-lukas.bulwahn@gmail.com Signed-off-by: Theodore Ts'o Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 51def652098b..58a0d2ea314b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5190,7 +5190,6 @@ static int ext4_try_to_trim_range(struct super_block *sb, { ext4_grpblk_t next, count, free_count; void *bitmap; - int ret = 0; bitmap = e4b->bd_bitmap; start = (e4b->bd_info->bb_first_free > start) ? @@ -5205,10 +5204,10 @@ static int ext4_try_to_trim_range(struct super_block *sb, next = mb_find_next_bit(bitmap, max + 1, start); if ((next - start) >= minblocks) { - ret = ext4_trim_extent(sb, start, next - start, e4b); + int ret = ext4_trim_extent(sb, start, next - start, e4b); + if (ret && ret != -EOPNOTSUPP) break; - ret = 0; count += next - start; } free_count += next - start; From 03b4174c42755f1b3cf125315b26c5f8ce812bf0 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 3 Nov 2021 15:51:21 +0100 Subject: [PATCH 23/94] ext4: change s_last_trim_minblks type to unsigned long [ Upstream commit 2327fb2e23416cfb2795ccca2f77d4d65925be99 ] There is no good reason for the s_last_trim_minblks to be atomic. There is no data integrity needed and there is no real danger in setting and reading it in a racy manner. Change it to be unsigned long, the same type as s_clusters_per_group which is the maximum that's allowed. Signed-off-by: Lukas Czerner Suggested-by: Andreas Dilger Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20211103145122.17338-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Signed-off-by: Sasha Levin --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 909f231a387d..fa2579abea7d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1500,7 +1500,7 @@ struct ext4_sb_info { struct task_struct *s_mmp_tsk; /* record the last minlen when FITRIM is called. */ - atomic_t s_last_trim_minblks; + unsigned long s_last_trim_minblks; /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *s_chksum_driver; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 58a0d2ea314b..b76e8b8f01a1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5269,7 +5269,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || - minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) { + minblocks < EXT4_SB(sb)->s_last_trim_minblks) { ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); if (ret >= 0) EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); @@ -5378,7 +5378,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } if (!ret) - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); + EXT4_SB(sb)->s_last_trim_minblks = minlen; out: range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; From 96b38975a04d18a1d0588801809c1e5c80e2eb6d Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 17 Apr 2022 20:03:15 +0300 Subject: [PATCH 24/94] ext4: mark group as trimmed only if it was fully scanned [ Upstream commit d63c00ea435a5352f486c259665a4ced60399421 ] Otherwise nonaligned fstrim calls will works inconveniently for iterative scanners, for example: // trim [0,16MB] for group-1, but mark full group as trimmed fstrim -o $((1024*1024*128)) -l $((1024*1024*16)) ./m // handle [16MB,16MB] for group-1, do nothing because group already has the flag. fstrim -o $((1024*1024*144)) -l $((1024*1024*16)) ./m [ Update function documentation for ext4_trim_all_free -- TYT ] Signed-off-by: Dmitry Monakhov Link: https://lore.kernel.org/r/1650214995-860245-1-git-send-email-dmtrmonakhov@yandex-team.ru Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b76e8b8f01a1..e926b8c3ea89 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5238,6 +5238,7 @@ static int ext4_try_to_trim_range(struct super_block *sb, * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count + * @set_trimmed: set the trimmed flag if at least one block is trimmed * * ext4_trim_all_free walks through group's buddy bitmap searching for free * extents. When the free block is found, ext4_trim_extent is called to TRIM @@ -5252,7 +5253,7 @@ static int ext4_try_to_trim_range(struct super_block *sb, static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks) + ext4_grpblk_t minblocks, bool set_trimmed) { struct ext4_buddy e4b; int ret; @@ -5271,7 +5272,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || minblocks < EXT4_SB(sb)->s_last_trim_minblks) { ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); - if (ret >= 0) + if (ret >= 0 && set_trimmed) EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); } else { ret = 0; @@ -5308,6 +5309,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) ext4_fsblk_t first_data_blk = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); + bool whole_group, eof = false; int ret = 0; start = range->start >> sb->s_blocksize_bits; @@ -5326,8 +5328,10 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) goto out; } - if (end >= max_blks) + if (end >= max_blks - 1) { end = max_blks - 1; + eof = true; + } if (end <= first_data_blk) goto out; if (start < first_data_blk) @@ -5341,6 +5345,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) /* end now represents the last cluster to discard in this group */ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; + whole_group = true; for (group = first_group; group <= last_group; group++) { grp = ext4_get_group_info(sb, group); @@ -5357,12 +5362,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) * change it for the last group, note that last_cluster is * already computed earlier by ext4_get_group_no_and_offset() */ - if (group == last_group) + if (group == last_group) { end = last_cluster; - + whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1; + } if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen, whole_group); if (cnt < 0) { ret = cnt; break; From 7f880d83f4310a469454ed103cef7292ff98ffb0 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 1 Aug 2023 22:32:00 +0800 Subject: [PATCH 25/94] ext4: replace the traditional ternary conditional operator with with max()/min() [ Upstream commit de8bf0e5ee7482585450357c6d4eddec8efc5cb7 ] Replace the traditional ternary conditional operator with with max()/min() Signed-off-by: Kemeng Shi Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20230801143204.2284343-7-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e926b8c3ea89..22da6b1143e5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5192,8 +5192,7 @@ static int ext4_try_to_trim_range(struct super_block *sb, void *bitmap; bitmap = e4b->bd_bitmap; - start = (e4b->bd_info->bb_first_free > start) ? - e4b->bd_info->bb_first_free : start; + start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -5413,8 +5412,7 @@ ext4_mballoc_query_range( ext4_lock_group(sb, group); - start = (e4b.bd_info->bb_first_free > start) ? - e4b.bd_info->bb_first_free : start; + start = max(e4b.bd_info->bb_first_free, start); if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; From d61445f6a5c576bbe31c77afc290c428b1ac88a8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Sep 2023 17:04:54 +0200 Subject: [PATCH 26/94] ext4: move setting of trimmed bit into ext4_try_to_trim_range() [ Upstream commit 45e4ab320c9b5fa67b1fc3b6a9b381cfcc0c8488 ] Currently we set the group's trimmed bit in ext4_trim_all_free() based on return value of ext4_try_to_trim_range(). However when we will want to abort trimming because of suspend attempt, we want to return success from ext4_try_to_trim_range() but not set the trimmed bit. Instead implementing awkward propagation of this information, just move setting of trimmed bit into ext4_try_to_trim_range() when the whole group is trimmed. Cc: stable@kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230913150504.9054-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 22da6b1143e5..94b3bf8173e2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5184,14 +5184,27 @@ __acquires(bitlock) return ret; } +static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, + ext4_group_t grp) +{ + if (grp < ext4_get_groups_count(sb)) + return EXT4_CLUSTERS_PER_GROUP(sb) - 1; + return (ext4_blocks_count(EXT4_SB(sb)->s_es) - + ext4_group_first_block_no(sb, grp) - 1) >> + EXT4_CLUSTER_BITS(sb); +} + static int ext4_try_to_trim_range(struct super_block *sb, struct ext4_buddy *e4b, ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) { ext4_grpblk_t next, count, free_count; + bool set_trimmed = false; void *bitmap; bitmap = e4b->bd_bitmap; + if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) + set_trimmed = true; start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -5206,16 +5219,14 @@ static int ext4_try_to_trim_range(struct super_block *sb, int ret = ext4_trim_extent(sb, start, next - start, e4b); if (ret && ret != -EOPNOTSUPP) - break; + return count; count += next - start; } free_count += next - start; start = next + 1; - if (fatal_signal_pending(current)) { - count = -ERESTARTSYS; - break; - } + if (fatal_signal_pending(current)) + return -ERESTARTSYS; if (need_resched()) { ext4_unlock_group(sb, e4b->bd_group); @@ -5227,6 +5238,9 @@ static int ext4_try_to_trim_range(struct super_block *sb, break; } + if (set_trimmed) + EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); + return count; } @@ -5237,7 +5251,6 @@ static int ext4_try_to_trim_range(struct super_block *sb, * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count - * @set_trimmed: set the trimmed flag if at least one block is trimmed * * ext4_trim_all_free walks through group's buddy bitmap searching for free * extents. When the free block is found, ext4_trim_extent is called to TRIM @@ -5252,7 +5265,7 @@ static int ext4_try_to_trim_range(struct super_block *sb, static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks, bool set_trimmed) + ext4_grpblk_t minblocks) { struct ext4_buddy e4b; int ret; @@ -5269,13 +5282,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || - minblocks < EXT4_SB(sb)->s_last_trim_minblks) { + minblocks < EXT4_SB(sb)->s_last_trim_minblks) ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); - if (ret >= 0 && set_trimmed) - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); - } else { + else ret = 0; - } ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -5308,7 +5318,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) ext4_fsblk_t first_data_blk = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); - bool whole_group, eof = false; int ret = 0; start = range->start >> sb->s_blocksize_bits; @@ -5327,10 +5336,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) goto out; } - if (end >= max_blks - 1) { + if (end >= max_blks - 1) end = max_blks - 1; - eof = true; - } if (end <= first_data_blk) goto out; if (start < first_data_blk) @@ -5344,7 +5351,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) /* end now represents the last cluster to discard in this group */ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; - whole_group = true; for (group = first_group; group <= last_group; group++) { grp = ext4_get_group_info(sb, group); @@ -5361,13 +5367,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) * change it for the last group, note that last_cluster is * already computed earlier by ext4_get_group_no_and_offset() */ - if (group == last_group) { + if (group == last_group) end = last_cluster; - whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1; - } if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen, whole_group); + end, minlen); if (cnt < 0) { ret = cnt; break; From c01fc581338250585f69a7087c6e758d305a543c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Sep 2023 17:04:55 +0200 Subject: [PATCH 27/94] ext4: do not let fstrim block system suspend [ Upstream commit 5229a658f6453362fbb9da6bf96872ef25a7097e ] Len Brown has reported that system suspend sometimes fail due to inability to freeze a task working in ext4_trim_fs() for one minute. Trimming a large filesystem on a disk that slowly processes discard requests can indeed take a long time. Since discard is just an advisory call, it is perfectly fine to interrupt it at any time and the return number of discarded blocks until that moment. Do that when we detect the task is being frozen. Cc: stable@kernel.org Reported-by: Len Brown Suggested-by: Dave Chinner References: https://bugzilla.kernel.org/show_bug.cgi?id=216322 Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230913150504.9054-2-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 94b3bf8173e2..fb2f255c48e8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #ifdef CONFIG_EXT4_DEBUG @@ -5194,6 +5195,11 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, EXT4_CLUSTER_BITS(sb); } +static bool ext4_trim_interrupted(void) +{ + return fatal_signal_pending(current) || freezing(current); +} + static int ext4_try_to_trim_range(struct super_block *sb, struct ext4_buddy *e4b, ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) @@ -5225,8 +5231,8 @@ static int ext4_try_to_trim_range(struct super_block *sb, free_count += next - start; start = next + 1; - if (fatal_signal_pending(current)) - return -ERESTARTSYS; + if (ext4_trim_interrupted()) + return count; if (need_resched()) { ext4_unlock_group(sb, e4b->bd_group); @@ -5353,6 +5359,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; for (group = first_group; group <= last_group; group++) { + if (ext4_trim_interrupted()) + break; grp = ext4_get_group_info(sb, group); /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { From 9cd4fadbd20bdd595681297ec6081413d9d96d78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Sep 2023 09:06:56 +0200 Subject: [PATCH 28/94] MIPS: Alchemy: only build mmc support helpers if au1xmmc is enabled [ Upstream commit ef8f8f04a0b25e8f294b24350e8463a8d6a9ba0b ] While commit d4a5c59a955b ("mmc: au1xmmc: force non-modular build and remove symbol_get usage") to be built in, it can still build a kernel without MMC support and thuse no mmc_detect_change symbol at all. Add ifdefs to build the mmc support code in the alchemy arch code conditional on mmc support. Fixes: d4a5c59a955b ("mmc: au1xmmc: force non-modular build and remove symbol_get usage") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/alchemy/devboards/db1000.c | 4 ++++ arch/mips/alchemy/devboards/db1200.c | 6 ++++++ arch/mips/alchemy/devboards/db1300.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c index 6fe0f0f95ed7..548bd4db0f97 100644 --- a/arch/mips/alchemy/devboards/db1000.c +++ b/arch/mips/alchemy/devboards/db1000.c @@ -173,6 +173,7 @@ static struct platform_device db1x00_audio_dev = { /******************************************************************************/ +#ifdef CONFIG_MMC_AU1X static irqreturn_t db1100_mmc_cd(int irq, void *ptr) { mmc_detect_change(ptr, msecs_to_jiffies(500)); @@ -380,6 +381,7 @@ static struct platform_device db1100_mmc1_dev = { .num_resources = ARRAY_SIZE(au1100_mmc1_res), .resource = au1100_mmc1_res, }; +#endif /* CONFIG_MMC_AU1X */ /******************************************************************************/ @@ -497,9 +499,11 @@ static struct platform_device *db1000_devs[] = { static struct platform_device *db1100_devs[] = { &au1100_lcd_device, +#ifdef CONFIG_MMC_AU1X &db1100_mmc0_dev, &db1100_mmc1_dev, &db1000_irda_dev, +#endif }; int __init db1000_dev_setup(void) diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c index ae81e05fcb2c..48840e48e79a 100644 --- a/arch/mips/alchemy/devboards/db1200.c +++ b/arch/mips/alchemy/devboards/db1200.c @@ -341,6 +341,7 @@ static struct platform_device db1200_ide_dev = { /**********************************************************************/ +#ifdef CONFIG_MMC_AU1X /* SD carddetects: they're supposed to be edge-triggered, but ack * doesn't seem to work (CPLD Rev 2). Instead, the screaming one * is disabled and its counterpart enabled. The 200ms timeout is @@ -601,6 +602,7 @@ static struct platform_device pb1200_mmc1_dev = { .num_resources = ARRAY_SIZE(au1200_mmc1_res), .resource = au1200_mmc1_res, }; +#endif /* CONFIG_MMC_AU1X */ /**********************************************************************/ @@ -768,7 +770,9 @@ static struct platform_device db1200_audiodma_dev = { static struct platform_device *db1200_devs[] __initdata = { NULL, /* PSC0, selected by S6.8 */ &db1200_ide_dev, +#ifdef CONFIG_MMC_AU1X &db1200_mmc0_dev, +#endif &au1200_lcd_dev, &db1200_eth_dev, &db1200_nand_dev, @@ -779,7 +783,9 @@ static struct platform_device *db1200_devs[] __initdata = { }; static struct platform_device *pb1200_devs[] __initdata = { +#ifdef CONFIG_MMC_AU1X &pb1200_mmc1_dev, +#endif }; /* Some peripheral base addresses differ on the PB1200 */ diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c index 0c12fbc07117..664a5a783d2c 100644 --- a/arch/mips/alchemy/devboards/db1300.c +++ b/arch/mips/alchemy/devboards/db1300.c @@ -448,6 +448,7 @@ static struct platform_device db1300_ide_dev = { /**********************************************************************/ +#ifdef CONFIG_MMC_AU1X static irqreturn_t db1300_mmc_cd(int irq, void *ptr) { disable_irq_nosync(irq); @@ -626,6 +627,7 @@ static struct platform_device db1300_sd0_dev = { .resource = au1300_sd0_res, .num_resources = ARRAY_SIZE(au1300_sd0_res), }; +#endif /* CONFIG_MMC_AU1X */ /**********************************************************************/ @@ -756,8 +758,10 @@ static struct platform_device *db1300_dev[] __initdata = { &db1300_5waysw_dev, &db1300_nand_dev, &db1300_ide_dev, +#ifdef CONFIG_MMC_AU1X &db1300_sd0_dev, &db1300_sd1_dev, +#endif &db1300_lcd_dev, &db1300_ac97_dev, &db1300_i2s_dev, From 33a760a5fd86267c3b09ff9d9bfb953f10aed02c Mon Sep 17 00:00:00 2001 From: Timo Alho Date: Tue, 12 Sep 2023 14:29:50 +0300 Subject: [PATCH 29/94] clk: tegra: fix error return case for recalc_rate [ Upstream commit a47b44fbb13f5e7a981b4515dcddc93a321ae89c ] tegra-bpmp clocks driver makes implicit conversion of signed error code to unsigned value in recalc_rate operation. The behavior for recalc_rate, according to it's specification, should be that "If the driver cannot figure out a rate for this clock, it must return 0." Fixes: ca6f2796eef7 ("clk: tegra: Add BPMP clock driver") Signed-off-by: Timo Alho Signed-off-by: Mikko Perttunen Link: https://lore.kernel.org/r/20230912112951.2330497-1-cyndis@kapsi.fi Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-bpmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/tegra/clk-bpmp.c b/drivers/clk/tegra/clk-bpmp.c index 01dada561c10..3bdd3334b0f9 100644 --- a/drivers/clk/tegra/clk-bpmp.c +++ b/drivers/clk/tegra/clk-bpmp.c @@ -162,7 +162,7 @@ static unsigned long tegra_bpmp_clk_recalc_rate(struct clk_hw *hw, err = tegra_bpmp_clk_transfer(clk->bpmp, &msg); if (err < 0) - return err; + return 0; return response.rate; } From 90f5eaccf9e769fbb85f4a980de6255339a26ed0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 11 Sep 2023 07:07:38 +0300 Subject: [PATCH 30/94] ARM: dts: ti: omap: motorola-mapphone: Fix abe_clkctrl warning on boot [ Upstream commit ac08bda1569b06b7a62c7b4dd00d4c3b28ceaaec ] Commit 0840242e8875 ("ARM: dts: Configure clock parent for pwm vibra") attempted to fix the PWM settings but ended up causin an additional clock reparenting error: clk: failed to reparent abe-clkctrl:0060:24 to sys_clkin_ck: -22 Only timer9 is in the PER domain and can use the sys_clkin_ck clock source. For timer8, the there is no sys_clkin_ck available as it's in the ABE domain, instead it should use syc_clk_div_ck. However, for power management, we want to use the always on sys_32k_ck instead. Cc: Ivaylo Dimitrov Cc: Carl Philipp Klemm Cc: Merlijn Wajer Cc: Pavel Machek Reviewed-by: Sebastian Reichel Fixes: 0840242e8875 ("ARM: dts: Configure clock parent for pwm vibra") Depends-on: 61978617e905 ("ARM: dts: Add minimal support for Droid Bionic xt875") Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap4-droid4-xt894.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 67d77eee9433..459720f5f558 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -647,12 +647,12 @@ /* Configure pwm clock source for timers 8 & 9 */ &timer8 { assigned-clocks = <&abe_clkctrl OMAP4_TIMER8_CLKCTRL 24>; - assigned-clock-parents = <&sys_clkin_ck>; + assigned-clock-parents = <&sys_32k_ck>; }; &timer9 { assigned-clocks = <&l4_per_clkctrl OMAP4_TIMER9_CLKCTRL 24>; - assigned-clock-parents = <&sys_clkin_ck>; + assigned-clock-parents = <&sys_32k_ck>; }; /* From bcb0575e291ce1359e69cc633040beb09e8cebab Mon Sep 17 00:00:00 2001 From: Wenhua Lin Date: Thu, 21 Sep 2023 20:25:27 +0800 Subject: [PATCH 31/94] gpio: pmic-eic-sprd: Add can_sleep flag for PMIC EIC chip [ Upstream commit 26d9e5640d2130ee16df7b1fb6a908f460ab004c ] The drivers uses a mutex and I2C bus access in its PMIC EIC chip get implementation. This means these functions can sleep and the PMIC EIC chip should set the can_sleep property to true. This will ensure that a warning is printed when trying to get the value from a context that potentially can't sleep. Fixes: 348f3cde84ab ("gpio: Add Spreadtrum PMIC EIC driver support") Signed-off-by: Wenhua Lin Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-pmic-eic-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pmic-eic-sprd.c b/drivers/gpio/gpio-pmic-eic-sprd.c index 29e044ff4b17..7697cb96bf36 100644 --- a/drivers/gpio/gpio-pmic-eic-sprd.c +++ b/drivers/gpio/gpio-pmic-eic-sprd.c @@ -341,6 +341,7 @@ static int sprd_pmic_eic_probe(struct platform_device *pdev) pmic_eic->chip.set_config = sprd_pmic_eic_set_config; pmic_eic->chip.set = sprd_pmic_eic_set; pmic_eic->chip.get = sprd_pmic_eic_get; + pmic_eic->chip.can_sleep = true; pmic_eic->intc.name = dev_name(&pdev->dev); pmic_eic->intc.irq_mask = sprd_pmic_eic_irq_mask; From 8da1ab77083660c996023ed1e2d0922fbb90150a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 30 Aug 2023 08:10:01 +0200 Subject: [PATCH 32/94] parisc: sba: Fix compile warning wrt list of SBA devices [ Upstream commit eb3255ee8f6f4691471a28fbf22db5e8901116cd ] Fix this makecheck warning: drivers/parisc/sba_iommu.c:98:19: warning: symbol 'sba_list' was not declared. Should it be static? Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/include/asm/ropes.h | 3 +++ drivers/char/agp/parisc-agp.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/ropes.h b/arch/parisc/include/asm/ropes.h index 8e51c775c80a..62399c7ea94a 100644 --- a/arch/parisc/include/asm/ropes.h +++ b/arch/parisc/include/asm/ropes.h @@ -86,6 +86,9 @@ struct sba_device { struct ioc ioc[MAX_IOC]; }; +/* list of SBA's in system, see drivers/parisc/sba_iommu.c */ +extern struct sba_device *sba_list; + #define ASTRO_RUNWAY_PORT 0x582 #define IKE_MERCED_PORT 0x803 #define REO_MERCED_PORT 0x804 diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index 1d5510cb6db4..1962ff624b7c 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -385,8 +385,6 @@ find_quicksilver(struct device *dev, void *data) static int __init parisc_agp_init(void) { - extern struct sba_device *sba_list; - int err = -1; struct parisc_device *sba = NULL, *lba = NULL; struct lba_device *lbadev = NULL; From 700f6cee4831476a187012340d72ae3c64e9b222 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 30 Aug 2023 11:59:55 +0200 Subject: [PATCH 33/94] parisc: iosapic.c: Fix sparse warnings [ Upstream commit 927c6c8aa27c284a799b8c18784e37d3373af908 ] Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/parisc/iosapic.c | 4 ++-- drivers/parisc/iosapic_private.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index eb9137faccf7..4cc08d13b82f 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -216,9 +216,9 @@ static inline void iosapic_write(void __iomem *iosapic, unsigned int reg, u32 va static DEFINE_SPINLOCK(iosapic_lock); -static inline void iosapic_eoi(void __iomem *addr, unsigned int data) +static inline void iosapic_eoi(__le32 __iomem *addr, __le32 data) { - __raw_writel(data, addr); + __raw_writel((__force u32)data, addr); } /* diff --git a/drivers/parisc/iosapic_private.h b/drivers/parisc/iosapic_private.h index 6e05e30a2450..7a928c03d520 100644 --- a/drivers/parisc/iosapic_private.h +++ b/drivers/parisc/iosapic_private.h @@ -132,8 +132,8 @@ struct iosapic_irt { struct vector_info { struct iosapic_info *iosapic; /* I/O SAPIC this vector is on */ struct irt_entry *irte; /* IRT entry */ - u32 __iomem *eoi_addr; /* precalculate EOI reg address */ - u32 eoi_data; /* IA64: ? PA: swapped txn_data */ + __le32 __iomem *eoi_addr; /* precalculate EOI reg address */ + __le32 eoi_data; /* IA64: ? PA: swapped txn_data */ int txn_irq; /* virtual IRQ number for processor */ ulong txn_addr; /* IA64: id_eid PA: partial HPA */ u32 txn_data; /* CPU interrupt bit */ From c10f7595af65f5481b932daf8bd6998cdffd0ff7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 31 Aug 2023 22:08:32 +0200 Subject: [PATCH 34/94] parisc: drivers: Fix sparse warning [ Upstream commit b137b9d60b8add5620a06c687a71ce18776730b0 ] Fix "warning: directive in macro's argument list" warning. Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/kernel/drivers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 01a2ed59d2f2..94037c8512f7 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -903,9 +903,9 @@ static __init void qemu_header(void) pr_info("#define PARISC_MODEL \"%s\"\n\n", boot_cpu_data.pdc.sys_model_name); + #define p ((unsigned long *)&boot_cpu_data.pdc.model) pr_info("#define PARISC_PDC_MODEL 0x%lx, 0x%lx, 0x%lx, " "0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx\n\n", - #define p ((unsigned long *)&boot_cpu_data.pdc.model) p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8]); #undef p From b068b4e0cc8e09ebfb43f335f27c51a831a9de50 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 31 Aug 2023 22:36:12 +0200 Subject: [PATCH 35/94] parisc: irq: Make irq_stack_union static to avoid sparse warning [ Upstream commit b1bef1388c427cdad7331a9c8eb4ebbbe5b954b0 ] Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index c152c30c2d06..11c1505775f8 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -392,7 +392,7 @@ union irq_stack_union { volatile unsigned int lock[1]; }; -DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = { +static DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = { .slock = { 1,1,1,1 }, }; #endif From bcf01fe7626a6fe7d06c4a99361efad1b9abc89f Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Mon, 26 Jun 2023 08:11:44 +0800 Subject: [PATCH 36/94] selftests/ftrace: Correctly enable event in instance-event.tc [ Upstream commit f4e4ada586995b17f828c6d147d1800eb1471450 ] Function instance_set() expects to enable event 'sched_switch', so we should set 1 to its 'enable' file. Testcase passed after this patch: # ./ftracetest test.d/instances/instance-event.tc === Ftrace unit tests === [1] Test creation and deletion of trace instances while setting an event [PASS] # of passed: 1 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 Signed-off-by: Zheng Yejian Acked-by: Masami Hiramatsu (Google) Acked-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- .../testing/selftests/ftrace/test.d/instances/instance-event.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 4fa0f79144f4..9473934a573a 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -43,7 +43,7 @@ instance_read() { instance_set() { while :; do - echo 1 > foo/events/sched/sched_switch + echo 1 > foo/events/sched/sched_switch/enable done 2> /dev/null } From d93e05a5219a17703cfe177e50dc453a1e447904 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 6 Sep 2023 16:19:30 +0800 Subject: [PATCH 37/94] ring-buffer: Avoid softlockup in ring_buffer_resize() [ Upstream commit f6bd2c92488c30ef53b5bd80c52f0a7eee9d545a ] When user resize all trace ring buffer through file 'buffer_size_kb', then in ring_buffer_resize(), kernel allocates buffer pages for each cpu in a loop. If the kernel preemption model is PREEMPT_NONE and there are many cpus and there are many buffer pages to be allocated, it may not give up cpu for a long time and finally cause a softlockup. To avoid it, call cond_resched() after each cpu buffer allocation. Link: https://lore.kernel.org/linux-trace-kernel/20230906081930.3939106-1-zhengyejian1@huawei.com Cc: Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c8a7de7a1d63..320aa60664dc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1753,6 +1753,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, err = -ENOMEM; goto out_err; } + + cond_resched(); } get_online_cpus(); From e2b7e0a9b430e4569dc860a5fc6c8fe0cc87fe31 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Sep 2023 00:19:16 +0200 Subject: [PATCH 38/94] ata: libata-eh: do not clear ATA_PFLAG_EH_PENDING in ata_eh_reset() [ Upstream commit 80cc944eca4f0baa9c381d0706f3160e491437f2 ] ata_scsi_port_error_handler() starts off by clearing ATA_PFLAG_EH_PENDING, before calling ap->ops->error_handler() (without holding the ap->lock). If an error IRQ is received while ap->ops->error_handler() is running, the irq handler will set ATA_PFLAG_EH_PENDING. Once ap->ops->error_handler() returns, ata_scsi_port_error_handler() checks if ATA_PFLAG_EH_PENDING is set, and if it is, another iteration of ATA EH is performed. The problem is that ATA_PFLAG_EH_PENDING is not only cleared by ata_scsi_port_error_handler(), it is also cleared by ata_eh_reset(). ata_eh_reset() is called by ap->ops->error_handler(). This additional clearing done by ata_eh_reset() breaks the whole retry logic in ata_scsi_port_error_handler(). Thus, if an error IRQ is received while ap->ops->error_handler() is running, the port will currently remain frozen and will never get re-enabled. The additional clearing in ata_eh_reset() was introduced in commit 1e641060c4b5 ("libata: clear eh_info on reset completion"). Looking at the original error report: https://marc.info/?l=linux-ide&m=124765325828495&w=2 We can see the following happening: [ 1.074659] ata3: XXX port freeze [ 1.074700] ata3: XXX hardresetting link, stopping engine [ 1.074746] ata3: XXX flipping SControl [ 1.411471] ata3: XXX irq_stat=400040 CONN|PHY [ 1.411475] ata3: XXX port freeze [ 1.420049] ata3: XXX starting engine [ 1.420096] ata3: XXX rc=0, class=1 [ 1.420142] ata3: XXX clearing IRQs for thawing [ 1.420188] ata3: XXX port thawed [ 1.420234] ata3: SATA link up 3.0 Gbps (SStatus 123 SControl 300) We are not supposed to be able to receive an error IRQ while the port is frozen (PxIE is set to 0, i.e. all IRQs for the port are disabled). AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) states: "Each bit location can be thought of as reporting a '1' if the virtual "interrupt line" for that port is indicating it wishes to generate an interrupt. That is, if a port has one or more interrupt status bit set, and the enables for those status bits are set, then this bit shall be set." Additionally, AHCI state P:ComInit clearly shows that the state machine will only jump to P:ComInitSetIS (which sets IS.IPS(x) to '1'), if PxIE.PCE is set to '1'. In our case, PxIE is set to 0, so IS.IPS(x) won't get set. So IS.IPS(x) only gets set if PxIS and PxIE is set. AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) also states: "The bits in this register are read/write clear. It is set by the level of the virtual interrupt line being a set, and cleared by a write of '1' from the software." So if IS.IPS(x) is set, you need to explicitly clear it by writing a 1 to IS.IPS(x) for that port. Since PxIE is cleared, the only way to get an interrupt while the port is frozen, is if IS.IPS(x) is set, and the only way IS.IPS(x) can be set when the port is frozen, is if it was set before the port was frozen. However, since commit 737dd811a3db ("ata: libahci: clear pending interrupt status"), we clear both PxIS and IS.IPS(x) after freezing the port, but before the COMRESET, so the problem that commit 1e641060c4b5 ("libata: clear eh_info on reset completion") fixed can no longer happen. Thus, revert commit 1e641060c4b5 ("libata: clear eh_info on reset completion"), so that the retry logic in ata_scsi_port_error_handler() works once again. (The retry logic is still needed, since we can still get an error IRQ _after_ the port has been thawed, but before ata_scsi_port_error_handler() takes the ap->lock in order to check if ATA_PFLAG_EH_PENDING is set.) Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/libata-eh.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index fcc3d7985762..73a4dd37d04a 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2922,18 +2922,11 @@ int ata_eh_reset(struct ata_link *link, int classify, postreset(slave, classes); } - /* - * Some controllers can't be frozen very well and may set spurious - * error conditions during reset. Clear accumulated error - * information and re-thaw the port if frozen. As reset is the - * final recovery action and we cross check link onlineness against - * device classification later, no hotplug event is lost by this. - */ + /* clear cached SError */ spin_lock_irqsave(link->ap->lock, flags); - memset(&link->eh_info, 0, sizeof(link->eh_info)); + link->eh_info.serror = 0; if (slave) - memset(&slave->eh_info, 0, sizeof(link->eh_info)); - ap->pflags &= ~ATA_PFLAG_EH_PENDING; + slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); if (ap->pflags & ATA_PFLAG_FROZEN) From 4f8c4e167ad88387de51da02a1d2d5c366025470 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 11 Sep 2023 12:47:30 -0700 Subject: [PATCH 39/94] bpf: Clarify error expectations from bpf_clone_redirect [ Upstream commit 7cb779a6867fea00b4209bcf6de2f178a743247d ] Commit 151e887d8ff9 ("veth: Fixing transmit return status for dropped packets") exposed the fact that bpf_clone_redirect is capable of returning raw NET_XMIT_XXX return codes. This is in the conflict with its UAPI doc which says the following: "0 on success, or a negative error in case of failure." Update the UAPI to reflect the fact that bpf_clone_redirect can return positive error numbers, but don't explicitly define their meaning. Reported-by: Daniel Borkmann Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230911194731.286342-1-sdf@google.com Signed-off-by: Sasha Levin --- include/uapi/linux/bpf.h | 4 +++- tools/include/uapi/linux/bpf.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6334aede67fc..91c43f375612 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -693,7 +693,9 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) * Return diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0c30ab898e2a..37259bae1501 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -691,7 +691,9 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) * Return From 5493dca025f7e9cc2eb6e30e8392a012fe146995 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Sep 2023 11:03:49 +0200 Subject: [PATCH 40/94] fbdev/sh7760fb: Depend on FB=y [ Upstream commit f75f71b2c418a27a7c05139bb27a0c83adf88d19 ] Fix linker error if FB=m about missing fb_io_read and fb_io_write. The linker's error message suggests that this config setting has already been broken for other symbols. All errors (new ones prefixed by >>): sh4-linux-ld: drivers/video/fbdev/sh7760fb.o: in function `sh7760fb_probe': sh7760fb.c:(.text+0x374): undefined reference to `framebuffer_alloc' sh4-linux-ld: sh7760fb.c:(.text+0x394): undefined reference to `fb_videomode_to_var' sh4-linux-ld: sh7760fb.c:(.text+0x39c): undefined reference to `fb_alloc_cmap' sh4-linux-ld: sh7760fb.c:(.text+0x3a4): undefined reference to `register_framebuffer' sh4-linux-ld: sh7760fb.c:(.text+0x3ac): undefined reference to `fb_dealloc_cmap' sh4-linux-ld: sh7760fb.c:(.text+0x434): undefined reference to `framebuffer_release' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o: in function `sh7760fb_remove': sh7760fb.c:(.text+0x800): undefined reference to `unregister_framebuffer' sh4-linux-ld: sh7760fb.c:(.text+0x804): undefined reference to `fb_dealloc_cmap' sh4-linux-ld: sh7760fb.c:(.text+0x814): undefined reference to `framebuffer_release' >> sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0xc): undefined reference to `fb_io_read' >> sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x10): undefined reference to `fb_io_write' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x2c): undefined reference to `cfb_fillrect' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x30): undefined reference to `cfb_copyarea' sh4-linux-ld: drivers/video/fbdev/sh7760fb.o:(.rodata+0x34): undefined reference to `cfb_imageblit' Suggested-by: Randy Dunlap Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309130632.LS04CPWu-lkp@intel.com/ Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Acked-by: John Paul Adrian Glaubitz Link: https://patchwork.freedesktop.org/patch/msgid/20230918090400.13264-1-tzimmermann@suse.de Signed-off-by: Sasha Levin --- drivers/video/fbdev/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index afb0c9e4d738..8e224ee27ade 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -2085,7 +2085,7 @@ config FB_COBALT config FB_SH7760 bool "SH7760/SH7763/SH7720/SH7721 LCDC support" - depends on FB && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763 \ + depends on FB=y && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763 \ || CPU_SUBTYPE_SH7720 || CPU_SUBTYPE_SH7721) select FB_CFB_FILLRECT select FB_CFB_COPYAREA From c81e46a880bbb9d3a144ab726be12822144c2254 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Tue, 12 Sep 2023 17:52:49 +0200 Subject: [PATCH 41/94] nvme-pci: do not set the NUMA node of device if it has none [ Upstream commit dad651b2a44eb6b201738f810254279dca29d30d ] If a device has no NUMA node information associated with it, the driver puts the device in node first_memory_node (say node 0). Not having a NUMA node and being associated with node 0 are completely different things and it makes little sense to mix the two. Signed-off-by: Pratyush Yadav Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b06d2b6bd3fe..163497ef48fd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2501,8 +2501,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) size_t alloc_size; node = dev_to_node(&pdev->dev); - if (node == NUMA_NO_NODE) - set_dev_node(&pdev->dev, first_memory_node); dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) From c1c66ab9ea5485b9f8ddf4f346f7ad4e83997cef Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2023 09:05:39 +0200 Subject: [PATCH 42/94] watchdog: iTCO_wdt: No need to stop the timer in probe commit 1ae3e78c08209ac657c59f6f7ea21bbbd7f6a1d4 upstream. The watchdog core can handle pinging of the watchdog before userspace opens the device. For this reason instead of stopping the timer, just mark it as running and let the watchdog core take care of it. Cc: Malin Jonsson Signed-off-by: Mika Westerberg Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20210921102900.61586-1-mika.westerberg@linux.intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/iTCO_wdt.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 347f0389b089..930798bac582 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -401,6 +401,16 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev) return time_left; } +static void iTCO_wdt_set_running(struct iTCO_wdt_private *p) +{ + u16 val; + + /* Bit 11: TCO Timer Halt -> 0 = The TCO timer is * enabled */ + val = inw(TCO1_CNT(p)); + if (!(val & BIT(11))) + set_bit(WDOG_HW_RUNNING, &p->wddev.status); +} + /* * Kernel Interfaces */ @@ -537,8 +547,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) watchdog_set_drvdata(&p->wddev, p); platform_set_drvdata(pdev, p); - /* Make sure the watchdog is not running */ - iTCO_wdt_stop(&p->wddev); + iTCO_wdt_set_running(p); /* Check that the heartbeat value is within it's range; if not reset to the default */ From dd081a3f69b16d559b31be85975e271da917e5dc Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2023 09:05:40 +0200 Subject: [PATCH 43/94] watchdog: iTCO_wdt: Set NO_REBOOT if the watchdog is not already running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ef9b7bf52c2f47f0a9bf988543c577b92c92d15e upstream. Daniel reported that the commit 1ae3e78c0820 ("watchdog: iTCO_wdt: No need to stop the timer in probe") makes QEMU implementation of the iTCO watchdog not to trigger reboot anymore when NO_REBOOT flag is initially cleared using this option (in QEMU command line): -global ICH9-LPC.noreboot=false The problem with the commit is that it left the unconditional setting of NO_REBOOT that is not cleared anymore when the kernel keeps pinging the watchdog (as opposed to the previous code that called iTCO_wdt_stop() that cleared it). Fix this so that we only set NO_REBOOT if the watchdog was not initially running. Fixes: 1ae3e78c0820 ("watchdog: iTCO_wdt: No need to stop the timer in probe") Reported-by: Daniel P. Berrangé Signed-off-by: Mika Westerberg Tested-by: Daniel P. Berrangé Reviewed-by: Daniel P. Berrangé Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20221028062750.45451-1-mika.westerberg@linux.intel.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/iTCO_wdt.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 930798bac582..5ec52032117a 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -401,14 +401,18 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev) return time_left; } -static void iTCO_wdt_set_running(struct iTCO_wdt_private *p) +/* Returns true if the watchdog was running */ +static bool iTCO_wdt_set_running(struct iTCO_wdt_private *p) { u16 val; - /* Bit 11: TCO Timer Halt -> 0 = The TCO timer is * enabled */ + /* Bit 11: TCO Timer Halt -> 0 = The TCO timer is enabled */ val = inw(TCO1_CNT(p)); - if (!(val & BIT(11))) + if (!(val & BIT(11))) { set_bit(WDOG_HW_RUNNING, &p->wddev.status); + return true; + } + return false; } /* @@ -486,9 +490,6 @@ static int iTCO_wdt_probe(struct platform_device *pdev) return -ENODEV; /* Cannot reset NO_REBOOT bit */ } - /* Set the NO_REBOOT bit to prevent later reboots, just for sure */ - p->update_no_reboot_bit(p->no_reboot_priv, true); - /* The TCO logic uses the TCO_EN bit in the SMI_EN register */ if (!devm_request_region(dev, p->smi_res->start, resource_size(p->smi_res), @@ -547,7 +548,13 @@ static int iTCO_wdt_probe(struct platform_device *pdev) watchdog_set_drvdata(&p->wddev, p); platform_set_drvdata(pdev, p); - iTCO_wdt_set_running(p); + if (!iTCO_wdt_set_running(p)) { + /* + * If the watchdog was not running set NO_REBOOT now to + * prevent later reboots. + */ + p->update_no_reboot_bit(p->no_reboot_priv, true); + } /* Check that the heartbeat value is within it's range; if not reset to the default */ From b17d81b94d24834d0ee038c6e216752f43b67cc9 Mon Sep 17 00:00:00 2001 From: Felix Riemann Date: Fri, 10 Feb 2023 13:36:44 +0100 Subject: [PATCH 44/94] net: Fix unwanted sign extension in netdev_stats_to_stats64() [ Upstream commit 9b55d3f0a69af649c62cbc2633e6d695bb3cc583 ] When converting net_device_stats to rtnl_link_stats64 sign extension is triggered on ILP32 machines as 6c1c509778 changed the previous "ulong -> u64" conversion to "long -> u64" by accessing the net_device_stats fields through a (signed) atomic_long_t. This causes for example the received bytes counter to jump to 16EiB after having received 2^31 bytes. Casting the atomic value to "unsigned long" beforehand converting it into u64 avoids this. Fixes: 6c1c5097781f ("net: add atomic_long_t to net_device_stats fields") Signed-off-by: Felix Riemann Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 3bf40c288c03..0f9214fb36e0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9041,7 +9041,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = atomic_long_read(&src[i]); + dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); From dd1524d5d97f69c1289209eab2449b66ee77cb49 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 28 Jun 2019 02:50:39 -0700 Subject: [PATCH 45/94] scsi: megaraid_sas: Enable msix_load_balance for Invader and later controllers [ Upstream commit 1175b88452cad208894412b955ee698934968aed ] Load balancing IO completions across all available MSI-X vectors should be enabled for Invader and later generation controllers only. This needs to be disabled for older controllers. Add an adapter type check before setting msix_load_balance flag. Fixes: 1d15d9098ad1 ("scsi: megaraid_sas: Load balance completions across all MSI-X") Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index bdfa36712fcc..ac4800fb1a7f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5364,7 +5364,8 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->is_rdpq = (scratch_pad_2 & MR_RDPQ_MODE_OFFSET) ? 1 : 0; - if (!instance->msix_combined) { + if (instance->adapter_type >= INVADER_SERIES && + !instance->msix_combined) { instance->msix_load_balance = true; instance->smp_affinity_enable = false; } From db01c2bf6b934c26ef204b087e776b21285edacd Mon Sep 17 00:00:00 2001 From: Vishal Goel Date: Thu, 28 Sep 2023 18:51:36 -0700 Subject: [PATCH 46/94] Smack:- Use overlay inode label in smack_inode_copy_up() commit 387ef964460f14fe1c1ea29aba70e22731ea7cf7 upstream. Currently in "smack_inode_copy_up()" function, process label is changed with the label on parent inode. Due to which, process is assigned directory label and whatever file or directory created by the process are also getting directory label which is wrong label. Changes has been done to use label of overlay inode instead of parent inode. Signed-off-by: Vishal Goel Signed-off-by: Casey Schaufler [4.19: adjusted for the lack of helper functions] Fixes: d6d80cb57be4 ("Smack: Base support for overlayfs") Signed-off-by: Munehisa Kamata Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 4f65d953fe31..a09a9c6bbdf6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4612,7 +4612,7 @@ static int smack_inode_copy_up(struct dentry *dentry, struct cred **new) /* * Get label from overlay inode and set it in create_sid */ - isp = d_inode(dentry->d_parent)->i_security; + isp = d_inode(dentry)->i_security; skp = isp->smk_inode; tsp->smk_task = skp; *new = new_creds; From 24416f2ac0604b1db19d956ffd07d0040c920de4 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 28 Sep 2023 18:51:37 -0700 Subject: [PATCH 47/94] smack: Retrieve transmuting information in smack_inode_getsecurity() commit 3a3d8fce31a49363cc31880dce5e3b0617c9c38b upstream. Enhance smack_inode_getsecurity() to retrieve the value for SMACK64TRANSMUTE from the inode security blob, similarly to SMACK64. This helps to display accurate values in the situation where the security labels come from mount options and not from xattrs. Signed-off-by: Roberto Sassu Signed-off-by: Casey Schaufler [4.19: adjusted for the lack of helper functions] Fixes: d6d80cb57be4 ("Smack: Base support for overlayfs") Signed-off-by: Munehisa Kamata Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index a09a9c6bbdf6..db729834d8ba 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1490,10 +1490,19 @@ static int smack_inode_getsecurity(struct inode *inode, struct super_block *sbp; struct inode *ip = (struct inode *)inode; struct smack_known *isp; + struct inode_smack *ispp; + size_t label_len; + char *label = NULL; - if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) + if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { isp = smk_of_inode(inode); - else { + } else if (strcmp(name, XATTR_SMACK_TRANSMUTE) == 0) { + ispp = inode->i_security; + if (ispp->smk_flags & SMK_INODE_TRANSMUTE) + label = TRANS_TRUE; + else + label = ""; + } else { /* * The rest of the Smack xattrs are only on sockets. */ @@ -1515,13 +1524,18 @@ static int smack_inode_getsecurity(struct inode *inode, return -EOPNOTSUPP; } + if (!label) + label = isp->smk_known; + + label_len = strlen(label); + if (alloc) { - *buffer = kstrdup(isp->smk_known, GFP_KERNEL); + *buffer = kstrdup(label, GFP_KERNEL); if (*buffer == NULL) return -ENOMEM; } - return strlen(isp->smk_known); + return label_len; } From 3307dd6c104e83a2f9d4a703ae06c8a8d9c11467 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 28 Sep 2023 18:51:38 -0700 Subject: [PATCH 48/94] smack: Record transmuting in smk_transmuted commit 2c085f3a8f23c9b444e8b99d93c15d7ce870fc4e upstream. smack_dentry_create_files_as() determines whether transmuting should occur based on the label of the parent directory the new inode will be added to, and not the label of the directory where it is created. This helps for example to do transmuting on overlayfs, since the latter first creates the inode in the working directory, and then moves it to the correct destination. However, despite smack_dentry_create_files_as() provides the correct label, smack_inode_init_security() does not know from passed information whether or not transmuting occurred. Without this information, smack_inode_init_security() cannot set SMK_INODE_CHANGED in smk_flags, which will result in the SMACK64TRANSMUTE xattr not being set in smack_d_instantiate(). Thus, add the smk_transmuted field to the task_smack structure, and set it in smack_dentry_create_files_as() to smk_task if transmuting occurred. If smk_task is equal to smk_transmuted in smack_inode_init_security(), act as if transmuting was successful but without taking the label from the parent directory (the inode label was already set correctly from the current credentials in smack_inode_alloc_security()). Signed-off-by: Roberto Sassu Signed-off-by: Casey Schaufler [4.19: adjusted for the lack of helper functions] Fixes: d6d80cb57be4 ("Smack: Base support for overlayfs") Signed-off-by: Munehisa Kamata Signed-off-by: Sasha Levin --- security/smack/smack.h | 1 + security/smack/smack_lsm.c | 41 +++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/security/smack/smack.h b/security/smack/smack.h index f7db791fb566..62aa4bc25426 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -120,6 +120,7 @@ struct inode_smack { struct task_smack { struct smack_known *smk_task; /* label for access control */ struct smack_known *smk_forked; /* label when forked */ + struct smack_known *smk_transmuted;/* label when transmuted */ struct list_head smk_rules; /* per task access rules */ struct mutex smk_rules_lock; /* lock for the rules */ struct list_head smk_relabel; /* transit allowed labels */ diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index db729834d8ba..266eb8ca3381 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1032,8 +1032,9 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, const char **name, void **value, size_t *len) { + struct task_smack *tsp = current_security(); struct inode_smack *issp = inode->i_security; - struct smack_known *skp = smk_of_current(); + struct smack_known *skp = smk_of_task(tsp); struct smack_known *isp = smk_of_inode(inode); struct smack_known *dsp = smk_of_inode(dir); int may; @@ -1042,20 +1043,34 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir, *name = XATTR_SMACK_SUFFIX; if (value && len) { - rcu_read_lock(); - may = smk_access_entry(skp->smk_known, dsp->smk_known, - &skp->smk_rules); - rcu_read_unlock(); + /* + * If equal, transmuting already occurred in + * smack_dentry_create_files_as(). No need to check again. + */ + if (tsp->smk_task != tsp->smk_transmuted) { + rcu_read_lock(); + may = smk_access_entry(skp->smk_known, dsp->smk_known, + &skp->smk_rules); + rcu_read_unlock(); + } /* - * If the access rule allows transmutation and - * the directory requests transmutation then - * by all means transmute. + * In addition to having smk_task equal to smk_transmuted, + * if the access rule allows transmutation and the directory + * requests transmutation then by all means transmute. * Mark the inode as changed. */ - if (may > 0 && ((may & MAY_TRANSMUTE) != 0) && - smk_inode_transmutable(dir)) { - isp = dsp; + if ((tsp->smk_task == tsp->smk_transmuted) || + (may > 0 && ((may & MAY_TRANSMUTE) != 0) && + smk_inode_transmutable(dir))) { + /* + * The caller of smack_dentry_create_files_as() + * should have overridden the current cred, so the + * inode label was already set correctly in + * smack_inode_alloc_security(). + */ + if (tsp->smk_task != tsp->smk_transmuted) + isp = dsp; issp->smk_flags |= SMK_INODE_CHANGED; } @@ -4677,8 +4692,10 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode, * providing access is transmuting use the containing * directory label instead of the process label. */ - if (may > 0 && (may & MAY_TRANSMUTE)) + if (may > 0 && (may & MAY_TRANSMUTE)) { ntsp->smk_task = isp->smk_inode; + ntsp->smk_transmuted = ntsp->smk_task; + } } return 0; } From c334650150c29234b0923476f51573ae1b2f252a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Sep 2023 01:25:55 +0300 Subject: [PATCH 49/94] serial: 8250_port: Check IRQ data before use commit cce7fc8b29961b64fadb1ce398dc5ff32a79643b upstream. In case the leaf driver wants to use IRQ polling (irq = 0) and IIR register shows that an interrupt happened in the 8250 hardware the IRQ data can be NULL. In such a case we need to skip the wake event as we came to this path from the timer interrupt and quite likely system is already awake. Without this fix we have got an Oops: serial8250: ttyS0 at I/O 0x3f8 (irq = 0, base_baud = 115200) is a 16550A ... BUG: kernel NULL pointer dereference, address: 0000000000000010 RIP: 0010:serial8250_handle_irq+0x7c/0x240 Call Trace: ? serial8250_handle_irq+0x7c/0x240 ? __pfx_serial8250_timeout+0x10/0x10 Fixes: 0ba9e3a13c6a ("serial: 8250: Add missing wakeup event reporting") Cc: stable Signed-off-by: Andy Shevchenko Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230831222555.614426-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index d3161be35b1b..7f5d51de622d 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1910,7 +1910,10 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) skip_rx = true; if (status & (UART_LSR_DR | UART_LSR_BI) && !skip_rx) { - if (irqd_is_wakeup_set(irq_get_irq_data(port->irq))) + struct irq_data *d; + + d = irq_get_irq_data(port->irq); + if (d && irqd_is_wakeup_set(d)) pm_wakeup_event(tport->tty->dev, 0); if (!up->dma || handle_rx_dma(up, iir)) status = serial8250_rx_chars(up, status); From bb61224f6abc8e71bfdf06d7c984e23460875f5b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Sep 2023 23:17:31 +0900 Subject: [PATCH 50/94] nilfs2: fix potential use after free in nilfs_gccache_submit_read_data() commit 7ee29facd8a9c5a26079148e36bcf07141b3a6bc upstream. In nilfs_gccache_submit_read_data(), brelse(bh) is called to drop the reference count of bh when the call to nilfs_dat_translate() fails. If the reference count hits 0 and its owner page gets unlocked, bh may be freed. However, bh->b_page is dereferenced to put the page after that, which may result in a use-after-free bug. This patch moves the release operation after unlocking and putting the page. NOTE: The function in question is only called in GC, and in combination with current userland tools, address translation using DAT does not occur in that function, so the code path that causes this issue will not be executed. However, it is possible to run that code path by intentionally modifying the userland GC library or by calling the GC ioctl directly. [konishi.ryusuke@gmail.com: NOTE added to the commit log] Link: https://lkml.kernel.org/r/1543201709-53191-1-git-send-email-bianpan2016@163.com Link: https://lkml.kernel.org/r/20230921141731.10073-1-konishi.ryusuke@gmail.com Fixes: a3d93f709e89 ("nilfs2: block cache for garbage collection") Signed-off-by: Pan Bian Reported-by: Ferry Meng Closes: https://lkml.kernel.org/r/20230818092022.111054-1-mengferry@linux.alibaba.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/gcinode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 114774ac2185..cef46650102e 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -73,10 +73,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, struct the_nilfs *nilfs = inode->i_sb->s_fs_info; err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); - if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ - brelse(bh); + if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */ goto failed; - } } lock_buffer(bh); @@ -102,6 +100,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, failed: unlock_page(bh->b_page); put_page(bh->b_page); + if (unlikely(err)) + brelse(bh); return err; } From 4546f1c4d3353823b3db0c5849de5bbadd687e0c Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 7 Sep 2023 15:24:34 +0800 Subject: [PATCH 51/94] ALSA: hda: Disable power save for solving pop issue on Lenovo ThinkCentre M70q commit 057a28ef93bdbe84326d34cdb5543afdaab49fe1 upstream. Lenovo ThinkCentre M70q had boot up pop noise. Disable power save will solve pop issue. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/315900e2efef42fd9855eacfeb443abd@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 40d596248fab..aaca36250dda 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2364,6 +2364,7 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), + SND_PCI_QUIRK(0x17aa, 0x316e, "Lenovo ThinkCentre M70q", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1689623 */ SND_PCI_QUIRK(0x17aa, 0x367b, "Lenovo IdeaCentre B550", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ From 0994f3e464ff05793438d6f20c5658fc9bc22054 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 18 Sep 2023 22:24:50 +0200 Subject: [PATCH 52/94] ata: libata-scsi: ignore reserved bits for REPORT SUPPORTED OPERATION CODES commit 3ef600923521616ebe192c893468ad0424de2afb upstream. For REPORT SUPPORTED OPERATION CODES command, the service action field is defined as bits 0-4 in the second byte in the CDB. Bits 5-7 in the second byte are reserved. Only look at the service action field in the second byte when determining if the MAINTENANCE IN opcode is a REPORT SUPPORTED OPERATION CODES command. This matches how we only look at the service action field in the second byte when determining if the SERVICE ACTION IN(16) opcode is a READ CAPACITY(16) command (reserved bits 5-7 in the second byte are ignored). Fixes: 7b2030942859 ("libata: Add support for SCT Write Same") Cc: stable@vger.kernel.org Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 2295b74714e1..dc5223981270 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4561,7 +4561,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) break; case MAINTENANCE_IN: - if (scsicmd[1] == MI_REPORT_SUPPORTED_OPERATION_CODES) + if ((scsicmd[1] & 0x1f) == MI_REPORT_SUPPORTED_OPERATION_CODES) ata_scsi_rbuf_fill(&args, ata_scsiop_maint_in); else ata_scsi_set_invalid_field(dev, cmd, 1, 0xff); From 602f3633d9043762625b7d0c718823e0607b023e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 14 Sep 2023 23:08:44 +0200 Subject: [PATCH 53/94] i2c: i801: unregister tco_pdev in i801_probe() error path commit 3914784553f68c931fc666dbe7e86fe881aada38 upstream. We have to unregister tco_pdev also if i2c_add_adapter() fails. Fixes: 9424693035a5 ("i2c: i801: Create iTCO device on newer Intel PCHs") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Mika Westerberg Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index fb0ddaad87d2..6017f6aeee89 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1679,6 +1679,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) "SMBus I801 adapter at %04lx", priv->smba); err = i2c_add_adapter(&priv->adapter); if (err) { + platform_device_unregister(priv->tco_pdev); i801_acpi_remove(priv); return err; } From 88fba1c785e4f62acf2b09ab55ed3a1b3a15c51b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 18 Sep 2023 10:34:51 -0400 Subject: [PATCH 54/94] btrfs: properly report 0 avail for very full file systems commit 58bfe2ccec5f9f137b41dd38f335290dcc13cd5c upstream. A user reported some issues with smaller file systems that get very full. While investigating this issue I noticed that df wasn't showing 100% full, despite having 0 chunk space and having < 1MiB of available metadata space. This turns out to be an overflow issue, we're doing: total_available_metadata_space - SZ_4M < global_block_rsv_size to determine if there's not enough space to make metadata allocations, which overflows if total_available_metadata_space is < 4M. Fix this by checking to see if our available space is greater than the 4M threshold. This makes df properly report 100% usage on the file system. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 521f6c2091ad..a59543951851 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2196,7 +2196,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) * calculated f_bavail. */ if (!mixed && block_rsv->space_info->full && - total_free_meta - thresh < block_rsv->size) + (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size)) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; From 4c97395712ca35f50a222886bdf3004433a22405 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 13 Sep 2023 08:26:47 +0300 Subject: [PATCH 55/94] net: thunderbolt: Fix TCPv6 GSO checksum calculation commit e0b65f9b81fef180cf5f103adecbe5505c961153 upstream. Alex reported that running ssh over IPv6 does not work with Thunderbolt/USB4 networking driver. The reason for that is that driver should call skb_is_gso() before calling skb_is_gso_v6(), and it should not return false after calculates the checksum successfully. This probably was a copy paste error from the original driver where it was done properly. Reported-by: Alex Balcanquall Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/thunderbolt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 51b5442fbc66..e0b4c54e6c08 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -961,12 +961,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 0, ip_hdr(skb)->protocol, 0); - } else if (skb_is_gso_v6(skb)) { + } else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) { tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data); *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); - return false; } else if (protocol == htons(ETH_P_IPV6)) { tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset; *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, From af72179da9a317269da808c2b8f853cb2b90a376 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Sep 2023 20:38:13 +0900 Subject: [PATCH 56/94] ata: libata-core: Fix ata_port_request_pm() locking commit 3b8e0af4a7a331d1510e963b8fd77e2fca0a77f1 upstream. The function ata_port_request_pm() checks the port flag ATA_PFLAG_PM_PENDING and calls ata_port_wait_eh() if this flag is set to ensure that power management operations for a port are not scheduled simultaneously. However, this flag check is done without holding the port lock. Fix this by taking the port lock on entry to the function and checking the flag under this lock. The lock is released and re-taken if ata_port_wait_eh() needs to be called. The two WARN_ON() macros checking that the ATA_PFLAG_PM_PENDING flag was cleared are removed as the first call is racy and the second one done without holding the port lock. Fixes: 5ef41082912b ("ata: add ata port system PM callbacks") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Chia-Lin Kao (AceLan) Reviewed-by: Niklas Cassel Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen Reviewed-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 4a7da8f744e0..7f7ccfd88920 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5756,17 +5756,19 @@ static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, struct ata_link *link; unsigned long flags; - /* Previous resume operation might still be in - * progress. Wait for PM_PENDING to clear. - */ - if (ap->pflags & ATA_PFLAG_PM_PENDING) { - ata_port_wait_eh(ap); - WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); - } - - /* request PM ops to EH */ spin_lock_irqsave(ap->lock, flags); + /* + * A previous PM operation might still be in progress. Wait for + * ATA_PFLAG_PM_PENDING to clear. + */ + if (ap->pflags & ATA_PFLAG_PM_PENDING) { + spin_unlock_irqrestore(ap->lock, flags); + ata_port_wait_eh(ap); + spin_lock_irqsave(ap->lock, flags); + } + + /* Request PM operation to EH */ ap->pm_mesg = mesg; ap->pflags |= ATA_PFLAG_PM_PENDING; ata_for_each_link(link, ap, HOST_FIRST) { @@ -5778,10 +5780,8 @@ static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, spin_unlock_irqrestore(ap->lock, flags); - if (!async) { + if (!async) ata_port_wait_eh(ap); - WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); - } } /* From 040251185b9d47b3baf7b371a56db3e55137d479 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 26 Aug 2023 13:07:36 +0900 Subject: [PATCH 57/94] ata: libata-core: Fix port and device removal commit 84d76529c650f887f1e18caee72d6f0589e1baf9 upstream. Whenever an ATA adapter driver is removed (e.g. rmmod), ata_port_detach() is called repeatedly for all the adapter ports to remove (unload) the devices attached to the port and delete the port device itself. Removing of devices is done using libata EH with the ATA_PFLAG_UNLOADING port flag set. This causes libata EH to execute ata_eh_unload() which disables all devices attached to the port. ata_port_detach() finishes by calling scsi_remove_host() to remove the scsi host associated with the port. This function will trigger the removal of all scsi devices attached to the host and in the case of disks, calls to sd_shutdown() which will flush the device write cache and stop the device. However, given that the devices were already disabled by ata_eh_unload(), the synchronize write cache command and start stop unit commands fail. E.g. running "rmmod ahci" with first removing sd_mod results in error messages like: ata13.00: disable device sd 0:0:0:0: [sda] Synchronizing SCSI cache sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK sd 0:0:0:0: [sda] Stopping disk sd 0:0:0:0: [sda] Start/Stop Unit failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK Fix this by removing all scsi devices of the ata devices connected to the port before scheduling libata EH to disable the ATA devices. Fixes: 720ba12620ee ("[PATCH] libata-hp: update unload-unplug") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel Tested-by: Chia-Lin Kao (AceLan) Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 7f7ccfd88920..b7471f732402 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6750,11 +6750,30 @@ static void ata_port_detach(struct ata_port *ap) if (!ap->ops->error_handler) goto skip_eh; - /* tell EH we're leaving & flush EH */ + /* Wait for any ongoing EH */ + ata_port_wait_eh(ap); + + mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); + + /* Remove scsi devices */ + ata_for_each_link(link, ap, HOST_FIRST) { + ata_for_each_dev(dev, link, ALL) { + if (dev->sdev) { + spin_unlock_irqrestore(ap->lock, flags); + scsi_remove_device(dev->sdev); + spin_lock_irqsave(ap->lock, flags); + dev->sdev = NULL; + } + } + } + + /* Tell EH to disable all devices */ ap->pflags |= ATA_PFLAG_UNLOADING; ata_port_schedule_eh(ap); + spin_unlock_irqrestore(ap->lock, flags); + mutex_unlock(&ap->scsi_scan_mutex); /* wait till EH commits suicide */ ata_port_wait_eh(ap); From 678cb24e70ae69a01e43687bffa51a2041eb0b9a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 8 Sep 2023 20:04:52 +0900 Subject: [PATCH 58/94] ata: libata-core: Do not register PM operations for SAS ports commit 75e2bd5f1ede42a2bc88aa34b431e1ace8e0bea0 upstream. libsas does its own domain based power management of ports. For such ports, libata should not use a device type defining power management operations as executing these operations for suspend/resume in addition to libsas calls to ata_sas_port_suspend() and ata_sas_port_resume() is not necessary (and likely dangerous to do, even though problems are not seen currently). Introduce the new ata_port_sas_type device_type for ports managed by libsas. This new device type is used in ata_tport_add() and is defined without power management operations. Fixes: 2fcbdcb4c802 ("[SCSI] libata: export ata_port suspend/resume infrastructure for sas") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Chia-Lin Kao (AceLan) Tested-by: Geert Uytterhoeven Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 2 +- drivers/ata/libata-transport.c | 9 ++++++++- drivers/ata/libata.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b7471f732402..c563b9545c6c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5947,7 +5947,7 @@ void ata_host_resume(struct ata_host *host) #endif const struct device_type ata_port_type = { - .name = "ata_port", + .name = ATA_PORT_TYPE_NAME, #ifdef CONFIG_PM .pm = &ata_port_pm_ops, #endif diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index f04f4f977400..5eae76d8b655 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -266,6 +266,10 @@ void ata_tport_delete(struct ata_port *ap) put_device(dev); } +static const struct device_type ata_port_sas_type = { + .name = ATA_PORT_TYPE_NAME, +}; + /** ata_tport_add - initialize a transport ATA port structure * * @parent: parent device @@ -283,7 +287,10 @@ int ata_tport_add(struct device *parent, struct device *dev = &ap->tdev; device_initialize(dev); - dev->type = &ata_port_type; + if (ap->flags & ATA_FLAG_SAS_HOST) + dev->type = &ata_port_sas_type; + else + dev->type = &ata_port_type; dev->parent = parent; ata_host_get(ap->host); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index f953cb4bb1ba..b568d6b9350a 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -46,6 +46,8 @@ enum { ATA_DNXFER_QUIET = (1 << 31), }; +#define ATA_PORT_TYPE_NAME "ata_port" + extern atomic_t ata_print_id; extern int atapi_passthru16; extern int libata_fua; From 474f306e739d81c4ecb148ae94f2c85415157e23 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 22 Sep 2023 22:55:16 +0200 Subject: [PATCH 59/94] ata: libata-sata: increase PMP SRST timeout to 10s commit 753a4d531bc518633ea88ac0ed02b25a16823d51 upstream. On certain SATA controllers, softreset fails after wakeup from S2RAM with the message "softreset failed (1st FIS failed)", sometimes resulting in drives not being detected again. With the increased timeout, this issue is avoided. Instead, "softreset failed (device not ready)" is now logged 1-2 times; this later failure seems to cause fewer problems however, and the drives are detected reliably once they've spun up and the probe is retried. The issue was observed with the primary SATA controller of the QNAP TS-453B, which is an "Intel Corporation Celeron/Pentium Silver Processor SATA Controller [8086:31e3] (rev 06)" integrated in the Celeron J4125 CPU, and the following drives: - Seagate IronWolf ST12000VN0008 - Seagate IronWolf ST8000NE0004 The SATA controller seems to be more relevant to this issue than the drives, as the same drives are always detected reliably on the secondary SATA controller on the same board (an ASMedia 106x) without any "softreset failed" errors even without the increased timeout. Fixes: e7d3ef13d52a ("libata: change drive ready wait after hard reset to 5s") Cc: stable@vger.kernel.org Signed-off-by: Matthias Schiffer Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/libata.h b/include/linux/libata.h index 73cd0182452c..af50f37d934b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -311,7 +311,7 @@ enum { * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 5000, + ATA_TMOUT_PMP_SRST_WAIT = 10000, /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might * be a spurious PHY event, so ignore the first PHY event that From 3dc98986373e703a2b463e5963ff978469c87b0f Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Thu, 7 Sep 2023 11:18:08 +1000 Subject: [PATCH 60/94] fs: binfmt_elf_efpic: fix personality for ELF-FDPIC commit 7c3151585730b7095287be8162b846d31e6eee61 upstream. The elf-fdpic loader hard sets the process personality to either PER_LINUX_FDPIC for true elf-fdpic binaries or to PER_LINUX for normal ELF binaries (in this case they would be constant displacement compiled with -pie for example). The problem with that is that it will lose any other bits that may be in the ELF header personality (such as the "bug emulation" bits). On the ARM architecture the ADDR_LIMIT_32BIT flag is used to signify a normal 32bit binary - as opposed to a legacy 26bit address binary. This matters since start_thread() will set the ARM CPSR register as required based on this flag. If the elf-fdpic loader loses this bit the process will be mis-configured and crash out pretty quickly. Modify elf-fdpic loader personality setting so that it preserves the upper three bytes by using the SET_PERSONALITY macro to set it. This macro in the generic case sets PER_LINUX and preserves the upper bytes. Architectures can override this for their specific use case, and ARM does exactly this. The problem shows up quite easily running under qemu using the ARM architecture, but not necessarily on all types of real ARM hardware. If the underlying ARM processor does not support the legacy 26-bit addressing mode then everything will work as expected. Link: https://lkml.kernel.org/r/20230907011808.2985083-1-gerg@kernel.org Fixes: 1bde925d23547 ("fs/binfmt_elf_fdpic.c: provide NOMMU loader for regular ELF binaries") Signed-off-by: Greg Ungerer Cc: Al Viro Cc: Christian Brauner Cc: Eric W. Biederman Cc: Greg Ungerer Cc: Kees Cook Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_elf_fdpic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 64d0b838085d..a7c2efcd0a4a 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -349,10 +349,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) /* there's now no turning back... the old userspace image is dead, * defunct, deceased, etc. */ + SET_PERSONALITY(exec_params.hdr); if (elf_check_fdpic(&exec_params.hdr)) - set_personality(PER_LINUX_FDPIC); - else - set_personality(PER_LINUX); + current->personality |= PER_LINUX_FDPIC; if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; From 996f30b44369fa2d1fe62d30a14ef27e05b30448 Mon Sep 17 00:00:00 2001 From: Shida Zhang Date: Thu, 3 Aug 2023 14:09:38 +0800 Subject: [PATCH 61/94] ext4: fix rec_len verify error commit 7fda67e8c3ab6069f75888f67958a6d30454a9f6 upstream. With the configuration PAGE_SIZE 64k and filesystem blocksize 64k, a problem occurred when more than 13 million files were directly created under a directory: EXT4-fs error (device xx): ext4_dx_csum_set:492: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D. EXT4-fs error (device xx): ext4_dx_csum_verify:463: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D. EXT4-fs error (device xx): dx_probe:856: inode #xxxx: block 8188: comm xxxxx: Directory index failed checksum When enough files are created, the fake_dirent->reclen will be 0xffff. it doesn't equal to the blocksize 65536, i.e. 0x10000. But it is not the same condition when blocksize equals to 4k. when enough files are created, the fake_dirent->reclen will be 0x1000. it equals to the blocksize 4k, i.e. 0x1000. The problem seems to be related to the limitation of the 16-bit field when the blocksize is set to 64k. To address this, helpers like ext4_rec_len_{from,to}_disk has already been introduced to complete the conversion between the encoded and the plain form of rec_len. So fix this one by using the helper, and all the other in this file too. Cc: stable@kernel.org Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes") Suggested-by: Andreas Dilger Suggested-by: Darrick J. Wong Signed-off-by: Shida Zhang Reviewed-by: Andreas Dilger Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20230803060938.1929759-1-zhangshida@kylinos.cn Signed-off-by: Theodore Ts'o Signed-off-by: Shida Zhang Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index db9bba3473b5..93d392576c12 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -322,17 +322,17 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, struct ext4_dir_entry *de) { struct ext4_dir_entry_tail *t; + int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); #ifdef PARANOID struct ext4_dir_entry *d, *top; d = de; top = (struct ext4_dir_entry *)(((void *)de) + - (EXT4_BLOCK_SIZE(inode->i_sb) - - sizeof(struct ext4_dir_entry_tail))); - while (d < top && d->rec_len) + (blocksize - sizeof(struct ext4_dir_entry_tail))); + while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize)) d = (struct ext4_dir_entry *)(((void *)d) + - le16_to_cpu(d->rec_len)); + ext4_rec_len_from_disk(d->rec_len, blocksize)); if (d != top) return NULL; @@ -343,7 +343,8 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, #endif if (t->det_reserved_zero1 || - le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || + (ext4_rec_len_from_disk(t->det_rec_len, blocksize) != + sizeof(struct ext4_dir_entry_tail)) || t->det_reserved_zero2 || t->det_reserved_ft != EXT4_FT_DIR_CSUM) return NULL; @@ -425,13 +426,14 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode, struct ext4_dir_entry *dp; struct dx_root_info *root; int count_offset; + int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); + unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize); - if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) + if (rlen == blocksize) count_offset = 8; - else if (le16_to_cpu(dirent->rec_len) == 12) { + else if (rlen == 12) { dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); - if (le16_to_cpu(dp->rec_len) != - EXT4_BLOCK_SIZE(inode->i_sb) - 12) + if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12) return NULL; root = (struct dx_root_info *)(((void *)dp + 12)); if (root->reserved_zero || @@ -1244,6 +1246,7 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh, unsigned int buflen = bh->b_size; char *base = bh->b_data; struct dx_hash_info h = *hinfo; + int blocksize = EXT4_BLOCK_SIZE(dir->i_sb); if (ext4_has_metadata_csum(dir->i_sb)) buflen -= sizeof(struct ext4_dir_entry_tail); @@ -1257,11 +1260,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh, map_tail--; map_tail->hash = h.hash; map_tail->offs = ((char *) de - base)>>2; - map_tail->size = le16_to_cpu(de->rec_len); + map_tail->size = ext4_rec_len_from_disk(de->rec_len, + blocksize); count++; cond_resched(); } - de = ext4_next_entry(de, dir->i_sb->s_blocksize); + de = ext4_next_entry(de, blocksize); } return count; } From 6b6d459bbf6e7365d39c7794c7b556a9bad5d217 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 4 Sep 2023 22:42:56 +0200 Subject: [PATCH 62/94] ata: libata: disallow dev-initiated LPM transitions to unsupported states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 24e0e61db3cb86a66824531989f1df80e0939f26 upstream. In AHCI 1.3.1, the register description for CAP.SSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Slumber state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Slumber requests." In AHCI 1.3.1, the register description for CAP.PSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Partial state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Partial requests." Ensure that we always set the corresponding bits in PxSCTL.IPM, such that a device is not allowed to initiate transitions to power states which are unsupported by the HBA. DevSleep is always initiated by the HBA, however, for completeness, set the corresponding bit in PxSCTL.IPM such that agressive link power management cannot transition to DevSleep if DevSleep is not supported. sata_link_scr_lpm() is used by libahci, ata_piix and libata-pmp. However, only libahci has the ability to read the CAP/CAP2 register to see if these features are supported. Therefore, in order to not introduce any regressions on ata_piix or libata-pmp, create flags that indicate that the respective feature is NOT supported. This way, the behavior for ata_piix and libata-pmp should remain unchanged. This change is based on a patch originally submitted by Runa Guo-oc. Signed-off-by: Niklas Cassel Fixes: 1152b2617a6e ("libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 9 +++++++++ drivers/ata/libata-core.c | 19 ++++++++++++++++--- include/linux/libata.h | 4 ++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 2e4bcf2d0a51..ab3ea47ecce3 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1864,6 +1864,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else dev_info(&pdev->dev, "SSS flag set, parallel bus scan disabled\n"); + if (!(hpriv->cap & HOST_CAP_PART)) + host->flags |= ATA_HOST_NO_PART; + + if (!(hpriv->cap & HOST_CAP_SSC)) + host->flags |= ATA_HOST_NO_SSC; + + if (!(hpriv->cap2 & HOST_CAP2_SDS)) + host->flags |= ATA_HOST_NO_DEVSLP; + if (pi.flags & ATA_FLAG_EM) ahci_reset_em(host); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c563b9545c6c..2b9f6769f80d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3997,10 +3997,23 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, case ATA_LPM_MED_POWER_WITH_DIPM: case ATA_LPM_MIN_POWER_WITH_PARTIAL: case ATA_LPM_MIN_POWER: - if (ata_link_nr_enabled(link) > 0) - /* no restrictions on LPM transitions */ + if (ata_link_nr_enabled(link) > 0) { + /* assume no restrictions on LPM transitions */ scontrol &= ~(0x7 << 8); - else { + + /* + * If the controller does not support partial, slumber, + * or devsleep, then disallow these transitions. + */ + if (link->ap->host->flags & ATA_HOST_NO_PART) + scontrol |= (0x1 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_SSC) + scontrol |= (0x2 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_DEVSLP) + scontrol |= (0x4 << 8); + } else { /* empty port, power off */ scontrol &= ~0xf; scontrol |= (0x1 << 2); diff --git a/include/linux/libata.h b/include/linux/libata.h index af50f37d934b..361a52e418f7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -278,6 +278,10 @@ enum { ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ + ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ + ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ + ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ From ce63d45f45ae8b03f28b3329f6b6e4d072f7d2c5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 7 Oct 2023 13:46:47 +0200 Subject: [PATCH 63/94] Revert "drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions" This reverts commit 3ce2cd63e8ee037644db0cbea65e6c40ab6cc178 which is commit aa838896d87af561a33ecefea1caa4c15a68bc47 upstream. Ben writes: When I looked into the referenced security issue, it seemed to only be exploitable through wakelock names, and in the upstream kernel only after commit c8377adfa781 "PM / wakeup: Show wakeup sources stats in sysfs" (first included in 5.4). So I would be interested to know if and why a fix was needed for 4.19. More importantly, this backported version uniformly converts to sysfs_emit(), but there are 3 places sysfs_emit_at() must be used instead: Reported-by: Ben Hutchings Link: https://lore.kernel.org/r/95831df76c41a53bc3e1ac8ece64915dd63763a1.camel@decadent.org.uk Cc: Joe Perches Cc: Brennan Lamoreaux Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 2 +- drivers/base/cacheinfo.c | 18 ++++----- drivers/base/core.c | 8 ++-- drivers/base/cpu.c | 34 ++++++++--------- drivers/base/firmware_loader/fallback.c | 2 +- drivers/base/memory.c | 24 ++++++------ drivers/base/node.c | 34 ++++++++--------- drivers/base/platform.c | 2 +- drivers/base/power/sysfs.c | 50 ++++++++++++------------- drivers/base/soc.c | 8 ++-- 10 files changed, 91 insertions(+), 91 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index d89f618231cb..e7cb0c6ade81 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -44,7 +44,7 @@ static ssize_t cpu_capacity_show(struct device *dev, { struct cpu *cpu = container_of(dev, struct cpu, dev); - return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id)); + return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id)); } static ssize_t cpu_capacity_store(struct device *dev, diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 51eb403f89de..ce015ce2977c 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -372,7 +372,7 @@ static ssize_t size_show(struct device *dev, { struct cacheinfo *this_leaf = dev_get_drvdata(dev); - return sysfs_emit(buf, "%uK\n", this_leaf->size >> 10); + return sprintf(buf, "%uK\n", this_leaf->size >> 10); } static ssize_t shared_cpumap_show_func(struct device *dev, bool list, char *buf) @@ -402,11 +402,11 @@ static ssize_t type_show(struct device *dev, switch (this_leaf->type) { case CACHE_TYPE_DATA: - return sysfs_emit(buf, "Data\n"); + return sprintf(buf, "Data\n"); case CACHE_TYPE_INST: - return sysfs_emit(buf, "Instruction\n"); + return sprintf(buf, "Instruction\n"); case CACHE_TYPE_UNIFIED: - return sysfs_emit(buf, "Unified\n"); + return sprintf(buf, "Unified\n"); default: return -EINVAL; } @@ -420,11 +420,11 @@ static ssize_t allocation_policy_show(struct device *dev, int n = 0; if ((ci_attr & CACHE_READ_ALLOCATE) && (ci_attr & CACHE_WRITE_ALLOCATE)) - n = sysfs_emit(buf, "ReadWriteAllocate\n"); + n = sprintf(buf, "ReadWriteAllocate\n"); else if (ci_attr & CACHE_READ_ALLOCATE) - n = sysfs_emit(buf, "ReadAllocate\n"); + n = sprintf(buf, "ReadAllocate\n"); else if (ci_attr & CACHE_WRITE_ALLOCATE) - n = sysfs_emit(buf, "WriteAllocate\n"); + n = sprintf(buf, "WriteAllocate\n"); return n; } @@ -436,9 +436,9 @@ static ssize_t write_policy_show(struct device *dev, int n = 0; if (ci_attr & CACHE_WRITE_THROUGH) - n = sysfs_emit(buf, "WriteThrough\n"); + n = sprintf(buf, "WriteThrough\n"); else if (ci_attr & CACHE_WRITE_BACK) - n = sysfs_emit(buf, "WriteBack\n"); + n = sprintf(buf, "WriteBack\n"); return n; } diff --git a/drivers/base/core.c b/drivers/base/core.c index 0332800dffd8..6e380ad9d08a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -994,7 +994,7 @@ ssize_t device_show_ulong(struct device *dev, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); - return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var)); + return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_ulong); @@ -1019,7 +1019,7 @@ ssize_t device_show_int(struct device *dev, { struct dev_ext_attribute *ea = to_ext_attr(attr); - return sysfs_emit(buf, "%d\n", *(int *)(ea->var)); + return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_int); @@ -1040,7 +1040,7 @@ ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, { struct dev_ext_attribute *ea = to_ext_attr(attr); - return sysfs_emit(buf, "%d\n", *(bool *)(ea->var)); + return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_bool); @@ -1273,7 +1273,7 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr, device_lock(dev); val = !dev->offline; device_unlock(dev); - return sysfs_emit(buf, "%u\n", val); + return sprintf(buf, "%u\n", val); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 607fc189e57c..b1bb6f43f896 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -156,7 +156,7 @@ static ssize_t show_crash_notes(struct device *dev, struct device_attribute *att * operation should be safe. No locking required. */ addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum)); - rc = sysfs_emit(buf, "%Lx\n", addr); + rc = sprintf(buf, "%Lx\n", addr); return rc; } static DEVICE_ATTR(crash_notes, 0400, show_crash_notes, NULL); @@ -167,7 +167,7 @@ static ssize_t show_crash_notes_size(struct device *dev, { ssize_t rc; - rc = sysfs_emit(buf, "%zu\n", sizeof(note_buf_t)); + rc = sprintf(buf, "%zu\n", sizeof(note_buf_t)); return rc; } static DEVICE_ATTR(crash_notes_size, 0400, show_crash_notes_size, NULL); @@ -264,7 +264,7 @@ static ssize_t print_cpus_offline(struct device *dev, nr_cpu_ids, total_cpus-1); } - n += sysfs_emit(&buf[n], "\n"); + n += snprintf(&buf[n], len - n, "\n"); return n; } static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); @@ -272,7 +272,7 @@ static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { - int n = 0; + int n = 0, len = PAGE_SIZE-2; cpumask_var_t isolated; if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) @@ -280,7 +280,7 @@ static ssize_t print_cpus_isolated(struct device *dev, cpumask_andnot(isolated, cpu_possible_mask, housekeeping_cpumask(HK_FLAG_DOMAIN)); - n = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated)); + n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated)); free_cpumask_var(isolated); @@ -292,9 +292,9 @@ static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); static ssize_t print_cpus_nohz_full(struct device *dev, struct device_attribute *attr, char *buf) { - int n = 0; + int n = 0, len = PAGE_SIZE-2; - n = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); + n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); return n; } @@ -328,7 +328,7 @@ static ssize_t print_cpu_modalias(struct device *dev, ssize_t n; u32 i; - n = sysfs_emit(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", + n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", CPU_FEATURE_TYPEVAL); for (i = 0; i < MAX_CPU_FEATURES; i++) @@ -520,56 +520,56 @@ static void __init cpu_dev_register_generic(void) ssize_t __weak cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "Not affected\n"); + return sprintf(buf, "Not affected\n"); } ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 2116926cc1d5..821e27bda4ca 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -215,7 +215,7 @@ static ssize_t firmware_loading_show(struct device *dev, loading = fw_sysfs_loading(fw_sysfs->fw_priv); mutex_unlock(&fw_lock); - return sysfs_emit(buf, "%d\n", loading); + return sprintf(buf, "%d\n", loading); } /* one pages buffer should be mapped/unmapped only once */ diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5dbe00a5c7c1..e270abc86d46 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -121,7 +121,7 @@ static ssize_t show_mem_start_phys_index(struct device *dev, unsigned long phys_index; phys_index = mem->start_section_nr / sections_per_block; - return sysfs_emit(buf, "%08lx\n", phys_index); + return sprintf(buf, "%08lx\n", phys_index); } /* @@ -145,7 +145,7 @@ static ssize_t show_mem_removable(struct device *dev, } out: - return sysfs_emit(buf, "%d\n", ret); + return sprintf(buf, "%d\n", ret); } /* @@ -163,17 +163,17 @@ static ssize_t show_mem_state(struct device *dev, */ switch (mem->state) { case MEM_ONLINE: - len = sysfs_emit(buf, "online\n"); + len = sprintf(buf, "online\n"); break; case MEM_OFFLINE: - len = sysfs_emit(buf, "offline\n"); + len = sprintf(buf, "offline\n"); break; case MEM_GOING_OFFLINE: - len = sysfs_emit(buf, "going-offline\n"); + len = sprintf(buf, "going-offline\n"); break; default: - len = sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", - mem->state); + len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", + mem->state); WARN_ON(1); break; } @@ -384,7 +384,7 @@ static ssize_t show_phys_device(struct device *dev, struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); - return sysfs_emit(buf, "%d\n", mem->phys_device); + return sprintf(buf, "%d\n", mem->phys_device); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -422,7 +422,7 @@ static ssize_t show_valid_zones(struct device *dev, */ if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn)) - return sysfs_emit(buf, "none\n"); + return sprintf(buf, "none\n"); start_pfn = valid_start_pfn; strcat(buf, page_zone(pfn_to_page(start_pfn))->name); goto out; @@ -456,7 +456,7 @@ static ssize_t print_block_size(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%lx\n", get_memory_block_size()); + return sprintf(buf, "%lx\n", get_memory_block_size()); } static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL); @@ -470,9 +470,9 @@ show_auto_online_blocks(struct device *dev, struct device_attribute *attr, char *buf) { if (memhp_auto_online) - return sysfs_emit(buf, "online\n"); + return sprintf(buf, "online\n"); else - return sysfs_emit(buf, "offline\n"); + return sprintf(buf, "offline\n"); } static ssize_t diff --git a/drivers/base/node.c b/drivers/base/node.c index 8defeace001e..60c2e32f9f61 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -69,7 +69,7 @@ static ssize_t node_read_meminfo(struct device *dev, struct sysinfo i; si_meminfo_node(&i, nid); - n = sysfs_emit(buf, + n = sprintf(buf, "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" "Node %d MemUsed: %8lu kB\n" @@ -96,7 +96,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); #ifdef CONFIG_HIGHMEM - n += sysfs_emit(buf + n, + n += sprintf(buf + n, "Node %d HighTotal: %8lu kB\n" "Node %d HighFree: %8lu kB\n" "Node %d LowTotal: %8lu kB\n" @@ -106,7 +106,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(i.totalram - i.totalhigh), nid, K(i.freeram - i.freehigh)); #endif - n += sysfs_emit(buf + n, + n += sprintf(buf + n, "Node %d Dirty: %8lu kB\n" "Node %d Writeback: %8lu kB\n" "Node %d FilePages: %8lu kB\n" @@ -162,19 +162,19 @@ static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); static ssize_t node_read_numastat(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, - "numa_hit %lu\n" - "numa_miss %lu\n" - "numa_foreign %lu\n" - "interleave_hit %lu\n" - "local_node %lu\n" - "other_node %lu\n", - sum_zone_numa_state(dev->id, NUMA_HIT), - sum_zone_numa_state(dev->id, NUMA_MISS), - sum_zone_numa_state(dev->id, NUMA_FOREIGN), - sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), - sum_zone_numa_state(dev->id, NUMA_LOCAL), - sum_zone_numa_state(dev->id, NUMA_OTHER)); + return sprintf(buf, + "numa_hit %lu\n" + "numa_miss %lu\n" + "numa_foreign %lu\n" + "interleave_hit %lu\n" + "local_node %lu\n" + "other_node %lu\n", + sum_zone_numa_state(dev->id, NUMA_HIT), + sum_zone_numa_state(dev->id, NUMA_MISS), + sum_zone_numa_state(dev->id, NUMA_FOREIGN), + sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), + sum_zone_numa_state(dev->id, NUMA_LOCAL), + sum_zone_numa_state(dev->id, NUMA_OTHER)); } static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); @@ -612,7 +612,7 @@ static ssize_t print_nodes_state(enum node_states state, char *buf) { int n; - n = sysfs_emit(buf, "%*pbl", + n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", nodemask_pr_args(&node_states[state])); buf[n++] = '\n'; buf[n] = '\0'; diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 1819da6889a7..2f89e618b142 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -927,7 +927,7 @@ static ssize_t driver_override_show(struct device *dev, ssize_t len; device_lock(dev); - len = sysfs_emit(buf, "%s\n", pdev->driver_override); + len = sprintf(buf, "%s\n", pdev->driver_override); device_unlock(dev); return len; } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index c61b50aa1d81..d713738ce796 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -101,7 +101,7 @@ static const char ctrl_on[] = "on"; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%s\n", + return sprintf(buf, "%s\n", dev->power.runtime_auto ? ctrl_auto : ctrl_on); } @@ -127,7 +127,7 @@ static ssize_t runtime_active_time_show(struct device *dev, int ret; spin_lock_irq(&dev->power.lock); update_pm_runtime_accounting(dev); - ret = sysfs_emit(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies)); + ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies)); spin_unlock_irq(&dev->power.lock); return ret; } @@ -140,7 +140,7 @@ static ssize_t runtime_suspended_time_show(struct device *dev, int ret; spin_lock_irq(&dev->power.lock); update_pm_runtime_accounting(dev); - ret = sysfs_emit(buf, "%i\n", + ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.suspended_jiffies)); spin_unlock_irq(&dev->power.lock); return ret; @@ -175,7 +175,7 @@ static ssize_t runtime_status_show(struct device *dev, return -EIO; } } - return sysfs_emit(buf, p); + return sprintf(buf, p); } static DEVICE_ATTR_RO(runtime_status); @@ -185,7 +185,7 @@ static ssize_t autosuspend_delay_ms_show(struct device *dev, { if (!dev->power.use_autosuspend) return -EIO; - return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay); + return sprintf(buf, "%d\n", dev->power.autosuspend_delay); } static ssize_t autosuspend_delay_ms_store(struct device *dev, @@ -214,11 +214,11 @@ static ssize_t pm_qos_resume_latency_us_show(struct device *dev, s32 value = dev_pm_qos_requested_resume_latency(dev); if (value == 0) - return sysfs_emit(buf, "n/a\n"); + return sprintf(buf, "n/a\n"); if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) value = 0; - return sysfs_emit(buf, "%d\n", value); + return sprintf(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_us_store(struct device *dev, @@ -258,11 +258,11 @@ static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev, s32 value = dev_pm_qos_get_user_latency_tolerance(dev); if (value < 0) - return sysfs_emit(buf, "auto\n"); + return sprintf(buf, "auto\n"); if (value == PM_QOS_LATENCY_ANY) - return sysfs_emit(buf, "any\n"); + return sprintf(buf, "any\n"); - return sysfs_emit(buf, "%d\n", value); + return sprintf(buf, "%d\n", value); } static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev, @@ -294,8 +294,8 @@ static ssize_t pm_qos_no_power_off_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) - & PM_QOS_FLAG_NO_POWER_OFF)); + return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) + & PM_QOS_FLAG_NO_POWER_OFF)); } static ssize_t pm_qos_no_power_off_store(struct device *dev, @@ -323,9 +323,9 @@ static const char _disabled[] = "disabled"; static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%s\n", device_can_wakeup(dev) - ? (device_may_wakeup(dev) ? _enabled : _disabled) - : ""); + return sprintf(buf, "%s\n", device_can_wakeup(dev) + ? (device_may_wakeup(dev) ? _enabled : _disabled) + : ""); } static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr, @@ -511,7 +511,7 @@ static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); static ssize_t runtime_usage_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%d\n", atomic_read(&dev->power.usage_count)); + return sprintf(buf, "%d\n", atomic_read(&dev->power.usage_count)); } static DEVICE_ATTR_RO(runtime_usage); @@ -519,8 +519,8 @@ static ssize_t runtime_active_kids_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%d\n", dev->power.ignore_children ? - 0 : atomic_read(&dev->power.child_count)); + return sprintf(buf, "%d\n", dev->power.ignore_children ? + 0 : atomic_read(&dev->power.child_count)); } static DEVICE_ATTR_RO(runtime_active_kids); @@ -528,12 +528,12 @@ static ssize_t runtime_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { if (dev->power.disable_depth && (dev->power.runtime_auto == false)) - return sysfs_emit(buf, "disabled & forbidden\n"); + return sprintf(buf, "disabled & forbidden\n"); if (dev->power.disable_depth) - return sysfs_emit(buf, "disabled\n"); + return sprintf(buf, "disabled\n"); if (dev->power.runtime_auto == false) - return sysfs_emit(buf, "forbidden\n"); - return sysfs_emit(buf, "enabled\n"); + return sprintf(buf, "forbidden\n"); + return sprintf(buf, "enabled\n"); } static DEVICE_ATTR_RO(runtime_enabled); @@ -541,9 +541,9 @@ static DEVICE_ATTR_RO(runtime_enabled); static ssize_t async_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%s\n", - device_async_suspend_enabled(dev) ? - _enabled : _disabled); + return sprintf(buf, "%s\n", + device_async_suspend_enabled(dev) ? + _enabled : _disabled); } static ssize_t async_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/base/soc.c b/drivers/base/soc.c index 23bc9eb794a2..7e91894a380b 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -72,13 +72,13 @@ static ssize_t soc_info_get(struct device *dev, struct soc_device *soc_dev = container_of(dev, struct soc_device, dev); if (attr == &dev_attr_machine) - return sysfs_emit(buf, "%s\n", soc_dev->attr->machine); + return sprintf(buf, "%s\n", soc_dev->attr->machine); if (attr == &dev_attr_family) - return sysfs_emit(buf, "%s\n", soc_dev->attr->family); + return sprintf(buf, "%s\n", soc_dev->attr->family); if (attr == &dev_attr_revision) - return sysfs_emit(buf, "%s\n", soc_dev->attr->revision); + return sprintf(buf, "%s\n", soc_dev->attr->revision); if (attr == &dev_attr_soc_id) - return sysfs_emit(buf, "%s\n", soc_dev->attr->soc_id); + return sprintf(buf, "%s\n", soc_dev->attr->soc_id); return -EINVAL; From ed829c9e67470baa7400b7455bf0098176ed4c6f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 7 Oct 2023 13:50:27 +0200 Subject: [PATCH 64/94] media: dvb: symbol fixup for dvb_attach() - again In commit f296b374b9c1 ("media: dvb: symbol fixup for dvb_attach()") in the 4.19.y tree, a few symbols were missed due to files being renamed in newer kernel versions. Fix this up by properly marking up the sp8870_attach and xc2028_attach symbols. Reported-by: Ben Hutchings Link: https://lore.kernel.org/r/b12435b2311ada131db05d3cf195b4b5d87708eb.camel@decadent.org.uk Fixes: f296b374b9c1 ("media: dvb: symbol fixup for dvb_attach()") Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/sp8870.c | 2 +- drivers/media/tuners/tuner-xc2028.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c index 3a577788041d..c55bcd809458 100644 --- a/drivers/media/dvb-frontends/sp8870.c +++ b/drivers/media/dvb-frontends/sp8870.c @@ -619,4 +619,4 @@ MODULE_DESCRIPTION("Spase SP8870 DVB-T Demodulator driver"); MODULE_AUTHOR("Juergen Peitz"); MODULE_LICENSE("GPL"); -EXPORT_SYMBOL(sp8870_attach); +EXPORT_SYMBOL_GPL(sp8870_attach); diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c index aa6861dcd3fd..802a4d77f26b 100644 --- a/drivers/media/tuners/tuner-xc2028.c +++ b/drivers/media/tuners/tuner-xc2028.c @@ -1513,7 +1513,7 @@ struct dvb_frontend *xc2028_attach(struct dvb_frontend *fe, return NULL; } -EXPORT_SYMBOL(xc2028_attach); +EXPORT_SYMBOL_GPL(xc2028_attach); MODULE_DESCRIPTION("Xceive xc2028/xc3028 tuner driver"); MODULE_AUTHOR("Michel Ludwig "); From f736f1e791dbd953f920dbfaa37c9c689aa962ef Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 7 Oct 2023 13:57:34 +0200 Subject: [PATCH 65/94] Revert "PCI: qcom: Disable write access to read only registers for IP v2.3.3" This reverts commit 3a4ecf4c9d793d0ecd07fc49cd76a2e24652d3b7 which is commit a33d700e8eea76c62120cb3dbf5e01328f18319a upstream. It was applied to the incorrect function as the original function the commit changed is not in this kernel branch. Reported-by: Ben Hutchings Link: https://lore.kernel.org/r/f23affddab4d8b3cc07508f2d8735d88d823821d.camel@decadent.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-qcom.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index ea0cd2401d6b..133fad284c9f 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -758,8 +758,6 @@ static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie) if (IS_ERR(res->phy_ahb_reset)) return PTR_ERR(res->phy_ahb_reset); - dw_pcie_dbi_ro_wr_dis(pci); - return 0; } From 7a65ee164a073eab2efd90c671905ef96957149d Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 23 Sep 2023 18:37:23 +0800 Subject: [PATCH 66/94] scsi: zfcp: Fix a double put in zfcp_port_enqueue() commit b481f644d9174670b385c3a699617052cd2a79d3 upstream. When device_register() fails, zfcp_port_release() will be called after put_device(). As a result, zfcp_ccw_adapter_put() will be called twice: one in zfcp_port_release() and one in the error path after device_register(). So the reference on the adapter object is doubly put, which may lead to a premature free. Fix this by adjusting the error tag after device_register(). Fixes: f3450c7b9172 ("[SCSI] zfcp: Replace local reference counting with common kref") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20230923103723.10320-1-dinghao.liu@zju.edu.cn Acked-by: Benjamin Block Cc: stable@vger.kernel.org # v2.6.33+ Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_aux.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index d1b531fe9ada..e5697ae9b4a3 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -493,12 +493,12 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, if (port) { put_device(&port->dev); retval = -EEXIST; - goto err_out; + goto err_put; } port = kzalloc(sizeof(struct zfcp_port), GFP_KERNEL); if (!port) - goto err_out; + goto err_put; rwlock_init(&port->unit_list_lock); INIT_LIST_HEAD(&port->unit_list); @@ -521,7 +521,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) { kfree(port); - goto err_out; + goto err_put; } retval = -EINVAL; @@ -538,8 +538,9 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, return port; -err_out: +err_put: zfcp_ccw_adapter_put(adapter); +err_out: return ERR_PTR(retval); } From 83f7ba5b02449727f03fbe7daee90f3bfa75cde3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 23 Sep 2023 19:15:59 -0600 Subject: [PATCH 67/94] qed/red_ll2: Fix undefined behavior bug in struct qed_ll2_info commit eea03d18af9c44235865a4bc9bec4d780ef6cf21 upstream. The flexible structure (a structure that contains a flexible-array member at the end) `qed_ll2_tx_packet` is nested within the second layer of `struct qed_ll2_info`: struct qed_ll2_tx_packet { ... /* Flexible Array of bds_set determined by max_bds_per_packet */ struct { struct core_tx_bd *txq_bd; dma_addr_t tx_frag; u16 frag_len; } bds_set[]; }; struct qed_ll2_tx_queue { ... struct qed_ll2_tx_packet cur_completing_packet; }; struct qed_ll2_info { ... struct qed_ll2_tx_queue tx_queue; struct qed_ll2_cbs cbs; }; The problem is that member `cbs` in `struct qed_ll2_info` is placed just after an object of type `struct qed_ll2_tx_queue`, which is in itself an implicit flexible structure, which by definition ends in a flexible array member, in this case `bds_set`. This causes an undefined behavior bug at run-time when dynamic memory is allocated for `bds_set`, which could lead to a serious issue if `cbs` in `struct qed_ll2_info` is overwritten by the contents of `bds_set`. Notice that the type of `cbs` is a structure full of function pointers (and a cookie :) ): include/linux/qed/qed_ll2_if.h: 107 typedef 108 void (*qed_ll2_complete_rx_packet_cb)(void *cxt, 109 struct qed_ll2_comp_rx_data *data); 110 111 typedef 112 void (*qed_ll2_release_rx_packet_cb)(void *cxt, 113 u8 connection_handle, 114 void *cookie, 115 dma_addr_t rx_buf_addr, 116 bool b_last_packet); 117 118 typedef 119 void (*qed_ll2_complete_tx_packet_cb)(void *cxt, 120 u8 connection_handle, 121 void *cookie, 122 dma_addr_t first_frag_addr, 123 bool b_last_fragment, 124 bool b_last_packet); 125 126 typedef 127 void (*qed_ll2_release_tx_packet_cb)(void *cxt, 128 u8 connection_handle, 129 void *cookie, 130 dma_addr_t first_frag_addr, 131 bool b_last_fragment, bool b_last_packet); 132 133 typedef 134 void (*qed_ll2_slowpath_cb)(void *cxt, u8 connection_handle, 135 u32 opaque_data_0, u32 opaque_data_1); 136 137 struct qed_ll2_cbs { 138 qed_ll2_complete_rx_packet_cb rx_comp_cb; 139 qed_ll2_release_rx_packet_cb rx_release_cb; 140 qed_ll2_complete_tx_packet_cb tx_comp_cb; 141 qed_ll2_release_tx_packet_cb tx_release_cb; 142 qed_ll2_slowpath_cb slowpath_cb; 143 void *cookie; 144 }; Fix this by moving the declaration of `cbs` to the middle of its containing structure `qed_ll2_info`, preventing it from being overwritten by the contents of `bds_set` at run-time. This bug was introduced in 2017, when `bds_set` was converted to a one-element array, and started to be used as a Variable Length Object (VLO) at run-time. Fixes: f5823fe6897c ("qed: Add ll2 option to limit the number of bds per packet") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ZQ+Nz8DfPg56pIzr@work Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_ll2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index f65817012e97..785899d3511c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -122,9 +122,9 @@ struct qed_ll2_info { enum core_tx_dest tx_dest; u8 tx_stats_en; bool main_func_queue; + struct qed_ll2_cbs cbs; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; - struct qed_ll2_cbs cbs; }; /** From 284b51858f295815c98b3e8033d11b14853c35ce Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 24 Aug 2023 21:06:51 -0600 Subject: [PATCH 68/94] wifi: mwifiex: Fix tlv_buf_left calculation commit eec679e4ac5f47507774956fb3479c206e761af7 upstream. In a TLV encoding scheme, the Length part represents the length after the header containing the values for type and length. In this case, `tlv_len` should be: tlv_len == (sizeof(*tlv_rxba) - 1) - sizeof(tlv_rxba->header) + tlv_bitmap_len Notice that the `- 1` accounts for the one-element array `bitmap`, which 1-byte size is already included in `sizeof(*tlv_rxba)`. So, if the above is correct, there is a double-counting of some members in `struct mwifiex_ie_types_rxba_sync`, when `tlv_buf_left` and `tmp` are calculated: 968 tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); 969 tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); in specific, members: drivers/net/wireless/marvell/mwifiex/fw.h:777 777 u8 mac[ETH_ALEN]; 778 u8 tid; 779 u8 reserved; 780 __le16 seq_num; 781 __le16 bitmap_len; This is clearly wrong, and affects the subsequent decoding of data in `event_buf` through `tlv_rxba`: 970 tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; Fix this by using `sizeof(tlv_rxba->header)` instead of `sizeof(*tlv_rxba)` in the calculation of `tlv_buf_left` and `tmp`. This results in the following binary differences before/after changes: | drivers/net/wireless/marvell/mwifiex/11n_rxreorder.o | @@ -4698,11 +4698,11 @@ | drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c:968 | tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len); | - 1da7: lea -0x11(%rbx),%edx | + 1da7: lea -0x4(%rbx),%edx | 1daa: movzwl %bp,%eax | drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c:969 | tmp = (u8 *)tlv_rxba + sizeof(tlv_rxba->header) + tlv_len; | - 1dad: lea 0x11(%r15,%rbp,1),%r15 | + 1dad: lea 0x4(%r15,%rbp,1),%r15 The above reflects the desired change: avoid counting 13 too many bytes; which is the total size of the double-counted members in `struct mwifiex_ie_types_rxba_sync`: $ pahole -C mwifiex_ie_types_rxba_sync drivers/net/wireless/marvell/mwifiex/11n_rxreorder.o struct mwifiex_ie_types_rxba_sync { struct mwifiex_ie_types_header header; /* 0 4 */ |----------------------------------------------------------------------- | u8 mac[6]; /* 4 6 */ | | u8 tid; /* 10 1 */ | | u8 reserved; /* 11 1 */ | | __le16 seq_num; /* 12 2 */ | | __le16 bitmap_len; /* 14 2 */ | | u8 bitmap[1]; /* 16 1 */ | |----------------------------------------------------------------------| | 13 bytes| ----------- /* size: 17, cachelines: 1, members: 7 */ /* last cacheline: 17 bytes */ } __attribute__((__packed__)); Fixes: 99ffe72cdae4 ("mwifiex: process rxba_sync event") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/06668edd68e7a26bbfeebd1201ae077a2a7a8bce.1692931954.git.gustavoars@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 5380fba652cc..1aa0bcdab8ef 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -986,8 +986,8 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, } } - tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); - tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); + tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len); + tmp = (u8 *)tlv_rxba + sizeof(tlv_rxba->header) + tlv_len; tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; } } From 8ea00e1ba5aa2feccf6933b67828e98ace32112b Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Thu, 21 Sep 2023 18:46:40 -0500 Subject: [PATCH 69/94] net: replace calls to sock->ops->connect() with kernel_connect() commit 26297b4ce1ce4ea40bc9a48ec99f45da3f64d2e2 upstream. commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect") ensured that kernel_connect() will not overwrite the address parameter in cases where BPF connect hooks perform an address rewrite. This change replaces direct calls to sock->ops->connect() in net with kernel_connect() to make these call safe. Link: https://lore.kernel.org/netdev/20230912013332.2048422-1-jrife@google.com/ Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect") Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Signed-off-by: Jordan Rife Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- net/rds/tcp_connect.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f6af13c16cf5..e0a4f59a68a2 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1510,8 +1510,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id, } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); - result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, - salen, 0); + result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, + salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 008f50fb25dd..23d6d2612708 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -169,7 +169,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) * own the socket */ rds_tcp_set_callbacks(sock, cp); - ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK); + ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK); rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) From a0d71e9e61da8a85a46774c67549739e28fda795 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Sun, 23 Apr 2023 19:10:41 +0800 Subject: [PATCH 70/94] ubi: Refuse attaching if mtd's erasesize is 0 [ Upstream commit 017c73a34a661a861712f7cc1393a123e5b2208c ] There exists mtd devices with zero erasesize, which will trigger a divide-by-zero exception while attaching ubi device. Fix it by refusing attaching if mtd's erasesize is 0. Fixes: 801c135ce73d ("UBI: Unsorted Block Images") Reported-by: Yu Hao Link: https://lore.kernel.org/lkml/977347543.226888.1682011999468.JavaMail.zimbra@nod.at/T/ Signed-off-by: Zhihao Cheng Reviewed-by: Miquel Raynal Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- drivers/mtd/ubi/build.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 3eb14c68cb9b..3e7e5b51eafd 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -878,6 +878,13 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, return -EINVAL; } + /* UBI cannot work on flashes with zero erasesize. */ + if (!mtd->erasesize) { + pr_err("ubi: refuse attaching mtd%d - zero erasesize flash is not supported\n", + mtd->index); + return -EINVAL; + } + if (ubi_num == UBI_DEV_NUM_AUTO) { /* Search for an empty slot in the @ubi_devices array */ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) From 16cc18b9080892d1a0200a38e36ae52e464bc555 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 8 Sep 2023 18:41:12 +0800 Subject: [PATCH 71/94] wifi: mwifiex: Fix oob check condition in mwifiex_process_rx_packet [ Upstream commit aef7a0300047e7b4707ea0411dc9597cba108fc8 ] Only skip the code path trying to access the rfc1042 headers when the buffer is too small, so the driver can still process packets without rfc1042 headers. Fixes: 119585281617 ("wifi: mwifiex: Fix OOB and integer underflow when rx packets") Signed-off-by: Pin-yen Lin Acked-by: Brian Norris Reviewed-by: Matthew Wang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230908104308.1546501-1-treapking@chromium.org Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/sta_rx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_rx.c b/drivers/net/wireless/marvell/mwifiex/sta_rx.c index f3c6daeba1b8..346e91b9f2ad 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_rx.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_rx.c @@ -98,7 +98,8 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv, rx_pkt_len = le16_to_cpu(local_rx_pd->rx_pkt_length); rx_pkt_hdr = (void *)local_rx_pd + rx_pkt_off; - if (sizeof(*rx_pkt_hdr) + rx_pkt_off > skb->len) { + if (sizeof(rx_pkt_hdr->eth803_hdr) + sizeof(rfc1042_header) + + rx_pkt_off > skb->len) { mwifiex_dbg(priv->adapter, ERROR, "wrong rx packet offset: len=%d, rx_pkt_off=%d\n", skb->len, rx_pkt_off); @@ -107,12 +108,13 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv, return -1; } - if ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header, - sizeof(bridge_tunnel_header))) || - (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header, - sizeof(rfc1042_header)) && - ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP && - ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX)) { + if (sizeof(*rx_pkt_hdr) + rx_pkt_off <= skb->len && + ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header, + sizeof(bridge_tunnel_header))) || + (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header, + sizeof(rfc1042_header)) && + ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP && + ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX))) { /* * Replace the 803 header and rfc1042 header (llc/snap) with an * EthernetII header, keep the src/dst and snap_type From ded67599d61cb9c7986c50a46a6f1043da56f57d Mon Sep 17 00:00:00 2001 From: Alexandra Diupina Date: Tue, 19 Sep 2023 17:25:02 +0300 Subject: [PATCH 72/94] drivers/net: process the result of hdlc_open() and add call of hdlc_close() in uhdlc_close() [ Upstream commit a59addacf899b1b21a7b7449a1c52c98704c2472 ] Process the result of hdlc_open() and call uhdlc_close() in case of an error. It is necessary to pass the error code up the control flow, similar to a possible error in request_irq(). Also add a hdlc_close() call to the uhdlc_close() because the comment to hdlc_close() says it must be called by the hardware driver when the HDLC device is being closed Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC") Signed-off-by: Alexandra Diupina Reviewed-by: Christophe Leroy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/fsl_ucc_hdlc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 5df6e85e7ccb..c8cff000a931 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -37,6 +37,8 @@ #define TDM_PPPOHT_SLIC_MAXIN +static int uhdlc_close(struct net_device *dev); + static struct ucc_tdm_info utdm_primary_info = { .uf_info = { .tsa = 0, @@ -661,6 +663,7 @@ static int uhdlc_open(struct net_device *dev) hdlc_device *hdlc = dev_to_hdlc(dev); struct ucc_hdlc_private *priv = hdlc->priv; struct ucc_tdm *utdm = priv->utdm; + int rc = 0; if (priv->hdlc_busy != 1) { if (request_irq(priv->ut_info->uf_info.irq, @@ -683,10 +686,13 @@ static int uhdlc_open(struct net_device *dev) netif_device_attach(priv->ndev); napi_enable(&priv->napi); netif_start_queue(dev); - hdlc_open(dev); + + rc = hdlc_open(dev); + if (rc) + uhdlc_close(dev); } - return 0; + return rc; } static void uhdlc_memclean(struct ucc_hdlc_private *priv) @@ -775,6 +781,8 @@ static int uhdlc_close(struct net_device *dev) netif_stop_queue(dev); priv->hdlc_busy = 0; + hdlc_close(dev); + return 0; } From 1bddd95376c4bdb84fa5c53f641799925a537014 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 22 Sep 2023 16:37:11 +0100 Subject: [PATCH 73/94] regmap: rbtree: Fix wrong register marked as in-cache when creating new node [ Upstream commit 7a795ac8d49e2433e1b97caf5e99129daf8e1b08 ] When regcache_rbtree_write() creates a new rbtree_node it was passing the wrong bit number to regcache_rbtree_set_register(). The bit number is the offset __in number of registers__, but in the case of creating a new block regcache_rbtree_write() was not dividing by the address stride to get the number of registers. Fix this by dividing by map->reg_stride. Compare with regcache_rbtree_read() where the bit is checked. This bug meant that the wrong register was marked as present. The register that was written to the cache could not be read from the cache because it was not marked as cached. But a nearby register could be marked as having a cached value even if it was never written to the cache. Signed-off-by: Richard Fitzgerald Fixes: 3f4ff561bc88 ("regmap: rbtree: Make cache_present bitmap per node") Link: https://lore.kernel.org/r/20230922153711.28103-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/base/regmap/regcache-rbtree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 7353c5527087..b6f8f4059e25 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -467,7 +467,8 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg, if (!rbnode) return -ENOMEM; regcache_rbtree_set_register(map, rbnode, - reg - rbnode->base_reg, value); + (reg - rbnode->base_reg) / map->reg_stride, + value); regcache_rbtree_insert(map, &rbtree_ctx->root, rbnode); rbtree_ctx->cached_rbnode = rbnode; } From e5ccce2c1d54c9303eb923554bb8412de5f8100e Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 18 Sep 2023 15:58:48 -0700 Subject: [PATCH 74/94] scsi: target: core: Fix deadlock due to recursive locking [ Upstream commit a154f5f643c6ecddd44847217a7a3845b4350003 ] The following call trace shows a deadlock issue due to recursive locking of mutex "device_mutex". First lock acquire is in target_for_each_device() and second in target_free_device(). PID: 148266 TASK: ffff8be21ffb5d00 CPU: 10 COMMAND: "iscsi_ttx" #0 [ffffa2bfc9ec3b18] __schedule at ffffffffa8060e7f #1 [ffffa2bfc9ec3ba0] schedule at ffffffffa8061224 #2 [ffffa2bfc9ec3bb8] schedule_preempt_disabled at ffffffffa80615ee #3 [ffffa2bfc9ec3bc8] __mutex_lock at ffffffffa8062fd7 #4 [ffffa2bfc9ec3c40] __mutex_lock_slowpath at ffffffffa80631d3 #5 [ffffa2bfc9ec3c50] mutex_lock at ffffffffa806320c #6 [ffffa2bfc9ec3c68] target_free_device at ffffffffc0935998 [target_core_mod] #7 [ffffa2bfc9ec3c90] target_core_dev_release at ffffffffc092f975 [target_core_mod] #8 [ffffa2bfc9ec3ca0] config_item_put at ffffffffa79d250f #9 [ffffa2bfc9ec3cd0] config_item_put at ffffffffa79d2583 #10 [ffffa2bfc9ec3ce0] target_devices_idr_iter at ffffffffc0933f3a [target_core_mod] #11 [ffffa2bfc9ec3d00] idr_for_each at ffffffffa803f6fc #12 [ffffa2bfc9ec3d60] target_for_each_device at ffffffffc0935670 [target_core_mod] #13 [ffffa2bfc9ec3d98] transport_deregister_session at ffffffffc0946408 [target_core_mod] #14 [ffffa2bfc9ec3dc8] iscsit_close_session at ffffffffc09a44a6 [iscsi_target_mod] #15 [ffffa2bfc9ec3df0] iscsit_close_connection at ffffffffc09a4a88 [iscsi_target_mod] #16 [ffffa2bfc9ec3df8] finish_task_switch at ffffffffa76e5d07 #17 [ffffa2bfc9ec3e78] iscsit_take_action_for_connection_exit at ffffffffc0991c23 [iscsi_target_mod] #18 [ffffa2bfc9ec3ea0] iscsi_target_tx_thread at ffffffffc09a403b [iscsi_target_mod] #19 [ffffa2bfc9ec3f08] kthread at ffffffffa76d8080 #20 [ffffa2bfc9ec3f50] ret_from_fork at ffffffffa8200364 Fixes: 36d4cb460bcb ("scsi: target: Avoid that EXTENDED COPY commands trigger lock inversion") Signed-off-by: Junxiao Bi Link: https://lore.kernel.org/r/20230918225848.66463-1-junxiao.bi@oracle.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 1b381519c164..a23dcbe79e14 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -881,7 +881,6 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb) EXPORT_SYMBOL(target_to_linux_sector); struct devices_idr_iter { - struct config_item *prev_item; int (*fn)(struct se_device *dev, void *data); void *data; }; @@ -891,11 +890,9 @@ static int target_devices_idr_iter(int id, void *p, void *data) { struct devices_idr_iter *iter = data; struct se_device *dev = p; + struct config_item *item; int ret; - config_item_put(iter->prev_item); - iter->prev_item = NULL; - /* * We add the device early to the idr, so it can be used * by backend modules during configuration. We do not want @@ -905,12 +902,13 @@ static int target_devices_idr_iter(int id, void *p, void *data) if (!target_dev_configured(dev)) return 0; - iter->prev_item = config_item_get_unless_zero(&dev->dev_group.cg_item); - if (!iter->prev_item) + item = config_item_get_unless_zero(&dev->dev_group.cg_item); + if (!item) return 0; mutex_unlock(&device_mutex); ret = iter->fn(dev, iter->data); + config_item_put(item); mutex_lock(&device_mutex); return ret; @@ -933,7 +931,6 @@ int target_for_each_device(int (*fn)(struct se_device *dev, void *data), mutex_lock(&device_mutex); ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter); mutex_unlock(&device_mutex); - config_item_put(iter.prev_item); return ret; } From 272ba9ebfb35b27c6bf31304e68b51fcc3bf5a95 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Thu, 28 Sep 2023 17:28:07 -0300 Subject: [PATCH 75/94] modpost: add missing else to the "of" check [ Upstream commit cbc3d00cf88fda95dbcafee3b38655b7a8f2650a ] Without this 'else' statement, an "usb" name goes into two handlers: the first/previous 'if' statement _AND_ the for-loop over 'devtable', but the latter is useless as it has no 'usb' device_id entry anyway. Tested with allmodconfig before/after patch; no changes to *.mod.c: git checkout v6.6-rc3 make -j$(nproc) allmodconfig make -j$(nproc) olddefconfig make -j$(nproc) find . -name '*.mod.c' | cpio -pd /tmp/before # apply patch make -j$(nproc) find . -name '*.mod.c' | cpio -pd /tmp/after diff -r /tmp/before/ /tmp/after/ # no difference Fixes: acbef7b76629 ("modpost: fix module autoloading for OF devices with generic compatible property") Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/file2alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 7f40b6aab689..90868df7865e 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1395,7 +1395,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, /* First handle the "special" cases */ if (sym_is(name, namelen, "usb")) do_usb_table(symval, sym->st_size, mod); - if (sym_is(name, namelen, "of")) + else if (sym_is(name, namelen, "of")) do_of_table(symval, sym->st_size, mod); else if (sym_is(name, namelen, "pnp")) do_pnp_device_entry(symval, sym->st_size, mod); From 559d697c5d072593d22b3e0bd8b8081108aeaf59 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Sep 2023 11:41:19 +0100 Subject: [PATCH 76/94] ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data() [ Upstream commit 9d4c75800f61e5d75c1659ba201b6c0c7ead3070 ] Including the transhdrlen in length is a problem when the packet is partially filled (e.g. something like send(MSG_MORE) happened previously) when appending to an IPv4 or IPv6 packet as we don't want to repeat the transport header or account for it twice. This can happen under some circumstances, such as splicing into an L2TP socket. The symptom observed is a warning in __ip6_append_data(): WARNING: CPU: 1 PID: 5042 at net/ipv6/ip6_output.c:1800 __ip6_append_data.isra.0+0x1be8/0x47f0 net/ipv6/ip6_output.c:1800 that occurs when MSG_SPLICE_PAGES is used to append more data to an already partially occupied skbuff. The warning occurs when 'copy' is larger than the amount of data in the message iterator. This is because the requested length includes the transport header length when it shouldn't. This can be triggered by, for example: sfd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP); bind(sfd, ...); // ::1 connect(sfd, ...); // ::1 port 7 send(sfd, buffer, 4100, MSG_MORE); sendfile(sfd, dfd, NULL, 1024); Fix this by only adding transhdrlen into the length if the write queue is empty in l2tp_ip6_sendmsg(), analogously to how UDP does things. l2tp_ip_sendmsg() looks like it won't suffer from this problem as it builds the UDP packet itself. Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Reported-by: syzbot+62cbf263225ae13ff153@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/0000000000001c12b30605378ce8@google.com/ Suggested-by: Willem de Bruijn Signed-off-by: David Howells cc: Eric Dumazet cc: Willem de Bruijn cc: "David S. Miller" cc: David Ahern cc: Paolo Abeni cc: Jakub Kicinski cc: netdev@vger.kernel.org cc: bpf@vger.kernel.org cc: syzkaller-bugs@googlegroups.com Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_ip6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index f3f0b4b7c386..7342344d99a9 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -525,7 +525,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ if (len > INT_MAX - transhdrlen) return -EMSGSIZE; - ulen = len + transhdrlen; /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) @@ -649,6 +648,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); + ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, From 2a36d9e2995c8c3c3f179aab1215a69cff06cbed Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 24 Sep 2023 02:35:49 +0900 Subject: [PATCH 77/94] net: usb: smsc75xx: Fix uninit-value access in __smsc75xx_read_reg [ Upstream commit e9c65989920f7c28775ec4e0c11b483910fb67b8 ] syzbot reported the following uninit-value access issue: ===================================================== BUG: KMSAN: uninit-value in smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:975 [inline] BUG: KMSAN: uninit-value in smsc75xx_bind+0x5c9/0x11e0 drivers/net/usb/smsc75xx.c:1482 CPU: 0 PID: 8696 Comm: kworker/0:3 Not tainted 5.8.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:121 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:975 [inline] smsc75xx_bind+0x5c9/0x11e0 drivers/net/usb/smsc75xx.c:1482 usbnet_probe+0x1152/0x3f90 drivers/net/usb/usbnet.c:1737 usb_probe_interface+0xece/0x1550 drivers/usb/core/driver.c:374 really_probe+0xf20/0x20b0 drivers/base/dd.c:529 driver_probe_device+0x293/0x390 drivers/base/dd.c:701 __device_attach_driver+0x63f/0x830 drivers/base/dd.c:807 bus_for_each_drv+0x2ca/0x3f0 drivers/base/bus.c:431 __device_attach+0x4e2/0x7f0 drivers/base/dd.c:873 device_initial_probe+0x4a/0x60 drivers/base/dd.c:920 bus_probe_device+0x177/0x3d0 drivers/base/bus.c:491 device_add+0x3b0e/0x40d0 drivers/base/core.c:2680 usb_set_configuration+0x380f/0x3f10 drivers/usb/core/message.c:2032 usb_generic_driver_probe+0x138/0x300 drivers/usb/core/generic.c:241 usb_probe_device+0x311/0x490 drivers/usb/core/driver.c:272 really_probe+0xf20/0x20b0 drivers/base/dd.c:529 driver_probe_device+0x293/0x390 drivers/base/dd.c:701 __device_attach_driver+0x63f/0x830 drivers/base/dd.c:807 bus_for_each_drv+0x2ca/0x3f0 drivers/base/bus.c:431 __device_attach+0x4e2/0x7f0 drivers/base/dd.c:873 device_initial_probe+0x4a/0x60 drivers/base/dd.c:920 bus_probe_device+0x177/0x3d0 drivers/base/bus.c:491 device_add+0x3b0e/0x40d0 drivers/base/core.c:2680 usb_new_device+0x1bd4/0x2a30 drivers/usb/core/hub.c:2554 hub_port_connect drivers/usb/core/hub.c:5208 [inline] hub_port_connect_change drivers/usb/core/hub.c:5348 [inline] port_event drivers/usb/core/hub.c:5494 [inline] hub_event+0x5e7b/0x8a70 drivers/usb/core/hub.c:5576 process_one_work+0x1688/0x2140 kernel/workqueue.c:2269 worker_thread+0x10bc/0x2730 kernel/workqueue.c:2415 kthread+0x551/0x590 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 Local variable ----buf.i87@smsc75xx_bind created at: __smsc75xx_read_reg drivers/net/usb/smsc75xx.c:83 [inline] smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:968 [inline] smsc75xx_bind+0x485/0x11e0 drivers/net/usb/smsc75xx.c:1482 __smsc75xx_read_reg drivers/net/usb/smsc75xx.c:83 [inline] smsc75xx_wait_ready drivers/net/usb/smsc75xx.c:968 [inline] smsc75xx_bind+0x485/0x11e0 drivers/net/usb/smsc75xx.c:1482 This issue is caused because usbnet_read_cmd() reads less bytes than requested (zero byte in the reproducer). In this case, 'buf' is not properly filled. This patch fixes the issue by returning -ENODATA if usbnet_read_cmd() reads less bytes than requested. Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver") Reported-and-tested-by: syzbot+6966546b78d050bb0b5d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6966546b78d050bb0b5d Signed-off-by: Shigeru Yoshida Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230923173549.3284502-1-syoshida@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/smsc75xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 313a4b0edc6b..573d7ad2e708 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -102,7 +102,9 @@ static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index, ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, &buf, 4); - if (unlikely(ret < 0)) { + if (unlikely(ret < 4)) { + ret = ret < 0 ? ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n", index, ret); return ret; From 770cc2ea5a1dff5f211e120bd598a0daa37c5f53 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Wed, 27 Sep 2023 13:57:49 -0400 Subject: [PATCH 78/94] net: stmmac: dwmac-stm32: fix resume on STM32 MCU [ Upstream commit 6f195d6b0da3b689922ba9e302af2f49592fa9fc ] The STM32MP1 keeps clk_rx enabled during suspend, and therefore the driver does not enable the clock in stm32_dwmac_init() if the device was suspended. The problem is that this same code runs on STM32 MCUs, which do disable clk_rx during suspend, causing the clock to never be re-enabled on resume. This patch adds a variant flag to indicate that clk_rx remains enabled during suspend, and uses this to decide whether to enable the clock in stm32_dwmac_init() if the device was suspended. This approach fixes this specific bug with limited opportunity for unintended side-effects, but I have a follow up patch that will refactor the clock configuration and hopefully make it less error prone. Fixes: 6528e02cc9ff ("net: ethernet: stmmac: add adaptation for stm32mp157c.") Signed-off-by: Ben Wolsieffer Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20230927175749.1419774-1-ben.wolsieffer@hefring.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 7e2e79dedebf..df7fc6b675a5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -57,6 +57,7 @@ struct stm32_ops { int (*parse_data)(struct stm32_dwmac *dwmac, struct device *dev); u32 syscfg_eth_mask; + bool clk_rx_enable_in_suspend; }; static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) @@ -74,7 +75,8 @@ static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) if (ret) return ret; - if (!dwmac->dev->power.is_suspended) { + if (!dwmac->ops->clk_rx_enable_in_suspend || + !dwmac->dev->power.is_suspended) { ret = clk_prepare_enable(dwmac->clk_rx); if (ret) { clk_disable_unprepare(dwmac->clk_tx); @@ -413,7 +415,8 @@ static struct stm32_ops stm32mp1_dwmac_data = { .suspend = stm32mp1_suspend, .resume = stm32mp1_resume, .parse_data = stm32mp1_parse_data, - .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK + .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK, + .clk_rx_enable_in_suspend = true }; static const struct of_device_id stm32_dwmac_match[] = { From b86bfa833405ee615a5758cbe890ceb955dd7235 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 1 Oct 2023 11:12:38 -0400 Subject: [PATCH 79/94] tcp: fix quick-ack counting to count actual ACKs of new data [ Upstream commit 059217c18be6757b95bfd77ba53fb50b48b8a816 ] This commit fixes quick-ack counting so that it only considers that a quick-ack has been provided if we are sending an ACK that newly acknowledges data. The code was erroneously using the number of data segments in outgoing skbs when deciding how many quick-ack credits to remove. This logic does not make sense, and could cause poor performance in request-response workloads, like RPC traffic, where requests or responses can be multi-segment skbs. When a TCP connection decides to send N quick-acks, that is to accelerate the cwnd growth of the congestion control module controlling the remote endpoint of the TCP connection. That quick-ack decision is purely about the incoming data and outgoing ACKs. It has nothing to do with the outgoing data or the size of outgoing data. And in particular, an ACK only serves the intended purpose of allowing the remote congestion control to grow the congestion window quickly if the ACK is ACKing or SACKing new data. The fix is simple: only count packets as serving the goal of the quickack mechanism if they are ACKing/SACKing new data. We can tell whether this is the case by checking inet_csk_ack_scheduled(), since we schedule an ACK exactly when we are ACKing/SACKing new data. Fixes: fc6415bcb0f5 ("[TCP]: Fix quick-ack decrementing with TSO.") Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231001151239.1866845-1-ncardwell.sw@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tcp.h | 6 ++++-- net/ipv4/tcp_output.c | 7 +++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9c43299ff870..427553adf82c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -346,12 +346,14 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -static inline void tcp_dec_quickack_mode(struct sock *sk, - const unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.quick) { + /* How many ACKs S/ACKing new data have we sent? */ + const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0; + if (pkts >= icsk->icsk_ack.quick) { icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9299de0da351..dcca9554071d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -164,8 +164,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, } /* Account for an ACK we sent. */ -static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, - u32 rcv_nxt) +static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt) { struct tcp_sock *tp = tcp_sk(sk); @@ -179,7 +178,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, if (unlikely(rcv_nxt != tp->rcv_nxt)) return; /* Special ACK sent by DCTCP to reflect ECN */ - tcp_dec_quickack_mode(sk, pkts); + tcp_dec_quickack_mode(sk); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } @@ -1139,7 +1138,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, icsk->icsk_af_ops->send_check(sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) - tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); + tcp_event_ack_sent(sk, rcv_nxt); if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); From 9041b39020c2672f1ee7ca539aba09fdfd69e364 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 1 Oct 2023 11:12:39 -0400 Subject: [PATCH 80/94] tcp: fix delayed ACKs for MSS boundary condition [ Upstream commit 4720852ed9afb1c5ab84e96135cb5b73d5afde6f ] This commit fixes poor delayed ACK behavior that can cause poor TCP latency in a particular boundary condition: when an application makes a TCP socket write that is an exact multiple of the MSS size. The problem is that there is painful boundary discontinuity in the current delayed ACK behavior. With the current delayed ACK behavior, we have: (1) If an app reads data when > 1*MSS is unacknowledged, then tcp_cleanup_rbuf() ACKs immediately because of: tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || (2) If an app reads all received data, and the packets were < 1*MSS, and either (a) the app is not ping-pong or (b) we received two packets < 1*MSS, then tcp_cleanup_rbuf() ACKs immediately beecause of: ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && !inet_csk_in_pingpong_mode(sk))) && (3) *However*: if an app reads exactly 1*MSS of data, tcp_cleanup_rbuf() does not send an immediate ACK. This is true even if the app is not ping-pong and the 1*MSS of data had the PSH bit set, suggesting the sending application completed an application write. Thus if the app is not ping-pong, we have this painful case where >1*MSS gets an immediate ACK, and <1*MSS gets an immediate ACK, but a write whose last skb is an exact multiple of 1*MSS can get a 40ms delayed ACK. This means that any app that transfers data in one direction and takes care to align write size or packet size with MSS can suffer this problem. With receive zero copy making 4KB MSS values more common, it is becoming more common to have application writes naturally align with MSS, and more applications are likely to encounter this delayed ACK problem. The fix in this commit is to refine the delayed ACK heuristics with a simple check: immediately ACK a received 1*MSS skb with PSH bit set if the app reads all data. Why? If an skb has a len of exactly 1*MSS and has the PSH bit set then it is likely the end of an application write. So more data may not be arriving soon, and yet the data sender may be waiting for an ACK if cwnd-bound or using TX zero copy. Thus we set ICSK_ACK_PUSHED in this case so that tcp_cleanup_rbuf() will send an ACK immediately if the app reads all of the data and is not ping-pong. Note that this logic is also executed for the case where len > MSS, but in that case this logic does not matter (and does not hurt) because tcp_cleanup_rbuf() will always ACK immediately if the app reads data and there is more than an MSS of unACKed data. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Cc: Xin Guo Link: https://lore.kernel.org/r/20231001151239.1866845-2-ncardwell.sw@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9e1ec69fe5b4..0052a6194cc1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -172,6 +172,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (unlikely(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE)) tcp_gro_dev_warn(sk, skb, len); + /* If the skb has a len of exactly 1*MSS and has the PSH bit + * set then it is likely the end of an application write. So + * more data may not be arriving soon, and yet the data sender + * may be waiting for an ACK if cwnd-bound or using TX zero + * copy. So we set ICSK_ACK_PUSHED here so that + * tcp_cleanup_rbuf() will send an ACK immediately if the app + * reads all of the data and is not ping-pong. If len > MSS + * then this logic does not matter (and does not hurt) because + * tcp_cleanup_rbuf() will always ACK immediately if the app + * reads data and there is more than an MSS of unACKed data. + */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. From 698c4642055a0266029d28ee157f2b599051c6f6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2023 10:58:45 -0400 Subject: [PATCH 81/94] sctp: update transport state when processing a dupcook packet [ Upstream commit 2222a78075f0c19ca18db53fd6623afb4aff602d ] During the 4-way handshake, the transport's state is set to ACTIVE in sctp_process_init() when processing INIT_ACK chunk on client or COOKIE_ECHO chunk on server. In the collision scenario below: 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] 192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] when processing COOKIE_ECHO on 192.168.1.2, as it's in COOKIE_WAIT state, sctp_sf_do_dupcook_b() is called by sctp_sf_do_5_2_4_dupcook() where it creates a new association and sets its transport to ACTIVE then updates to the old association in sctp_assoc_update(). However, in sctp_assoc_update(), it will skip the transport update if it finds a transport with the same ipaddr already existing in the old asoc, and this causes the old asoc's transport state not to move to ACTIVE after the handshake. This means if DATA retransmission happens at this moment, it won't be able to enter PF state because of the check 'transport->state == SCTP_ACTIVE' in sctp_do_8_2_transport_strike(). This patch fixes it by updating the transport in sctp_assoc_update() with sctp_assoc_add_peer() where it updates the transport state if there is already a transport with the same ipaddr exists in the old asoc. Signed-off-by: Xin Long Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/fd17356abe49713ded425250cc1ae51e9f5846c6.1696172325.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/associola.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d17708800652..78c1429d1301 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1181,8 +1181,7 @@ int sctp_assoc_update(struct sctp_association *asoc, /* Add any peer addresses from the new association. */ list_for_each_entry(trans, &new->peer.transport_addr_list, transports) - if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) && - !sctp_assoc_add_peer(asoc, &trans->ipaddr, + if (!sctp_assoc_add_peer(asoc, &trans->ipaddr, GFP_ATOMIC, trans->state)) return -ENOMEM; From 84b62072e3062ce60d2268383ad9ede92a2d03bc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2023 11:04:20 -0400 Subject: [PATCH 82/94] sctp: update hb timer immediately after users change hb_interval [ Upstream commit 1f4e803cd9c9166eb8b6c8b0b8e4124f7499fc07 ] Currently, when hb_interval is changed by users, it won't take effect until the next expiry of hb timer. As the default value is 30s, users have to wait up to 30s to wait its hb_interval update to work. This becomes pretty bad in containers where a much smaller value is usually set on hb_interval. This patch improves it by resetting the hb timer immediately once the value of hb_interval is updated by users. Note that we don't address the already existing 'problem' when sending a heartbeat 'on demand' if one hb has just been sent(from the timer) mentioned in: https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg590224.html Signed-off-by: Xin Long Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/75465785f8ee5df2fb3acdca9b8fafdc18984098.1696172660.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 432dccd37506..f954d3c8876d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2578,6 +2578,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if (trans) { trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + sctp_transport_reset_hb_timer(trans); } else if (asoc) { asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); From 5500293194dfacb4f46a88ba909ae2eb5b3e7b7f Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Mon, 4 Jan 2021 15:57:18 -0800 Subject: [PATCH 83/94] cpupower: add Makefile dependencies for install targets commit fb7791e213a64495ec2336869b868fcd8af14346 upstream. This allows building cpupower in parallel rather than serially. Signed-off-by: Ivan Babrou Signed-off-by: Shuah Khan Cc: Hauke Mehrtens Signed-off-by: Greg Kroah-Hartman --- tools/power/cpupower/Makefile | 8 ++++---- tools/power/cpupower/bench/Makefile | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 1dd5f4fcffd5..392906c8d3a6 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -278,14 +278,14 @@ clean: $(MAKE) -C bench O=$(OUTPUT) clean -install-lib: +install-lib: libcpupower $(INSTALL) -d $(DESTDIR)${libdir} $(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/ $(INSTALL) -d $(DESTDIR)${includedir} $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h $(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h -install-tools: +install-tools: $(OUTPUT)cpupower $(INSTALL) -d $(DESTDIR)${bindir} $(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir} @@ -299,14 +299,14 @@ install-man: $(INSTALL_DATA) -D man/cpupower-info.1 $(DESTDIR)${mandir}/man1/cpupower-info.1 $(INSTALL_DATA) -D man/cpupower-monitor.1 $(DESTDIR)${mandir}/man1/cpupower-monitor.1 -install-gmo: +install-gmo: create-gmo $(INSTALL) -d $(DESTDIR)${localedir} for HLANG in $(LANGUAGES); do \ echo '$(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo'; \ $(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; -install-bench: +install-bench: compile-bench @#DESTDIR must be set from outside to survive @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index f68b4bc55273..d9d9923af85c 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -27,7 +27,7 @@ $(OUTPUT)cpufreq-bench: $(OBJS) all: $(OUTPUT)cpufreq-bench -install: +install: $(OUTPUT)cpufreq-bench mkdir -p $(DESTDIR)/$(sbindir) mkdir -p $(DESTDIR)/$(bindir) mkdir -p $(DESTDIR)/$(docdir) From 7ddb0a57f66928958a82d1e8cc4558da8a1bf1ee Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Sep 2023 07:55:56 +0200 Subject: [PATCH 84/94] IB/mlx4: Fix the size of a buffer in add_port_entries() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d7f393430a17c2bfcdf805462a5aa80be4285b27 upstream. In order to be sure that 'buff' is never truncated, its size should be 12, not 11. When building with W=1, this fixes the following warnings: drivers/infiniband/hw/mlx4/sysfs.c: In function ‘add_port_entries’: drivers/infiniband/hw/mlx4/sysfs.c:268:34: error: ‘sprintf’ may write a terminating nul past the end of the destination [-Werror=format-overflow=] 268 | sprintf(buff, "%d", i); | ^ drivers/infiniband/hw/mlx4/sysfs.c:268:17: note: ‘sprintf’ output between 2 and 12 bytes into a destination of size 11 268 | sprintf(buff, "%d", i); | ^~~~~~~~~~~~~~~~~~~~~~ drivers/infiniband/hw/mlx4/sysfs.c:286:34: error: ‘sprintf’ may write a terminating nul past the end of the destination [-Werror=format-overflow=] 286 | sprintf(buff, "%d", i); | ^ drivers/infiniband/hw/mlx4/sysfs.c:286:17: note: ‘sprintf’ output between 2 and 12 bytes into a destination of size 11 286 | sprintf(buff, "%d", i); | ^~~~~~~~~~~~~~~~~~~~~~ Fixes: c1e7e466120b ("IB/mlx4: Add iov directory in sysfs under the ib device") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/0bb1443eb47308bc9be30232cc23004c4d4cf43e.1695448530.git.christophe.jaillet@wanadoo.fr Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index d2da28d613f2..eb95292d12dc 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -221,7 +221,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, static int add_port_entries(struct mlx4_ib_dev *device, int port_num) { int i; - char buff[11]; + char buff[12]; struct mlx4_ib_iov_port *port = NULL; int ret = 0 ; struct ib_port_attr attr; From 007282b8b1e016c818126ae2dffe804359d3ef1c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 3 Oct 2023 09:39:26 +0200 Subject: [PATCH 85/94] gpio: aspeed: fix the GPIO number passed to pinctrl_gpio_set_config() commit f9315f17bf778cb8079a29639419fcc8a41a3c84 upstream. pinctrl_gpio_set_config() expects the GPIO number from the global GPIO numberspace, not the controller-relative offset, which needs to be added to the chip base. Fixes: 5ae4cb94b313 ("gpio: aspeed: Add debounce support") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Andrew Jeffery Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index e627e0e9001a..ba1cd971d50b 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -999,7 +999,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset, else if (param == PIN_CONFIG_BIAS_DISABLE || param == PIN_CONFIG_BIAS_PULL_DOWN || param == PIN_CONFIG_DRIVE_STRENGTH) - return pinctrl_gpio_set_config(offset, config); + return pinctrl_gpio_set_config(chip->base + offset, config); else if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN || param == PIN_CONFIG_DRIVE_OPEN_SOURCE) /* Return -ENOTSUPP to trigger emulation, as per datasheet */ From 0db6d5496a1918a0f322a4a4cfa23cd16f0bc464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Fri, 29 Sep 2023 17:41:57 +0200 Subject: [PATCH 86/94] gpio: pxa: disable pinctrl calls for MMP_GPIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f0575116507b981e6a810e78ce3c9040395b958b upstream. Similarly to PXA3xx and MMP2, pinctrl-single isn't capable of setting pin direction on MMP either. Fixes: a770d946371e ("gpio: pxa: add pin control gpio direction and request") Signed-off-by: Duje Mihanović Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-pxa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index bcc6be4a5cb2..61df316e0d17 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -246,6 +246,7 @@ static bool pxa_gpio_has_pinctrl(void) switch (gpio_type) { case PXA3XX_GPIO: case MMP2_GPIO: + case MMP_GPIO: return false; default: From c0dab6e9284eff3d4203beb787a4f761a5091dd2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 11 Sep 2023 15:18:06 +0300 Subject: [PATCH 87/94] RDMA/cma: Fix truncation compilation warning in make_cma_ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 18126c767658ae8a831257c6cb7776c5ba5e7249 upstream. The following compilation error is false alarm as RDMA devices don't have such large amount of ports to actually cause to format truncation. drivers/infiniband/core/cma_configfs.c: In function ‘make_cma_ports’: drivers/infiniband/core/cma_configfs.c:223:57: error: ‘snprintf’ output may be truncated before the last format character [-Werror=format-truncation=] 223 | snprintf(port_str, sizeof(port_str), "%u", i + 1); | ^ drivers/infiniband/core/cma_configfs.c:223:17: note: ‘snprintf’ output between 2 and 11 bytes into a destination of size 10 223 | snprintf(port_str, sizeof(port_str), "%u", i + 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors make[5]: *** [scripts/Makefile.build:243: drivers/infiniband/core/cma_configfs.o] Error 1 Fixes: 045959db65c6 ("IB/cma: Add configfs for rdma_cm") Link: https://lore.kernel.org/r/a7e3b347ee134167fa6a3787c56ef231a04bc8c2.1694434639.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index ce183d054785..f9b0303b3a01 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -215,7 +215,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, } for (i = 0; i < ports_num; i++) { - char port_str[10]; + char port_str[11]; ports[i].port_num = i + 1; snprintf(port_str, sizeof(port_str), "%u", i + 1); From 423667bfe9304530d0c3dfd35669421d9d825d9a Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 20 Sep 2023 13:01:56 +0300 Subject: [PATCH 88/94] RDMA/mlx5: Fix NULL string error commit dab994bcc609a172bfdab15a0d4cb7e50e8b5458 upstream. checkpath is complaining about NULL string, change it to 'Unknown'. Fixes: 37aa5c36aa70 ("IB/mlx5: Add UARs write-combining and non-cached mapping") Signed-off-by: Shay Drory Link: https://lore.kernel.org/r/8638e5c14fadbde5fa9961874feae917073af920.1695203958.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1688c06d5c3c..f7df27c7c634 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2005,7 +2005,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) case MLX5_IB_MMAP_DEVICE_MEM: return "Device Memory"; default: - return NULL; + return "Unknown"; } } From a7796ccd3b5731fc1e065c12c313e987c0daecdc Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 19 Sep 2023 17:51:40 +0000 Subject: [PATCH 89/94] parisc: Restore __ldcw_align for PA-RISC 2.0 processors commit 914988e099fc658436fbd7b8f240160c352b6552 upstream. Back in 2005, Kyle McMartin removed the 16-byte alignment for ldcw semaphores on PA 2.0 machines (CONFIG_PA20). This broke spinlocks on pre PA8800 processors. The main symptom was random faults in mmap'd memory (e.g., gcc compilations, etc). Unfortunately, the errata for this ldcw change is lost. The issue is the 16-byte alignment required for ldcw semaphore instructions can only be reduced to natural alignment when the ldcw operation can be handled coherently in cache. Only PA8800 and PA8900 processors actually support doing the operation in cache. Aligning the spinlock dynamically adds two integer instructions to each spinlock. Tested on rp3440, c8000 and a500. Signed-off-by: John David Anglin Link: https://lore.kernel.org/linux-parisc/6b332788-2227-127f-ba6d-55e99ecf4ed8@bell.net/T/#t Link: https://lore.kernel.org/linux-parisc/20050609050702.GB4641@roadwarrior.mcmartin.ca/ Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/ldcw.h | 36 +++++++++++++----------- arch/parisc/include/asm/spinlock_types.h | 5 ---- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 3eb4bfc1fb36..5ed52819e956 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -2,14 +2,28 @@ #ifndef __PARISC_LDCW_H #define __PARISC_LDCW_H -#ifndef CONFIG_PA20 /* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, and GCC only guarantees 8-byte alignment for stack locals, we can't be assured of 16-byte alignment for atomic lock data even if we specify "__attribute ((aligned(16)))" in the type declaration. So, we use a struct containing an array of four ints for the atomic lock type and dynamically select the 16-byte aligned int from the array - for the semaphore. */ + for the semaphore. */ + +/* From: "Jim Hull" + I've attached a summary of the change, but basically, for PA 2.0, as + long as the ",CO" (coherent operation) completer is implemented, then the + 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead + they only require "natural" alignment (4-byte for ldcw, 8-byte for + ldcd). + + Although the cache control hint is accepted by all PA 2.0 processors, + it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still + require 16-byte alignment. If the address is unaligned, the operation + of the instruction is undefined. The ldcw instruction does not generate + unaligned data reference traps so misaligned accesses are not detected. + This hid the problem for years. So, restore the 16-byte alignment dropped + by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */ #define __PA_LDCW_ALIGNMENT 16 #define __PA_LDCW_ALIGN_ORDER 4 @@ -19,22 +33,12 @@ & ~(__PA_LDCW_ALIGNMENT - 1); \ (volatile unsigned int *) __ret; \ }) -#define __LDCW "ldcw" -#else /*CONFIG_PA20*/ -/* From: "Jim Hull" - I've attached a summary of the change, but basically, for PA 2.0, as - long as the ",CO" (coherent operation) completer is specified, then the - 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead - they only require "natural" alignment (4-byte for ldcw, 8-byte for - ldcd). */ - -#define __PA_LDCW_ALIGNMENT 4 -#define __PA_LDCW_ALIGN_ORDER 2 -#define __ldcw_align(a) (&(a)->slock) +#ifdef CONFIG_PA20 #define __LDCW "ldcw,co" - -#endif /*!CONFIG_PA20*/ +#else +#define __LDCW "ldcw" +#endif /* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. We don't explicitly expose that "*a" may be written as reload diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index 42979c5704dc..82d2384c3f22 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -3,13 +3,8 @@ #define __ASM_SPINLOCK_TYPES_H typedef struct { -#ifdef CONFIG_PA20 - volatile unsigned int slock; -# define __ARCH_SPIN_LOCK_UNLOCKED { 1 } -#else volatile unsigned int lock[4]; # define __ARCH_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } } -#endif } arch_spinlock_t; typedef struct { From 62c218124fe58372e0e1f60d5b634d21c264b337 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Sep 2023 19:00:35 +0000 Subject: [PATCH 90/94] dccp: fix dccp_v4_err()/dccp_v6_err() again commit 6af289746a636f71f4c0535a9801774118486c7a upstream. dh->dccph_x is the 9th byte (offset 8) in "struct dccp_hdr", not in the "byte 7" as Jann claimed. We need to make sure the ICMP messages are big enough, using more standard ways (no more assumptions). syzbot reported: BUG: KMSAN: uninit-value in pskb_may_pull_reason include/linux/skbuff.h:2667 [inline] BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2681 [inline] BUG: KMSAN: uninit-value in dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94 pskb_may_pull_reason include/linux/skbuff.h:2667 [inline] pskb_may_pull include/linux/skbuff.h:2681 [inline] dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94 icmpv6_notify+0x4c7/0x880 net/ipv6/icmp.c:867 icmpv6_rcv+0x19d5/0x30d0 ip6_protocol_deliver_rcu+0xda6/0x2a60 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:304 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:468 [inline] ip6_rcv_finish+0x5db/0x870 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:304 [inline] ipv6_rcv+0xda/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5523 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5637 netif_receive_skb_internal net/core/dev.c:5723 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5782 tun_rx_batched+0x83b/0x920 tun_get_user+0x564c/0x6940 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1985 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x15c0 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:559 __alloc_skb+0x318/0x740 net/core/skbuff.c:650 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6313 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2795 tun_alloc_skb drivers/net/tun.c:1531 [inline] tun_get_user+0x23cf/0x6940 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:1985 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x15c0 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd CPU: 0 PID: 4995 Comm: syz-executor153 Not tainted 6.6.0-rc1-syzkaller-00014-ga747acc0b752 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 Fixes: 977ad86c2a1b ("dccp: Fix out of bounds access in DCCP error handler") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jann Horn Reviewed-by: Jann Horn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv4.c | 9 ++------- net/dccp/ipv6.c | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b2fc9ef7708f..892fbd1f650d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -247,13 +247,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, - * which is in byte 7 of the dccp header. - * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. - * - * Later on, we want to access the sequence number fields, which are - * beyond 8 bytes, so we have to pskb_may_pull() ourselves. - */ + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return; dh = (struct dccp_hdr *)(skb->data + offset); if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) return; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f8d8caa967b1..9b8c6cf0e5ee 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -80,13 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x, - * which is in byte 7 of the dccp header. - * Our caller (icmpv6_notify()) already pulled 8 bytes for us. - * - * Later on, we want to access the sequence number fields, which are - * beyond 8 bytes, so we have to pskb_may_pull() ourselves. - */ + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return; dh = (struct dccp_hdr *)(skb->data + offset); if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) return; From 88f8a01c42535285bcf2666d1f7eb9ada0de88cc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Oct 2023 20:25:00 +0200 Subject: [PATCH 91/94] Revert "rtnetlink: Reject negative ifindexes in RTM_NEWLINK" This reverts commit 42c8406432e730cb7442d97ecfdbf47084a5af4d which is commit 30188bd7838c16a98a520db1fe9df01ffc6ed368 upstream. It was improperly backported to 4.19.y, and applied to the wrong function, which obviously causes problems. A fixed version will be applied as a separate commit later. Reported-by: Boris Ostrovsky Link: https://lore.kernel.org/r/ZSQeA8fhUT++iZvz@ostr-mac Cc: Ido Schimmel Cc: Jiri Pirko Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 794db633f1c9..79f62517e24a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2702,10 +2702,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); - else if (ifm->ifi_index < 0) { - NL_SET_ERR_MSG(extack, "ifindex can't be negative"); - return -EINVAL; - } else if (tb[IFLA_IFNAME]) + else if (tb[IFLA_IFNAME]) dev = __dev_get_by_name(net, ifname); else goto errout; From 8180d4b00f4cbc05e7e74a8e063e46070f39269f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 23 Aug 2023 09:43:48 +0300 Subject: [PATCH 92/94] rtnetlink: Reject negative ifindexes in RTM_NEWLINK commit 30188bd7838c16a98a520db1fe9df01ffc6ed368 upstream. Negative ifindexes are illegal, but the kernel does not validate the ifindex in the ancillary header of RTM_NEWLINK messages, resulting in the kernel generating a warning [1] when such an ifindex is specified. Fix by rejecting negative ifindexes. [1] WARNING: CPU: 0 PID: 5031 at net/core/dev.c:9593 dev_index_reserve+0x1a2/0x1c0 net/core/dev.c:9593 [...] Call Trace: register_netdevice+0x69a/0x1490 net/core/dev.c:10081 br_dev_newlink+0x27/0x110 net/bridge/br_netlink.c:1552 rtnl_newlink_create net/core/rtnetlink.c:3471 [inline] __rtnl_newlink+0x115e/0x18c0 net/core/rtnetlink.c:3688 rtnl_newlink+0x67/0xa0 net/core/rtnetlink.c:3701 rtnetlink_rcv_msg+0x439/0xd30 net/core/rtnetlink.c:6427 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2545 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0x536/0x810 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x93c/0xe40 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:728 [inline] sock_sendmsg+0xd9/0x180 net/socket.c:751 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2538 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2592 __sys_sendmsg+0x117/0x1e0 net/socket.c:2621 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 38f7b870d4a6 ("[RTNETLINK]: Link creation API") Reported-by: syzbot+5ba06978f34abb058571@syzkaller.appspotmail.com Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230823064348.2252280-1-idosch@nvidia.com Signed-off-by: Paolo Abeni Cc: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 79f62517e24a..0d3f724da78b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2970,9 +2970,12 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, ifname[0] = '\0'; ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) + if (ifm->ifi_index > 0) { dev = __dev_get_by_index(net, ifm->ifi_index); - else { + } else if (ifm->ifi_index < 0) { + NL_SET_ERR_MSG(extack, "ifindex can't be negative"); + return -EINVAL; + } else { if (ifname[0]) dev = __dev_get_by_name(net, ifname); else From 3fdf2be9089b5096a28e76376656c60ce410ac4a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 28 Aug 2023 08:09:47 +0200 Subject: [PATCH 93/94] xen/events: replace evtchn_rwlock with RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 87797fad6cce28ec9be3c13f031776ff4f104cfc upstream. In unprivileged Xen guests event handling can cause a deadlock with Xen console handling. The evtchn_rwlock and the hvc_lock are taken in opposite sequence in __hvc_poll() and in Xen console IRQ handling. Normally this is no problem, as the evtchn_rwlock is taken as a reader in both paths, but as soon as an event channel is being closed, the lock will be taken as a writer, which will cause read_lock() to block: CPU0 CPU1 CPU2 (IRQ handling) (__hvc_poll()) (closing event channel) read_lock(evtchn_rwlock) spin_lock(hvc_lock) write_lock(evtchn_rwlock) [blocks] spin_lock(hvc_lock) [blocks] read_lock(evtchn_rwlock) [blocks due to writer waiting, and not in_interrupt()] This issue can be avoided by replacing evtchn_rwlock with RCU in xen_free_irq(). Note that RCU is used only to delay freeing of the irq_info memory. There is no RCU based dereferencing or replacement of pointers involved. In order to avoid potential races between removing the irq_info reference and handling of interrupts, set the irq_info pointer to NULL only when freeing its memory. The IRQ itself must be freed at that time, too, as otherwise the same IRQ number could be allocated again before handling of the old instance would have been finished. This is XSA-441 / CVE-2023-34324. Fixes: 54c9de89895e ("xen/events: add a new "late EOI" evtchn framework") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Reviewed-by: Julien Grall Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 85 ++++++++++++++-------------- drivers/xen/events/events_internal.h | 2 + 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b6b3131cb079..960666aba12b 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -82,23 +82,13 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); -/* - * Lock protecting event handling loop against removing event channels. - * Adding of event channels is no issue as the associated IRQ becomes active - * only after everything is setup (before request_[threaded_]irq() the handler - * can't be entered for an event, as the event channel will be unmasked only - * then). - */ -static DEFINE_RWLOCK(evtchn_rwlock); - /* * Lock hierarchy: * * irq_mapping_update_lock - * evtchn_rwlock - * IRQ-desc lock - * percpu eoi_list_lock - * irq_info->lock + * IRQ-desc lock + * percpu eoi_list_lock + * irq_info->lock */ static LIST_HEAD(xen_irq_list_head); @@ -213,6 +203,22 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info) irq_set_chip_data(irq, info); } +static void delayed_free_irq(struct work_struct *work) +{ + struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, + rwork); + unsigned int irq = info->irq; + + /* Remove the info pointer only now, with no potential users left. */ + set_info_for_irq(irq, NULL); + + kfree(info); + + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + /* Constructors for packed IRQ information. */ static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, @@ -547,33 +553,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); while (true) { - spin_lock(&eoi->eoi_list_lock); + spin_lock_irqsave(&eoi->eoi_list_lock, flags); info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, eoi_list); - if (info == NULL || now < info->eoi_time) { - spin_unlock(&eoi->eoi_list_lock); + if (info == NULL) + break; + + if (now < info->eoi_time) { + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, + info->eoi_time - now); break; } list_del_init(&info->eoi_list); - spin_unlock(&eoi->eoi_list_lock); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); info->eoi_time = 0; xen_irq_lateeoi_locked(info, false); } - if (info) - mod_delayed_work_on(info->eoi_cpu, system_wq, - &eoi->delayed, info->eoi_time - now); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } static void xen_cpu_init_eoi(unsigned int cpu) @@ -588,16 +597,15 @@ static void xen_cpu_init_eoi(unsigned int cpu) void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) { struct irq_info *info; - unsigned long flags; - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); info = info_for_irq(irq); if (info) xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); @@ -616,6 +624,7 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); set_info_for_irq(irq, info); @@ -668,31 +677,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); - unsigned long flags; if (WARN_ON(!info)) return; - write_lock_irqsave(&evtchn_rwlock, flags); - if (!list_empty(&info->eoi_list)) lateeoi_list_del(info); list_del(&info->list); - set_info_for_irq(irq, NULL); - WARN_ON(info->refcnt > 0); - write_unlock_irqrestore(&evtchn_rwlock, flags); - - kfree(info); - - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < nr_legacy_irqs()) - return; - - irq_free_desc(irq); + queue_rcu_work(system_wq, &info->rwork); } static void xen_evtchn_close(unsigned int port) @@ -1603,7 +1599,14 @@ static void __xen_evtchn_do_upcall(void) unsigned count; struct evtchn_loop_ctrl ctrl = { 0 }; - read_lock(&evtchn_rwlock); + /* + * When closing an event channel the associated IRQ must not be freed + * until all cpus have left the event handling loop. This is ensured + * by taking the rcu_read_lock() while handling events, as freeing of + * the IRQ is handled via queue_rcu_work() _after_ closing the event + * channel. + */ + rcu_read_lock(); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1620,7 +1623,7 @@ static void __xen_evtchn_do_upcall(void) } while (count != 1 || vcpu_info->evtchn_upcall_pending); out: - read_unlock(&evtchn_rwlock); + rcu_read_unlock(); /* * Increment irq_epoch only now to defer EOIs only for diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index cc37b711491c..1d9d9e6f1dee 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -8,6 +8,7 @@ */ #ifndef __EVENTS_INTERNAL_H__ #define __EVENTS_INTERNAL_H__ +#include /* Interrupt types. */ enum xen_irq_type { @@ -33,6 +34,7 @@ enum xen_irq_type { struct irq_info { struct list_head list; struct list_head eoi_list; + struct rcu_work rwork; short refcnt; short spurious_cnt; short type; /* type */ From 1b540579cf668bd805cdcca5285f579dcf6e4312 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Oct 2023 21:45:02 +0200 Subject: [PATCH 94/94] Linux 4.19.296 Link: https://lore.kernel.org/r/20231009130111.518916887@linuxfoundation.org Tested-by: Shuah Khan Tested-by: Jon Hunter Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6eb08388fa15..002d81755142 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 295 +SUBLEVEL = 296 EXTRAVERSION = NAME = "People's Front"