From 4ce844d0f50f789cc70de7bd02511df0f40c64b6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 26 Jan 2024 10:36:31 -0700 Subject: [PATCH 01/13] powerpc: Use always instead of always-y in for crtsavres.o This commit is for linux-4.19.y only, it has no direct upstream equivalent. Prior to commit 5f2fb52fac15 ("kbuild: rename hostprogs-y/always to hostprogs/always-y"), always-y did not exist, making the backport of mainline commit 1b1e38002648 ("powerpc: add crtsavres.o to always-y instead of extra-y") to linux-4.19.y as commit b7b85ec5ec15 ("powerpc: add crtsavres.o to always-y instead of extra-y") incorrect, breaking the build with linkers that need crtsavres.o: ld.lld: error: cannot open arch/powerpc/lib/crtsavres.o: No such file or directory Backporting the aforementioned kbuild commit is not suitable for stable due to its size and number of conflicts, so transform the always-y usage to an equivalent form using always, which resolves the build issues. Fixes: b7b85ec5ec15 ("powerpc: add crtsavres.o to always-y instead of extra-y") Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 6f1e57182876..f0aa6fc8c6b2 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -21,8 +21,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o # 64-bit linker creates .sfpr on demand for final link (vmlinux), # so it is only needed for modules, and only for older linkers which # do not support --save-restore-funcs -ifeq ($(call ld-ifversion, -lt, 225000000, y),y) -always-$(CONFIG_PPC64) += crtsavres.o +ifeq ($(call ld-ifversion, -lt, 225000000, y)$(CONFIG_PPC64),yy) +always += crtsavres.o endif obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ From 771df0145297a1a9f1e7f799da43f8b0f8850e7c Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Thu, 25 Jan 2024 19:06:54 +0100 Subject: [PATCH 02/13] x86/CPU/AMD: Fix disabling XSAVES on AMD family 0x17 due to erratum The stable kernel version backport of the patch disabling XSAVES on AMD Zen family 0x17 applied this change to the wrong function (init_amd_k6()), one which isn't called for Zen CPUs. Move the erratum to the init_amd_zn() function instead. Add an explicit family 0x17 check to the erratum so nothing will break if someone naively makes this kernel version call init_amd_zn() also for family 0x19 in the future (as the current upstream code does). Fixes: f028a7db9824 ("x86/CPU/AMD: Disable XSAVES on AMD family 0x17") Signed-off-by: Maciej S. Szmigiero Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 84667781c41d..5b75a4ff6802 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -271,15 +271,6 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } #endif - /* - * Work around Erratum 1386. The XSAVES instruction malfunctions in - * certain circumstances on Zen1/2 uarch, and not all parts have had - * updated microcode at the time of writing (March 2023). - * - * Affected parts all have no supervisor XSAVE states, meaning that - * the XSAVEC instruction (which works fine) is equivalent. - */ - clear_cpu_cap(c, X86_FEATURE_XSAVES); } static void init_amd_k7(struct cpuinfo_x86 *c) @@ -979,6 +970,17 @@ static void init_amd_zn(struct cpuinfo_x86 *c) if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) set_cpu_cap(c, X86_FEATURE_BTC_NO); } + + /* + * Work around Erratum 1386. The XSAVES instruction malfunctions in + * certain circumstances on Zen1/2 uarch, and not all parts have had + * updated microcode at the time of writing (March 2023). + * + * Affected parts all have no supervisor XSAVE states, meaning that + * the XSAVEC instruction (which works fine) is equivalent. + */ + if (c->x86 == 0x17) + clear_cpu_cap(c, X86_FEATURE_XSAVES); } static bool cpu_has_zenbleed_microcode(void) From d31978bfec1d251a75d4a038e564ef2ff9d8be40 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Aug 2020 13:44:59 +0300 Subject: [PATCH 03/13] driver core: Annotate dev_err_probe() with __must_check commit e1f82a0dcf388d98bcc7ad195c03bd812405e6b2 upstream. We have got already new users of this API which interpret it differently and miss the opportunity to optimize their code. In order to avoid similar cases in the future, annotate dev_err_probe() with __must_check. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200826104459.81979-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 0714d6e5d500..2e5926792f4e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1582,7 +1582,7 @@ do { \ dev_driver_string(dev), dev_name(dev), ## arg) extern __printf(3, 4) -int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +int __must_check dev_err_probe(const struct device *dev, int err, const char *fmt, ...); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ From b715d543d42e8e4ddd645193410cd4511fc46d6c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Sep 2020 09:37:40 +0200 Subject: [PATCH 04/13] Revert "driver core: Annotate dev_err_probe() with __must_check" commit f601e8f37c2c1c52f2923fffc48204a7f7dc023d upstream. This reverts commit e1f82a0dcf388d98bcc7ad195c03bd812405e6b2 as it's already starting to cause build warnings in linux-next for things that are "obviously correct". It's up to driver authors do "do the right thing" here with this function, and if they don't want to call it as the last line of a function, that's up to them, otherwise code that looks like: ret = dev_err_probe(..., ret, ...); does look really "odd". Reported-by: Stephen Rothwell Reported-by: Krzysztof Kozlowski Fixes: e1f82a0dcf38 ("driver core: Annotate dev_err_probe() with __must_check") Cc: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 2e5926792f4e..0714d6e5d500 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1582,7 +1582,7 @@ do { \ dev_driver_string(dev), dev_name(dev), ## arg) extern __printf(3, 4) -int __must_check dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ From cf07cb793264fd3c459918dda7e839d6a279493c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 28 Aug 2020 18:14:35 +0200 Subject: [PATCH 05/13] driver code: print symbolic error code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 693a8e936590f93451e6f5a3d748616f5a59c80b upstream. dev_err_probe() prepends the message with an error code. Let's make it more readable by translating the code to a more recognisable symbol. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/ea3f973e4708919573026fdce52c264db147626d.1598630856.git.mirq-linux@rere.qmqm.pl Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index b66647277d52..8a53e346e7aa 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3366,9 +3366,9 @@ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) vaf.va = &args; if (err != -EPROBE_DEFER) - dev_err(dev, "error %d: %pV", err, &vaf); + dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); else - dev_dbg(dev, "error %d: %pV", err, &vaf); + dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); va_end(args); From 4d61ff79b439fe9cd5eaa3363a25853f230e2026 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 9 Sep 2020 11:53:43 +0200 Subject: [PATCH 06/13] drivers: core: fix kernel-doc markup for dev_err_probe() commit 074b3aad307de6126fbac1fff4996d1034b48fee upstream. There are two literal blocks there. Fix the markups, in order to produce the right html output and solve those warnings: ./drivers/base/core.c:4218: WARNING: Unexpected indentation. ./drivers/base/core.c:4222: WARNING: Definition list ends without a blank line; unexpected unindent. ./drivers/base/core.c:4223: WARNING: Block quote ends without a blank line; unexpected unindent. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 8a53e346e7aa..5fd2b887cc04 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3344,13 +3344,15 @@ define_dev_printk_level(_dev_info, KERN_INFO); * This helper implements common pattern present in probe functions for error * checking: print debug or error message depending if the error value is * -EPROBE_DEFER and propagate error upwards. - * It replaces code sequence: + * It replaces code sequence:: * if (err != -EPROBE_DEFER) * dev_err(dev, ...); * else * dev_dbg(dev, ...); * return err; - * with + * + * with:: + * * return dev_err_probe(dev, err, ...); * * Returns @err. From 27aea64838914c6122db5b8bd4bed865c9736f22 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 18 Jan 2024 12:32:10 +0800 Subject: [PATCH 07/13] net/smc: fix illegal rmb_desc access in SMC-D connection dump [ Upstream commit dbc153fd3c142909e564bb256da087e13fbf239c ] A crash was found when dumping SMC-D connections. It can be reproduced by following steps: - run nginx/wrk test: smc_run nginx smc_run wrk -t 16 -c 1000 -d -H 'Connection: Close' - continuously dump SMC-D connections in parallel: watch -n 1 'smcss -D' BUG: kernel NULL pointer dereference, address: 0000000000000030 CPU: 2 PID: 7204 Comm: smcss Kdump: loaded Tainted: G E 6.7.0+ #55 RIP: 0010:__smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x66/0x150 ? exc_page_fault+0x69/0x140 ? asm_exc_page_fault+0x26/0x30 ? __smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] ? __kmalloc_node_track_caller+0x35d/0x430 ? __alloc_skb+0x77/0x170 smc_diag_dump_proto+0xd0/0xf0 [smc_diag] smc_diag_dump+0x26/0x60 [smc_diag] netlink_dump+0x19f/0x320 __netlink_dump_start+0x1dc/0x300 smc_diag_handler_dump+0x6a/0x80 [smc_diag] ? __pfx_smc_diag_dump+0x10/0x10 [smc_diag] sock_diag_rcv_msg+0x121/0x140 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x5a/0x110 sock_diag_rcv+0x28/0x40 netlink_unicast+0x22a/0x330 netlink_sendmsg+0x1f8/0x420 __sock_sendmsg+0xb0/0xc0 ____sys_sendmsg+0x24e/0x300 ? copy_msghdr_from_user+0x62/0x80 ___sys_sendmsg+0x7c/0xd0 ? __do_fault+0x34/0x160 ? do_read_fault+0x5f/0x100 ? do_fault+0xb0/0x110 ? __handle_mm_fault+0x2b0/0x6c0 __sys_sendmsg+0x4d/0x80 do_syscall_64+0x69/0x180 entry_SYSCALL_64_after_hwframe+0x6e/0x76 It is possible that the connection is in process of being established when we dump it. Assumed that the connection has been registered in a link group by smc_conn_create() but the rmb_desc has not yet been initialized by smc_buf_create(), thus causing the illegal access to conn->rmb_desc. So fix it by checking before dump. Fixes: 4b1b7d3b30a6 ("net/smc: add SMC-D diag support") Signed-off-by: Wen Gu Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/smc/smc_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 6c4a7a5938b7..3ab14678afff 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -167,7 +167,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc->conn.lgr && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; From 91759822dd336c20f817e6fd59cccee3952599f7 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 18 Jan 2024 21:03:06 +0800 Subject: [PATCH 08/13] vlan: skip nested type that is not IFLA_VLAN_QOS_MAPPING [ Upstream commit 6c21660fe221a15c789dee2bc2fd95516bc5aeaf ] In the vlan_changelink function, a loop is used to parse the nested attributes IFLA_VLAN_EGRESS_QOS and IFLA_VLAN_INGRESS_QOS in order to obtain the struct ifla_vlan_qos_mapping. These two nested attributes are checked in the vlan_validate_qos_map function, which calls nla_validate_nested_deprecated with the vlan_map_policy. However, this deprecated validator applies a LIBERAL strictness, allowing the presence of an attribute with the type IFLA_VLAN_QOS_UNSPEC. Consequently, the loop in vlan_changelink may parse an attribute of type IFLA_VLAN_QOS_UNSPEC and believe it carries a payload of struct ifla_vlan_qos_mapping, which is not necessarily true. To address this issue and ensure compatibility, this patch introduces two type checks that skip attributes whose type is not IFLA_VLAN_QOS_MAPPING. Fixes: 07b5b17e157b ("[VLAN]: Use rtnl_link API") Signed-off-by: Lin Ma Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240118130306.1644001-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/8021q/vlan_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 74042b9d7f73..231eae2f1685 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -120,12 +120,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) From 84e9d10419f6f4f3f3cd8f9aaf44a48719aa4b1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2024 18:36:25 +0000 Subject: [PATCH 09/13] llc: make llc_ui_sendmsg() more robust against bonding changes [ Upstream commit dad555c816a50c6a6a8a86be1f9177673918c647 ] syzbot was able to trick llc_ui_sendmsg(), allocating an skb with no headroom, but subsequently trying to push 14 bytes of Ethernet header [1] Like some others, llc_ui_sendmsg() releases the socket lock before calling sock_alloc_send_skb(). Then it acquires it again, but does not redo all the sanity checks that were performed. This fix: - Uses LL_RESERVED_SPACE() to reserve space. - Check all conditions again after socket lock is held again. - Do not account Ethernet header for mtu limitation. [1] skbuff: skb_under_panic: text:ffff800088baa334 len:1514 put:14 head:ffff0000c9c37000 data:ffff0000c9c36ff2 tail:0x5dc end:0x6c0 dev:bond0 kernel BUG at net/core/skbuff.c:193 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 6875 Comm: syz-executor.0 Not tainted 6.7.0-rc8-syzkaller-00101-g0802e17d9aca-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic net/core/skbuff.c:189 [inline] pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 lr : skb_panic net/core/skbuff.c:189 [inline] lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 sp : ffff800096f97000 x29: ffff800096f97010 x28: ffff80008cc8d668 x27: dfff800000000000 x26: ffff0000cb970c90 x25: 00000000000005dc x24: ffff0000c9c36ff2 x23: ffff0000c9c37000 x22: 00000000000005ea x21: 00000000000006c0 x20: 000000000000000e x19: ffff800088baa334 x18: 1fffe000368261ce x17: ffff80008e4ed000 x16: ffff80008a8310f8 x15: 0000000000000001 x14: 1ffff00012df2d58 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000001 x10: 0000000000ff0100 x9 : e28a51f1087e8400 x8 : e28a51f1087e8400 x7 : ffff80008028f8d0 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000001 x3 : ffff800082b78714 x2 : 0000000000000001 x1 : 0000000100000000 x0 : 0000000000000089 Call trace: skb_panic net/core/skbuff.c:189 [inline] skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 skb_push+0xf0/0x108 net/core/skbuff.c:2451 eth_header+0x44/0x1f8 net/ethernet/eth.c:83 dev_hard_header include/linux/netdevice.h:3188 [inline] llc_mac_hdr_init+0x110/0x17c net/llc/llc_output.c:33 llc_sap_action_send_xid_c+0x170/0x344 net/llc/llc_s_ac.c:85 llc_exec_sap_trans_actions net/llc/llc_sap.c:153 [inline] llc_sap_next_state net/llc/llc_sap.c:182 [inline] llc_sap_state_process+0x1ec/0x774 net/llc/llc_sap.c:209 llc_build_and_send_xid_pkt+0x12c/0x1c0 net/llc/llc_sap.c:270 llc_ui_sendmsg+0x7bc/0xb1c net/llc/af_llc.c:997 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x194/0x274 net/socket.c:767 splice_to_socket+0x7cc/0xd58 fs/splice.c:881 do_splice_from fs/splice.c:933 [inline] direct_splice_actor+0xe4/0x1c0 fs/splice.c:1142 splice_direct_to_actor+0x2a0/0x7e4 fs/splice.c:1088 do_splice_direct+0x20c/0x348 fs/splice.c:1194 do_sendfile+0x4bc/0xc70 fs/read_write.c:1254 __do_sys_sendfile64 fs/read_write.c:1322 [inline] __se_sys_sendfile64 fs/read_write.c:1308 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1308 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x158 arch/arm64/kernel/entry-common.c:678 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595 Code: aa1803e6 aa1903e7 a90023f5 94792f6a (d4210000) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+2a7024e9502df538e8ef@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240118183625.4007013-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/llc/af_llc.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5cba9199c3c9..a5c104ea477c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -926,14 +926,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { + DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; + int rc = -EINVAL, copied = 0, hdrlen, hh_len; struct sk_buff *skb = NULL; + struct net_device *dev; size_t size = 0; - int rc = -EINVAL, copied = 0, hdrlen; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); @@ -953,22 +954,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (rc) goto out; } - hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); + dev = llc->dev; + hh_len = LL_RESERVED_SPACE(dev); + hdrlen = llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > llc->dev->mtu) - size = llc->dev->mtu; + size = min_t(size_t, size, READ_ONCE(dev->mtu)); copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); - skb = sock_alloc_send_skb(sk, size, noblock, &rc); + skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc); lock_sock(sk); if (!skb) goto out; - skb->dev = llc->dev; + if (sock_flag(sk, SOCK_ZAPPED) || + llc->dev != dev || + hdrlen != llc_ui_header_len(sk, addr) || + hh_len != LL_RESERVED_SPACE(dev) || + size > READ_ONCE(dev->mtu)) + goto out; + skb->dev = dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); - skb_reserve(skb, hdrlen); + skb_reserve(skb, hh_len + hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; From 165ad1e22779685c3ed3dd349c6c4c632309cc62 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 18 Jan 2024 17:55:15 -0800 Subject: [PATCH 10/13] llc: Drop support for ETH_P_TR_802_2. [ Upstream commit e3f9bed9bee261e3347131764e42aeedf1ffea61 ] syzbot reported an uninit-value bug below. [0] llc supports ETH_P_802_2 (0x0004) and used to support ETH_P_TR_802_2 (0x0011), and syzbot abused the latter to trigger the bug. write$tun(r0, &(0x7f0000000040)={@val={0x0, 0x11}, @val, @mpls={[], @llc={@snap={0xaa, 0x1, ')', "90e5dd"}}}}, 0x16) llc_conn_handler() initialises local variables {saddr,daddr}.mac based on skb in llc_pdu_decode_sa()/llc_pdu_decode_da() and passes them to __llc_lookup(). However, the initialisation is done only when skb->protocol is htons(ETH_P_802_2), otherwise, __llc_lookup_established() and __llc_lookup_listener() will read garbage. The missing initialisation existed prior to commit 211ed865108e ("net: delete all instances of special processing for token ring"). It removed the part to kick out the token ring stuff but forgot to close the door allowing ETH_P_TR_802_2 packets to sneak into llc_rcv(). Let's remove llc_tr_packet_type and complete the deprecation. [0]: BUG: KMSAN: uninit-value in __llc_lookup_established+0xe9d/0xf90 __llc_lookup_established+0xe9d/0xf90 __llc_lookup net/llc/llc_conn.c:611 [inline] llc_conn_handler+0x4bd/0x1360 net/llc/llc_conn.c:791 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 __netif_receive_skb_one_core net/core/dev.c:5527 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5641 netif_receive_skb_internal net/core/dev.c:5727 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5786 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x53af/0x66d0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2020 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x1490 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Local variable daddr created at: llc_conn_handler+0x53/0x1360 net/llc/llc_conn.c:783 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 CPU: 1 PID: 5004 Comm: syz-executor994 Not tainted 6.6.0-syzkaller-14500-g1c41041124bd #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Fixes: 211ed865108e ("net: delete all instances of special processing for token ring") Reported-by: syzbot+b5ad66046b913bc04c6f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b5ad66046b913bc04c6f Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119015515.61898-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/llc_pdu.h | 6 ++---- net/llc/llc_core.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 49aa79c7b278..581cd37aa98b 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); + memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** @@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); + memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 64d4bef04e73..4900a27b5176 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = { .func = llc_rcv, }; -static struct packet_type llc_tr_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TR_802_2), - .func = llc_rcv, -}; - static int __init llc_init(void) { dev_add_pack(&llc_packet_type); - dev_add_pack(&llc_tr_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); - dev_remove_pack(&llc_tr_packet_type); } module_init(llc_init); From 344350bfa3b4b37d7c3d5a00536e6fbf0e953fbf Mon Sep 17 00:00:00 2001 From: Sharath Srinivasan Date: Fri, 19 Jan 2024 17:48:39 -0800 Subject: [PATCH 11/13] net/rds: Fix UBSAN: array-index-out-of-bounds in rds_cmsg_recv [ Upstream commit 13e788deb7348cc88df34bed736c3b3b9927ea52 ] Syzcaller UBSAN crash occurs in rds_cmsg_recv(), which reads inc->i_rx_lat_trace[j + 1] with index 4 (3 + 1), but with array size of 4 (RDS_RX_MAX_TRACES). Here 'j' is assigned from rs->rs_rx_trace[i] and in-turn from trace.rx_trace_pos[i] in rds_recv_track_latency(), with both arrays sized 3 (RDS_MSG_RX_DGRAM_TRACE_MAX). So fix the off-by-one bounds check in rds_recv_track_latency() to prevent a potential crash in rds_cmsg_recv(). Found by syzcaller: ================================================================= UBSAN: array-index-out-of-bounds in net/rds/recv.c:585:39 index 4 is out of range for type 'u64 [4]' CPU: 1 PID: 8058 Comm: syz-executor228 Not tainted 6.6.0-gd2f51b3516da #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x136/0x150 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:217 [inline] __ubsan_handle_out_of_bounds+0xd5/0x130 lib/ubsan.c:348 rds_cmsg_recv+0x60d/0x700 net/rds/recv.c:585 rds_recvmsg+0x3fb/0x1610 net/rds/recv.c:716 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0xe2/0x160 net/socket.c:1066 __sys_recvfrom+0x1b6/0x2f0 net/socket.c:2246 __do_sys_recvfrom net/socket.c:2264 [inline] __se_sys_recvfrom net/socket.c:2260 [inline] __x64_sys_recvfrom+0xe0/0x1b0 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b ================================================================== Fixes: 3289025aedc0 ("RDS: add receive message trace used by application") Reported-by: Chenyuan Yang Closes: https://lore.kernel.org/linux-rdma/CALGdzuoVdq-wtQ4Az9iottBqC5cv9ZhcE5q8N7LfYFvkRsOVcw@mail.gmail.com/ Signed-off-by: Sharath Srinivasan Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/af_rds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index cd7e01ea8144..5eb71d276706 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -385,7 +385,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { - if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { + if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } From 5022b331c041e8c54b9a6a3251579bd1e8c0fc0b Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 22 Jan 2024 16:09:28 +0100 Subject: [PATCH 12/13] tracing: Ensure visibility when inserting an element into tracing_map [ Upstream commit 2b44760609e9eaafc9d234a6883d042fc21132a7 ] Running the following two commands in parallel on a multi-processor AArch64 machine can sporadically produce an unexpected warning about duplicate histogram entries: $ while true; do echo hist:key=id.syscall:val=hitcount > \ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist sleep 0.001 done $ stress-ng --sysbadaddr $(nproc) The warning looks as follows: [ 2911.172474] ------------[ cut here ]------------ [ 2911.173111] Duplicates detected: 1 [ 2911.173574] WARNING: CPU: 2 PID: 12247 at kernel/trace/tracing_map.c:983 tracing_map_sort_entries+0x3e0/0x408 [ 2911.174702] Modules linked in: iscsi_ibft(E) iscsi_boot_sysfs(E) rfkill(E) af_packet(E) nls_iso8859_1(E) nls_cp437(E) vfat(E) fat(E) ena(E) tiny_power_button(E) qemu_fw_cfg(E) button(E) fuse(E) efi_pstore(E) ip_tables(E) x_tables(E) xfs(E) libcrc32c(E) aes_ce_blk(E) aes_ce_cipher(E) crct10dif_ce(E) polyval_ce(E) polyval_generic(E) ghash_ce(E) gf128mul(E) sm4_ce_gcm(E) sm4_ce_ccm(E) sm4_ce(E) sm4_ce_cipher(E) sm4(E) sm3_ce(E) sm3(E) sha3_ce(E) sha512_ce(E) sha512_arm64(E) sha2_ce(E) sha256_arm64(E) nvme(E) sha1_ce(E) nvme_core(E) nvme_auth(E) t10_pi(E) sg(E) scsi_mod(E) scsi_common(E) efivarfs(E) [ 2911.174738] Unloaded tainted modules: cppc_cpufreq(E):1 [ 2911.180985] CPU: 2 PID: 12247 Comm: cat Kdump: loaded Tainted: G E 6.7.0-default #2 1b58bbb22c97e4399dc09f92d309344f69c44a01 [ 2911.182398] Hardware name: Amazon EC2 c7g.8xlarge/, BIOS 1.0 11/1/2018 [ 2911.183208] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 2911.184038] pc : tracing_map_sort_entries+0x3e0/0x408 [ 2911.184667] lr : tracing_map_sort_entries+0x3e0/0x408 [ 2911.185310] sp : ffff8000a1513900 [ 2911.185750] x29: ffff8000a1513900 x28: ffff0003f272fe80 x27: 0000000000000001 [ 2911.186600] x26: ffff0003f272fe80 x25: 0000000000000030 x24: 0000000000000008 [ 2911.187458] x23: ffff0003c5788000 x22: ffff0003c16710c8 x21: ffff80008017f180 [ 2911.188310] x20: ffff80008017f000 x19: ffff80008017f180 x18: ffffffffffffffff [ 2911.189160] x17: 0000000000000000 x16: 0000000000000000 x15: ffff8000a15134b8 [ 2911.190015] x14: 0000000000000000 x13: 205d373432323154 x12: 5b5d313131333731 [ 2911.190844] x11: 00000000fffeffff x10: 00000000fffeffff x9 : ffffd1b78274a13c [ 2911.191716] x8 : 000000000017ffe8 x7 : c0000000fffeffff x6 : 000000000057ffa8 [ 2911.192554] x5 : ffff0012f6c24ec0 x4 : 0000000000000000 x3 : ffff2e5b72b5d000 [ 2911.193404] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0003ff254480 [ 2911.194259] Call trace: [ 2911.194626] tracing_map_sort_entries+0x3e0/0x408 [ 2911.195220] hist_show+0x124/0x800 [ 2911.195692] seq_read_iter+0x1d4/0x4e8 [ 2911.196193] seq_read+0xe8/0x138 [ 2911.196638] vfs_read+0xc8/0x300 [ 2911.197078] ksys_read+0x70/0x108 [ 2911.197534] __arm64_sys_read+0x24/0x38 [ 2911.198046] invoke_syscall+0x78/0x108 [ 2911.198553] el0_svc_common.constprop.0+0xd0/0xf8 [ 2911.199157] do_el0_svc+0x28/0x40 [ 2911.199613] el0_svc+0x40/0x178 [ 2911.200048] el0t_64_sync_handler+0x13c/0x158 [ 2911.200621] el0t_64_sync+0x1a8/0x1b0 [ 2911.201115] ---[ end trace 0000000000000000 ]--- The problem appears to be caused by CPU reordering of writes issued from __tracing_map_insert(). The check for the presence of an element with a given key in this function is: val = READ_ONCE(entry->val); if (val && keys_match(key, val->key, map->key_size)) ... The write of a new entry is: elt = get_free_elt(map); memcpy(elt->key, key, map->key_size); entry->val = elt; The "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;" stores may become visible in the reversed order on another CPU. This second CPU might then incorrectly determine that a new key doesn't match an already present val->key and subsequently insert a new element, resulting in a duplicate. Fix the problem by adding a write barrier between "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;", and for good measure, also use WRITE_ONCE(entry->val, elt) for publishing the element. The sequence pairs with the mentioned "READ_ONCE(entry->val);" and the "val->key" check which has an address dependency. The barrier is placed on a path executed when adding an element for a new key. Subsequent updates targeting the same key remain unaffected. From the user's perspective, the issue was introduced by commit c193707dde77 ("tracing: Remove code which merges duplicates"), which followed commit cbf4100efb8f ("tracing: Add support to detect and avoid duplicates"). The previous code operated differently; it inherently expected potential races which result in duplicates but merged them later when they occurred. Link: https://lore.kernel.org/linux-trace-kernel/20240122150928.27725-1-petr.pavlu@suse.com Fixes: c193707dde77 ("tracing: Remove code which merges duplicates") Signed-off-by: Petr Pavlu Acked-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/tracing_map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 83c2a0598c64..33c463967bb3 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; From 52c46caf283bfa3016a5e41363df93c02037f788 Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Fri, 19 Jan 2024 11:01:33 -0800 Subject: [PATCH 13/13] tcp: Add memory barrier to tcp_push() [ Upstream commit 7267e8dcad6b2f9fce05a6a06335d7040acbc2b6 ] On CPUs with weak memory models, reads and updates performed by tcp_push to the sk variables can get reordered leaving the socket throttled when it should not. The tasklet running tcp_wfree() may also not observe the memory updates in time and will skip flushing any packets throttled by tcp_push(), delaying the sending. This can pathologically cause 40ms extra latency due to bad interactions with delayed acks. Adding a memory barrier in tcp_push removes the bug, similarly to the previous commit bf06200e732d ("tcp: tsq: fix nonagle handling"). smp_mb__after_atomic() is used to not incur in unnecessary overhead on x86 since not affected. Patch has been tested using an AWS c7g.2xlarge instance with Ubuntu 22.04 and Apache Tomcat 9.0.83 running the basic servlet below: import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; public class HelloWorldServlet extends HttpServlet { @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=utf-8"); OutputStreamWriter osw = new OutputStreamWriter(response.getOutputStream(),"UTF-8"); String s = "a".repeat(3096); osw.write(s,0,s.length()); osw.flush(); } } Load was applied using wrk2 (https://github.com/kinvolk/wrk2) from an AWS c6i.8xlarge instance. Before the patch an additional 40ms latency from P99.99+ values is observed while, with the patch, the extra latency disappears. No patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.91ms 75.000% 1.13ms 90.000% 1.46ms 99.000% 1.74ms 99.900% 1.89ms 99.990% 41.95ms <<< 40+ ms extra latency 99.999% 48.32ms 100.000% 48.96ms With patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.90ms 75.000% 1.13ms 90.000% 1.45ms 99.000% 1.72ms 99.900% 1.83ms 99.990% 2.11ms <<< no 40+ ms extra latency 99.999% 2.53ms 100.000% 2.62ms Patch has been also tested on x86 (m7i.2xlarge instance) which it is not affected by this issue and the patch doesn't introduce any additional delay. Fixes: 7aa5470c2c09 ("tcp: tsq: move tsq_flags close to sk_wmem_alloc") Signed-off-by: Salvatore Dipietro Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119190133.43698-1-dipiets@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 00648a478c6a..712186336997 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -733,6 +733,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); + smp_mb__after_atomic(); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED.