From ce7d8be2eaa4cab3032e256d154d1c33843d2367 Mon Sep 17 00:00:00 2001 From: Xiaolong Huang Date: Fri, 20 Aug 2021 03:50:34 +0800 Subject: [PATCH 01/34] net: qrtr: fix another OOB Read in qrtr_endpoint_post commit 7e78c597c3ebfd0cb329aa09a838734147e4f117 upstream. This check was incomplete, did not consider size is 0: if (len != ALIGN(size, 4) + hdrlen) goto err; if size from qrtr_hdr is 0, the result of ALIGN(size, 4) will be 0, In case of len == hdrlen and size == 0 in header this check won't fail and if (cb->type == QRTR_TYPE_NEW_SERVER) { /* Remote node endpoint can bridge other distant nodes */ const struct qrtr_ctrl_pkt *pkt = data + hdrlen; qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); } will also read out of bound from data, which is hdrlen allocated block. Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets") Fixes: ad9d24c9429e ("net: qrtr: fix OOB Read in qrtr_endpoint_post") Signed-off-by: Xiaolong Huang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 1e2772913957..128d0a48478d 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -321,7 +321,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) goto err; } - if (len != ALIGN(size, 4) + hdrlen) + if (!size || len != ALIGN(size, 4) + hdrlen) goto err; if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA) From c348d806ed1d3075af52345344243824d72c4945 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Aug 2021 10:55:31 -0300 Subject: [PATCH 02/34] bpf: Do not use ax register in interpreter on div/mod Partially undo old commit 144cd91c4c2b ("bpf: move tmp variable into ax register in interpreter"). The reason we need this here is because ax register will be used for holding temporary state for div/mod instruction which otherwise interpreter would corrupt. This will cause a small +8 byte stack increase for interpreter, but with the gain that we can use it from verifier rewrites as scratch register. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend [cascardo: This partial revert is needed in order to support using AX for the following two commits, as there is no JMP32 on 4.19.y] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/core.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 36be400c3e65..d2b6d2459aad 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -705,9 +705,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, * below. * * Constant blinding is only used by JITs, not in the interpreter. - * The interpreter uses AX in some occasions as a local temporary - * register e.g. in DIV or MOD instructions. - * * In restricted circumstances, the verifier can also use the AX * register for rewrites as long as they do not interfere with * the above cases! @@ -1057,6 +1054,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) #undef BPF_INSN_3_LBL #undef BPF_INSN_2_LBL u32 tail_call_cnt = 0; + u64 tmp; #define CONT ({ insn++; goto select_insn; }) #define CONT_JMP ({ insn++; goto select_insn; }) @@ -1117,36 +1115,36 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) (*(s64 *) &DST) >>= IMM; CONT; ALU64_MOD_X: - div64_u64_rem(DST, SRC, &AX); - DST = AX; + div64_u64_rem(DST, SRC, &tmp); + DST = tmp; CONT; ALU_MOD_X: - AX = (u32) DST; - DST = do_div(AX, (u32) SRC); + tmp = (u32) DST; + DST = do_div(tmp, (u32) SRC); CONT; ALU64_MOD_K: - div64_u64_rem(DST, IMM, &AX); - DST = AX; + div64_u64_rem(DST, IMM, &tmp); + DST = tmp; CONT; ALU_MOD_K: - AX = (u32) DST; - DST = do_div(AX, (u32) IMM); + tmp = (u32) DST; + DST = do_div(tmp, (u32) IMM); CONT; ALU64_DIV_X: DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - AX = (u32) DST; - do_div(AX, (u32) SRC); - DST = (u32) AX; + tmp = (u32) DST; + do_div(tmp, (u32) SRC); + DST = (u32) tmp; CONT; ALU64_DIV_K: DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: - AX = (u32) DST; - do_div(AX, (u32) IMM); - DST = (u32) AX; + tmp = (u32) DST; + do_div(tmp, (u32) IMM); + DST = (u32) tmp; CONT; ALU_END_TO_BE: switch (IMM) { From 8313432df224d926590731ec3ace3e1bd7bc4a1a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Aug 2021 10:55:32 -0300 Subject: [PATCH 03/34] bpf: Fix 32 bit src register truncation on div/mod Commit e88b2c6e5a4d9ce30d75391e4d950da74bb2bd90 upstream. While reviewing a different fix, John and I noticed an oddity in one of the BPF program dumps that stood out, for example: # bpftool p d x i 13 0: (b7) r0 = 808464450 1: (b4) w4 = 808464432 2: (bc) w0 = w0 3: (15) if r0 == 0x0 goto pc+1 4: (9c) w4 %= w0 [...] In line 2 we noticed that the mov32 would 32 bit truncate the original src register for the div/mod operation. While for the two operations the dst register is typically marked unknown e.g. from adjust_scalar_min_max_vals() the src register is not, and thus verifier keeps tracking original bounds, simplified: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (b7) r0 = -1 1: R0_w=invP-1 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (b7) r1 = -1 2: R0_w=invP-1 R1_w=invP-1 R10=fp0 2: (3c) w0 /= w1 3: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP-1 R10=fp0 3: (77) r1 >>= 32 4: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP4294967295 R10=fp0 4: (bf) r0 = r1 5: R0_w=invP4294967295 R1_w=invP4294967295 R10=fp0 5: (95) exit processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 Runtime result of r0 at exit is 0 instead of expected -1. Remove the verifier mov32 src rewrite in div/mod and replace it with a jmp32 test instead. After the fix, we result in the following code generation when having dividend r1 and divisor r6: div, 64 bit: div, 32 bit: 0: (b7) r6 = 8 0: (b7) r6 = 8 1: (b7) r1 = 8 1: (b7) r1 = 8 2: (55) if r6 != 0x0 goto pc+2 2: (56) if w6 != 0x0 goto pc+2 3: (ac) w1 ^= w1 3: (ac) w1 ^= w1 4: (05) goto pc+1 4: (05) goto pc+1 5: (3f) r1 /= r6 5: (3c) w1 /= w6 6: (b7) r0 = 0 6: (b7) r0 = 0 7: (95) exit 7: (95) exit mod, 64 bit: mod, 32 bit: 0: (b7) r6 = 8 0: (b7) r6 = 8 1: (b7) r1 = 8 1: (b7) r1 = 8 2: (15) if r6 == 0x0 goto pc+1 2: (16) if w6 == 0x0 goto pc+1 3: (9f) r1 %= r6 3: (9c) w1 %= w6 4: (b7) r0 = 0 4: (b7) r0 = 0 5: (95) exit 5: (95) exit x86 in particular can throw a 'divide error' exception for div instruction not only for divisor being zero, but also for the case when the quotient is too large for the designated register. For the edx:eax and rdx:rax dividend pair it is not an issue in x86 BPF JIT since we always zero edx (rdx). Hence really the only protection needed is against divisor being zero. Fixes: 68fda450a7df ("bpf: fix 32-bit divide by zero") Co-developed-by: John Fastabend Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann [Salvatore Bonaccorso: This is an earlier version of the patch provided by Daniel Borkmann which does not rely on availability of the BPF_JMP32 instruction class. This means it is not even strictly a backport of the upstream commit mentioned but based on Daniel's and John's work to address the issue.] Tested-by: Salvatore Bonaccorso Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- include/linux/filter.h | 24 ++++++++++++++++++++++++ kernel/bpf/verifier.c | 22 +++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 117f9380069a..7c84762cb59e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -77,6 +77,14 @@ struct sock_reuseport; /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ +#define BPF_ALU_REG(CLASS, OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = CLASS | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + #define BPF_ALU64_REG(OP, DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ @@ -123,6 +131,14 @@ struct sock_reuseport; /* Short form of mov, dst_reg = src_reg */ +#define BPF_MOV_REG(CLASS, DST, SRC) \ + ((struct bpf_insn) { \ + .code = CLASS | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + #define BPF_MOV64_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ @@ -157,6 +173,14 @@ struct sock_reuseport; .off = 0, \ .imm = IMM }) +#define BPF_RAW_REG(insn, DST, SRC) \ + ((struct bpf_insn) { \ + .code = (insn).code, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = (insn).off, \ + .imm = (insn).imm }) + /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2bf83305e5ab..a346ecfe6241 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6177,28 +6177,28 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; struct bpf_insn mask_and_div[] = { - BPF_MOV32_REG(insn->src_reg, insn->src_reg), + BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg), /* Rx div 0 -> 0 */ - BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2), - BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 2), + BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - *insn, + BPF_ALU_REG(BPF_CLASS(insn->code), BPF_XOR, insn->dst_reg, insn->dst_reg), }; struct bpf_insn mask_and_mod[] = { - BPF_MOV32_REG(insn->src_reg, insn->src_reg), + BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg), /* Rx mod 0 -> Rx */ - BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1), - *insn, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 1), + BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX), }; struct bpf_insn *patchlet; if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { - patchlet = mask_and_div + (is64 ? 1 : 0); - cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0); + patchlet = mask_and_div; + cnt = ARRAY_SIZE(mask_and_div); } else { - patchlet = mask_and_mod + (is64 ? 1 : 0); - cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0); + patchlet = mask_and_mod; + cnt = ARRAY_SIZE(mask_and_mod); } new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); From 39f74b7c81cca139c05757d9c8f9d1e35fbbf56b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Aug 2021 10:55:33 -0300 Subject: [PATCH 04/34] bpf: Fix truncation handling for mod32 dst reg wrt zero Commit 9b00f1b78809309163dda2d044d9e94a3c0248a3 upstream. Recently noticed that when mod32 with a known src reg of 0 is performed, then the dst register is 32-bit truncated in verifier: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (b7) r0 = 0 1: R0_w=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (b7) r1 = -1 2: R0_w=inv0 R1_w=inv-1 R10=fp0 2: (b4) w2 = -1 3: R0_w=inv0 R1_w=inv-1 R2_w=inv4294967295 R10=fp0 3: (9c) w1 %= w0 4: R0_w=inv0 R1_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2_w=inv4294967295 R10=fp0 4: (b7) r0 = 1 5: R0_w=inv1 R1_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2_w=inv4294967295 R10=fp0 5: (1d) if r1 == r2 goto pc+1 R0_w=inv1 R1_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2_w=inv4294967295 R10=fp0 6: R0_w=inv1 R1_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2_w=inv4294967295 R10=fp0 6: (b7) r0 = 2 7: R0_w=inv2 R1_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2_w=inv4294967295 R10=fp0 7: (95) exit 7: R0=inv1 R1=inv(id=0,umin_value=4294967295,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv4294967295 R10=fp0 7: (95) exit However, as a runtime result, we get 2 instead of 1, meaning the dst register does not contain (u32)-1 in this case. The reason is fairly straight forward given the 0 test leaves the dst register as-is: # ./bpftool p d x i 23 0: (b7) r0 = 0 1: (b7) r1 = -1 2: (b4) w2 = -1 3: (16) if w0 == 0x0 goto pc+1 4: (9c) w1 %= w0 5: (b7) r0 = 1 6: (1d) if r1 == r2 goto pc+1 7: (b7) r0 = 2 8: (95) exit This was originally not an issue given the dst register was marked as completely unknown (aka 64 bit unknown). However, after 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification") the verifier casts the register output to 32 bit, and hence it becomes 32 bit unknown. Note that for the case where the src register is unknown, the dst register is marked 64 bit unknown. After the fix, the register is truncated by the runtime and the test passes: # ./bpftool p d x i 23 0: (b7) r0 = 0 1: (b7) r1 = -1 2: (b4) w2 = -1 3: (16) if w0 == 0x0 goto pc+2 4: (9c) w1 %= w0 5: (05) goto pc+1 6: (bc) w1 = w1 7: (b7) r0 = 1 8: (1d) if r1 == r2 goto pc+1 9: (b7) r0 = 2 10: (95) exit Semantics also match with {R,W}x mod{64,32} 0 -> {R,W}x. Invalid div has always been {R,W}x div{64,32} 0 -> 0. Rewrites are as follows: mod32: mod64: (16) if w0 == 0x0 goto pc+2 (15) if r0 == 0x0 goto pc+1 (9c) w1 %= w0 (9f) r1 %= r0 (05) goto pc+1 (bc) w1 = w1 Fixes: 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification") Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend [Salvatore Bonaccorso: This is an earlier version based on work by Daniel and John which does not rely on availability of the BPF_JMP32 instruction class. This means it is not even strictly a backport of the upstream commit mentioned but based on Daniel's and John's work to address the issue and was finalized by Thadeu Lima de Souza Cascardo.] Tested-by: Salvatore Bonaccorso Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a346ecfe6241..abdc9eca463c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6178,7 +6178,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; struct bpf_insn mask_and_div[] = { BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg), - /* Rx div 0 -> 0 */ + /* [R,W]x div 0 -> 0 */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 2), BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX), BPF_JMP_IMM(BPF_JA, 0, 0, 1), @@ -6186,9 +6186,10 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) }; struct bpf_insn mask_and_mod[] = { BPF_MOV_REG(BPF_CLASS(insn->code), BPF_REG_AX, insn->src_reg), - /* Rx mod 0 -> Rx */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, 1 + (is64 ? 0 : 1)), BPF_RAW_REG(*insn, insn->dst_reg, BPF_REG_AX), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; struct bpf_insn *patchlet; @@ -6198,7 +6199,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) cnt = ARRAY_SIZE(mask_and_div); } else { patchlet = mask_and_mod; - cnt = ARRAY_SIZE(mask_and_mod); + cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 2 : 0); } new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); From 7027119349fe919c39a3f75de1f8d77fe6cb1a98 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Jul 2021 07:50:33 -0700 Subject: [PATCH 05/34] ARC: Fix CONFIG_STACKDEPOT [ Upstream commit bf79167fd86f3b97390fe2e70231d383526bd9cc ] Enabling CONFIG_STACKDEPOT results in the following build error. arc-elf-ld: lib/stackdepot.o: in function `filter_irq_stacks': stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' Other architectures address this problem by adding IRQENTRY_TEXT and SOFTIRQENTRY_TEXT to the text segment, so do the same here. Signed-off-by: Guenter Roeck Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index f35ed578e007..4d823d3f65bb 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -92,6 +92,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } From dafc95a1e473a0b857af34ecbb17b8b1c90edd75 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Jul 2021 00:29:19 +0200 Subject: [PATCH 06/34] netfilter: conntrack: collect all entries in one cycle [ Upstream commit 4608fdfc07e116f9fc0895beb40abad7cdb5ee3d ] Michal Kubecek reports that conntrack gc is responsible for frequent wakeups (every 125ms) on idle systems. On busy systems, timed out entries are evicted during lookup. The gc worker is only needed to remove entries after system becomes idle after a busy period. To resolve this, always scan the entire table. If the scan is taking too long, reschedule so other work_structs can run and resume from next bucket. After a completed scan, wait for 2 minutes before the next cycle. Heuristics for faster re-schedule are removed. GC_SCAN_INTERVAL could be exposed as a sysctl in the future to allow tuning this as-needed or even turn the gc worker off. Reported-by: Michal Kubecek Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 71 ++++++++++--------------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c5590d36b775..a38caf317dbb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -70,10 +70,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; - u32 last_bucket; + u32 next_bucket; bool exiting; bool early_drop; - long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; @@ -81,12 +80,8 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; -/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 128u -/* upper bound of full table scan */ -#define GC_MAX_SCAN_JIFFIES (16u * HZ) -/* desired ratio of entries found to be expired */ -#define GC_EVICT_RATIO 50u +#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) static struct conntrack_gc_work conntrack_gc_work; @@ -1198,17 +1193,13 @@ static void nf_ct_offload_timeout(struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); - unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned int nf_conntrack_max95 = 0; + unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; + unsigned int i, hashsz, nf_conntrack_max95 = 0; + unsigned long next_run = GC_SCAN_INTERVAL; struct conntrack_gc_work *gc_work; - unsigned int ratio, scanned = 0; - unsigned long next_run; - gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; - i = gc_work->last_bucket; + i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; @@ -1216,22 +1207,21 @@ static void gc_worker(struct work_struct *work) struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int hashsz; struct nf_conn *tmp; - i++; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hashsz); - if (i >= hashsz) - i = 0; + if (i >= hashsz) { + rcu_read_unlock(); + break; + } hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct net *net; tmp = nf_ct_tuplehash_to_ctrack(h); - scanned++; if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { nf_ct_offload_timeout(tmp); continue; @@ -1239,7 +1229,6 @@ static void gc_worker(struct work_struct *work) if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); - expired_count++; continue; } @@ -1271,7 +1260,14 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched(); - } while (++buckets < goal); + i++; + + if (time_after(jiffies, end_time) && i < hashsz) { + gc_work->next_bucket = i; + next_run = 0; + break; + } + } while (i < hashsz); if (gc_work->exiting) return; @@ -1282,40 +1278,17 @@ static void gc_worker(struct work_struct *work) * * This worker is only here to reap expired entries when system went * idle after a busy period. - * - * The heuristics below are supposed to balance conflicting goals: - * - * 1. Minimize time until we notice a stale entry - * 2. Maximize scan intervals to not waste cycles - * - * Normally, expire ratio will be close to 0. - * - * As soon as a sizeable fraction of the entries have expired - * increase scan frequency. */ - ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio > GC_EVICT_RATIO) { - gc_work->next_gc_run = min_interval; - } else { - unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - - BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); - - gc_work->next_gc_run += min_interval; - if (gc_work->next_gc_run > max) - gc_work->next_gc_run = max; + if (next_run) { + gc_work->early_drop = false; + gc_work->next_bucket = 0; } - - next_run = gc_work->next_gc_run; - gc_work->last_bucket = i; - gc_work->early_drop = false; queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = HZ; gc_work->exiting = false; } From 7014a0479d4213b8e6466c4a54c04fddd79155a0 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 6 Aug 2021 16:21:24 +0800 Subject: [PATCH 07/34] once: Fix panic when module unload [ Upstream commit 1027b96ec9d34f9abab69bc1a4dc5b1ad8ab1349 ] DO_ONCE DEFINE_STATIC_KEY_TRUE(___once_key); __do_once_done once_disable_jump(once_key); INIT_WORK(&w->work, once_deferred); struct once_work *w; w->key = key; schedule_work(&w->work); module unload //*the key is destroy* process_one_work once_deferred BUG_ON(!static_key_enabled(work->key)); static_key_count((struct static_key *)x) //*access key, crash* When module uses DO_ONCE mechanism, it could crash due to the above concurrency problem, we could reproduce it with link[1]. Fix it by add/put module refcount in the once work process. [1] https://lore.kernel.org/netdev/eaa6c371-465e-57eb-6be9-f4b16b9d7cbf@huawei.com/ Cc: Hannes Frederic Sowa Cc: Daniel Borkmann Cc: David S. Miller Cc: Eric Dumazet Reported-by: Minmin chen Signed-off-by: Kefeng Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/once.h | 4 ++-- lib/once.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/once.h b/include/linux/once.h index 9225ee6d96c7..ae6f4eb41cbe 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -7,7 +7,7 @@ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags); + unsigned long *flags, struct module *mod); /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only @@ -46,7 +46,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, if (unlikely(___ret)) { \ func(__VA_ARGS__); \ __do_once_done(&___done, &___once_key, \ - &___flags); \ + &___flags, THIS_MODULE); \ } \ } \ ___ret; \ diff --git a/lib/once.c b/lib/once.c index 8b7d6235217e..59149bf3bfb4 100644 --- a/lib/once.c +++ b/lib/once.c @@ -3,10 +3,12 @@ #include #include #include +#include struct once_work { struct work_struct work; struct static_key_true *key; + struct module *module; }; static void once_deferred(struct work_struct *w) @@ -16,10 +18,11 @@ static void once_deferred(struct work_struct *w) work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); static_branch_disable(work->key); + module_put(work->module); kfree(work); } -static void once_disable_jump(struct static_key_true *key) +static void once_disable_jump(struct static_key_true *key, struct module *mod) { struct once_work *w; @@ -29,6 +32,8 @@ static void once_disable_jump(struct static_key_true *key) INIT_WORK(&w->work, once_deferred); w->key = key; + w->module = mod; + __module_get(mod); schedule_work(&w->work); } @@ -53,11 +58,11 @@ bool __do_once_start(bool *done, unsigned long *flags) EXPORT_SYMBOL(__do_once_start); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags) + unsigned long *flags, struct module *mod) __releases(once_lock) { *done = true; spin_unlock_irqrestore(&once_lock, *flags); - once_disable_jump(once_key); + once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); From e5cc2285c6e7969d62e2bf1173ee8e1d4854a41b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20M=C3=A4tje?= Date: Wed, 25 Aug 2021 23:52:27 +0200 Subject: [PATCH 08/34] can: usb: esd_usb2: esd_usb2_rx_event(): fix the interchange of the CAN RX and TX error counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 044012b52029204900af9e4230263418427f4ba4 upstream. This patch fixes the interchanged fetch of the CAN RX and TX error counters from the ESD_EV_CAN_ERROR_EXT message. The RX error counter is really in struct rx_msg::data[2] and the TX error counter is in struct rx_msg::data[3]. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Link: https://lore.kernel.org/r/20210825215227.4947-2-stefan.maetje@esd.eu Cc: stable@vger.kernel.org Signed-off-by: Stefan Mätje Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/esd_usb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index e95358269525..d4e6b40f0ed4 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -236,8 +236,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, if (id == ESD_EV_CAN_ERROR_EXT) { u8 state = msg->msg.rx.data[0]; u8 ecc = msg->msg.rx.data[1]; - u8 txerr = msg->msg.rx.data[2]; - u8 rxerr = msg->msg.rx.data[3]; + u8 rxerr = msg->msg.rx.data[2]; + u8 txerr = msg->msg.rx.data[3]; skb = alloc_can_err_skb(priv->netdev, &cf); if (skb == NULL) { From 56c653382239adab6a1b7d4a23ad2e3d5fe2885b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 24 Aug 2021 14:19:26 +0200 Subject: [PATCH 09/34] Revert "USB: serial: ch341: fix character loss at high transfer rates" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df7b16d1c00ecb3da3a30c999cdb39f273c99a2f upstream. This reverts commit 3c18e9baee0ef97510dcda78c82285f52626764b. These devices do not appear to send a zero-length packet when the transfer size is a multiple of the bulk-endpoint max-packet size. This means that incoming data may not be processed by the driver until a short packet is received or the receive buffer is full. Revert back to using endpoint-sized receive buffers to avoid stalled reads. Reported-by: Paul Größel Link: https://bugzilla.kernel.org/show_bug.cgi?id=214131 Fixes: 3c18e9baee0e ("USB: serial: ch341: fix character loss at high transfer rates") Cc: stable@vger.kernel.org Cc: Willy Tarreau Link: https://lore.kernel.org/r/20210824121926.19311-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index e6dce35ca1aa..c87cb25e70ec 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -625,7 +625,6 @@ static struct usb_serial_driver ch341_device = { .owner = THIS_MODULE, .name = "ch341-uart", }, - .bulk_in_size = 512, .id_table = id_table, .num_ports = 1, .open = ch341_open, From af1305414b85b432096c3b6559200c45749cb8fd Mon Sep 17 00:00:00 2001 From: Zhengjun Zhang Date: Mon, 9 Aug 2021 21:35:53 +0800 Subject: [PATCH 10/34] USB: serial: option: add new VID/PID to support Fibocom FG150 commit 2829a4e3cf3a6ac2fa3cdb681b37574630fb9c1a upstream. Fibocom FG150 is a 5G module based on Qualcomm SDX55 platform, support Sub-6G band. Here are the outputs of lsusb -v and usb-devices: > T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 > D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 > P: Vendor=2cb7 ProdID=010b Rev=04.14 > S: Manufacturer=Fibocom > S: Product=Fibocom Modem_SN:XXXXXXXX > S: SerialNumber=XXXXXXXX > C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA > I: If#=0x0 Alt= 0 #EPs= 1 Cls=ef(misc ) Sub=04 Prot=01 Driver=rndis_host > I: If#=0x1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host > I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) > I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) > I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) > Bus 002 Device 002: ID 2cb7:010b Fibocom Fibocom Modem_SN:XXXXXXXX > Device Descriptor: > bLength 18 > bDescriptorType 1 > bcdUSB 3.20 > bDeviceClass 0 > bDeviceSubClass 0 > bDeviceProtocol 0 > bMaxPacketSize0 9 > idVendor 0x2cb7 Fibocom > idProduct 0x010b > bcdDevice 4.14 > iManufacturer 1 Fibocom > iProduct 2 Fibocom Modem_SN:XXXXXXXX > iSerial 3 XXXXXXXX > bNumConfigurations 1 > Configuration Descriptor: > bLength 9 > bDescriptorType 2 > wTotalLength 0x00e6 > bNumInterfaces 5 > bConfigurationValue 1 > iConfiguration 4 RNDIS_DUN_DIAG_ADB > bmAttributes 0xa0 > (Bus Powered) > Remote Wakeup > MaxPower 896mA > Interface Association: > bLength 8 > bDescriptorType 11 > bFirstInterface 0 > bInterfaceCount 2 > bFunctionClass 239 Miscellaneous Device > bFunctionSubClass 4 > bFunctionProtocol 1 > iFunction 7 RNDIS > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 0 > bAlternateSetting 0 > bNumEndpoints 1 > bInterfaceClass 239 Miscellaneous Device > bInterfaceSubClass 4 > bInterfaceProtocol 1 > iInterface 0 > ** UNRECOGNIZED: 05 24 00 10 01 > ** UNRECOGNIZED: 05 24 01 00 01 > ** UNRECOGNIZED: 04 24 02 00 > ** UNRECOGNIZED: 05 24 06 00 01 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x81 EP 1 IN > bmAttributes 3 > Transfer Type Interrupt > Synch Type None > Usage Type Data > wMaxPacketSize 0x0008 1x 8 bytes > bInterval 9 > bMaxBurst 0 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 1 > bAlternateSetting 0 > bNumEndpoints 2 > bInterfaceClass 10 CDC Data > bInterfaceSubClass 0 > bInterfaceProtocol 0 > iInterface 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x8e EP 14 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 6 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x0f EP 15 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 6 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 2 > bAlternateSetting 0 > bNumEndpoints 3 > bInterfaceClass 255 Vendor Specific Class > bInterfaceSubClass 0 > bInterfaceProtocol 0 > iInterface 0 > ** UNRECOGNIZED: 05 24 00 10 01 > ** UNRECOGNIZED: 05 24 01 00 00 > ** UNRECOGNIZED: 04 24 02 02 > ** UNRECOGNIZED: 05 24 06 00 00 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x83 EP 3 IN > bmAttributes 3 > Transfer Type Interrupt > Synch Type None > Usage Type Data > wMaxPacketSize 0x000a 1x 10 bytes > bInterval 9 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x82 EP 2 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x01 EP 1 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 3 > bAlternateSetting 0 > bNumEndpoints 2 > bInterfaceClass 255 Vendor Specific Class > bInterfaceSubClass 255 Vendor Specific Subclass > bInterfaceProtocol 48 > iInterface 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x84 EP 4 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x02 EP 2 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 4 > bAlternateSetting 0 > bNumEndpoints 2 > bInterfaceClass 255 Vendor Specific Class > bInterfaceSubClass 66 > bInterfaceProtocol 1 > iInterface 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x03 EP 3 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x85 EP 5 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Binary Object Store Descriptor: > bLength 5 > bDescriptorType 15 > wTotalLength 0x0016 > bNumDeviceCaps 2 > USB 2.0 Extension Device Capability: > bLength 7 > bDescriptorType 16 > bDevCapabilityType 2 > bmAttributes 0x00000006 > BESL Link Power Management (LPM) Supported > SuperSpeed USB Device Capability: > bLength 10 > bDescriptorType 16 > bDevCapabilityType 3 > bmAttributes 0x00 > wSpeedsSupported 0x000f > Device can operate at Low Speed (1Mbps) > Device can operate at Full Speed (12Mbps) > Device can operate at High Speed (480Mbps) > Device can operate at SuperSpeed (5Gbps) > bFunctionalitySupport 1 > Lowest fully-functional device speed is Full Speed (12Mbps) > bU1DevExitLat 1 micro seconds > bU2DevExitLat 500 micro seconds > Device Status: 0x0000 > (Bus Powered) Signed-off-by: Zhengjun Zhang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e6103a27e440..d08b799c91fc 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2074,6 +2074,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ From 8fc75a3c1de74e892526a38477ae1bfd277d4b3d Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 19 Aug 2021 03:17:03 +0200 Subject: [PATCH 11/34] usb: dwc3: gadget: Fix dwc3_calc_trbs_left() commit 51f1954ad853d01ba4dc2b35dee14d8490ee05a1 upstream. We can't depend on the TRB's HWO bit to determine if the TRB ring is "full". A TRB is only available when the driver had processed it, not when the controller consumed and relinquished the TRB's ownership to the driver. Otherwise, the driver may overwrite unprocessed TRBs. This can happen when many transfer events accumulate and the system is slow to process them and/or when there are too many small requests. If a request is in the started_list, that means there is one or more unprocessed TRBs remained. Check this instead of the TRB's HWO bit whether the TRB ring is full. Fixes: c4233573f6ee ("usb: dwc3: gadget: prepare TRBs on update transfers too") Cc: Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/e91e975affb0d0d02770686afc3a5b9eb84409f6.1629335416.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c93bed41d988..e63bff91aba8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -894,19 +894,19 @@ static struct dwc3_trb *dwc3_ep_prev_trb(struct dwc3_ep *dep, u8 index) static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) { - struct dwc3_trb *tmp; u8 trbs_left; /* - * If enqueue & dequeue are equal than it is either full or empty. - * - * One way to know for sure is if the TRB right before us has HWO bit - * set or not. If it has, then we're definitely full and can't fit any - * more transfers in our ring. + * If the enqueue & dequeue are equal then the TRB ring is either full + * or empty. It's considered full when there are DWC3_TRB_NUM-1 of TRBs + * pending to be processed by the driver. */ if (dep->trb_enqueue == dep->trb_dequeue) { - tmp = dwc3_ep_prev_trb(dep, dep->trb_enqueue); - if (tmp->ctrl & DWC3_TRB_CTRL_HWO) + /* + * If there is any request remained in the started_list at + * this point, that means there is no TRB available. + */ + if (!list_empty(&dep->started_list)) return 0; return DWC3_TRB_NUM - 1; From c9d60dd3c147c49b3d8eba9aa53ac3b6c24bc161 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 24 Aug 2021 21:28:55 -0700 Subject: [PATCH 12/34] usb: dwc3: gadget: Stop EP0 transfers during pullup disable commit 4a1e25c0a029b97ea4a3d423a6392bfacc3b2e39 upstream. During a USB cable disconnect, or soft disconnect scenario, a pending SETUP transaction may not be completed, leading to the following error: dwc3 a600000.dwc3: timed out waiting for SETUP phase If this occurs, then the entire pullup disable routine is skipped and proper cleanup and halting of the controller does not complete. Instead of returning an error (which is ignored from the UDC perspective), allow the pullup disable routine to continue, which will also handle disabling of EP0/1. This will end any active transfers as well. Ensure to clear any delayed_status also, as the timeout could happen within the STATUS stage. Fixes: bb0147364850 ("usb: dwc3: gadget: don't clear RUN/STOP when it's invalid to do so") Cc: Reviewed-by: Thinh Nguyen Acked-by: Felipe Balbi Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20210825042855.7977-1-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index e63bff91aba8..d61b7aa5d8e5 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1805,10 +1805,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) ret = wait_for_completion_timeout(&dwc->ep0_in_setup, msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); - if (ret == 0) { - dev_err(dwc->dev, "timed out waiting for SETUP phase\n"); - return -ETIMEDOUT; - } + if (ret == 0) + dev_warn(dwc->dev, "timed out waiting for SETUP phase\n"); } spin_lock_irqsave(&dwc->lock, flags); @@ -1946,6 +1944,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) /* begin to receive SETUP packets */ dwc->ep0state = EP0_SETUP_PHASE; dwc->link_state = DWC3_LINK_STATE_SS_DIS; + dwc->delayed_status = false; dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); From c1cec00baa5cd00203094635fdb874426a46b952 Mon Sep 17 00:00:00 2001 From: Tuo Li Date: Fri, 6 Aug 2021 06:30:29 -0700 Subject: [PATCH 13/34] IB/hfi1: Fix possible null-pointer dereference in _extend_sdma_tx_descs() [ Upstream commit cbe71c61992c38f72c2b625b2ef25916b9f0d060 ] kmalloc_array() is called to allocate memory for tx->descp. If it fails, the function __sdma_txclean() is called: __sdma_txclean(dd, tx); However, in the function __sdma_txclean(), tx-descp is dereferenced if tx->num_desc is not zero: sdma_unmap_desc(dd, &tx->descp[0]); To fix this possible null-pointer dereference, assign the return value of kmalloc_array() to a local variable descp, and then assign it to tx->descp if it is not NULL. Otherwise, go to enomem. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Link: https://lore.kernel.org/r/20210806133029.194964-1-islituo@gmail.com Reported-by: TOTE Robot Signed-off-by: Tuo Li Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/sdma.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 291c12f588b5..38258de75a94 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3055,6 +3055,7 @@ static void __sdma_process_event(struct sdma_engine *sde, static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int i; + struct sdma_desc *descp; /* Handle last descriptor */ if (unlikely((tx->num_desc == (MAX_DESC - 1)))) { @@ -3075,12 +3076,10 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) if (unlikely(tx->num_desc == MAX_DESC)) goto enomem; - tx->descp = kmalloc_array( - MAX_DESC, - sizeof(struct sdma_desc), - GFP_ATOMIC); - if (!tx->descp) + descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC); + if (!descp) goto enomem; + tx->descp = descp; /* reserve last descriptor for coalescing */ tx->desc_limit = MAX_DESC - 1; From aa3cb20d13acc8c314249199bb69ce6c69d8c9d0 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 4 Jul 2021 10:11:41 +0300 Subject: [PATCH 14/34] e1000e: Fix the max snoop/no-snoop latency for 10M [ Upstream commit 44a13a5d99c71bf9e1676d9e51679daf4d7b3d73 ] We should decode the latency and the max_latency before directly compare. The latency should be presented as lat_enc = scale x value: lat_enc_d = (lat_enc & 0x0x3ff) x (1U << (5*((max_ltr_enc & 0x1c00) >> 10))) Fixes: cf8fb73c23aa ("e1000e: add support for LTR on I217/I218") Suggested-by: Yee Li Signed-off-by: Sasha Neftin Tested-by: Dvora Fuxbrumer Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 14 +++++++++++++- drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 +++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 7998a73b6a0f..fbad77450725 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -995,6 +995,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) { u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; + u16 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */ + u16 lat_enc_d = 0; /* latency decoded */ u16 lat_enc = 0; /* latency encoded */ if (link) { @@ -1048,7 +1050,17 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop); max_ltr_enc = max_t(u16, max_snoop, max_nosnoop); - if (lat_enc > max_ltr_enc) + lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * + ((lat_enc & E1000_LTRV_SCALE_MASK) + >> E1000_LTRV_SCALE_SHIFT))); + + max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * + ((max_ltr_enc & E1000_LTRV_SCALE_MASK) + >> E1000_LTRV_SCALE_SHIFT))); + + if (lat_enc_d > max_ltr_enc_d) lat_enc = max_ltr_enc; } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 1502895eb45d..e757896287eb 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -274,8 +274,11 @@ /* Latency Tolerance Reporting */ #define E1000_LTRV 0x000F8 +#define E1000_LTRV_VALUE_MASK 0x000003FF #define E1000_LTRV_SCALE_MAX 5 #define E1000_LTRV_SCALE_FACTOR 5 +#define E1000_LTRV_SCALE_SHIFT 10 +#define E1000_LTRV_SCALE_MASK 0x00001C00 #define E1000_LTRV_REQ_SHIFT 15 #define E1000_LTRV_NOSNOOP_SHIFT 16 #define E1000_LTRV_SEND (1 << 30) From c33471daf2763c5aee2b7926202c74b75c365119 Mon Sep 17 00:00:00 2001 From: Shreyansh Chouhan Date: Sat, 21 Aug 2021 12:44:24 +0530 Subject: [PATCH 15/34] ip_gre: add validation for csum_start [ Upstream commit 1d011c4803c72f3907eccfc1ec63caefb852fcbf ] Validate csum_start in gre_handle_offloads before we call _gre_xmit so that we do not crash later when the csum_start value is used in the lco_csum function call. This patch deals with ipv4 code. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Reported-by: syzbot+ff8e1b9f2f36481e2efc@syzkaller.appspotmail.com Signed-off-by: Shreyansh Chouhan Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index de6f89511a21..a8a37d112820 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -449,6 +449,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, static int gre_handle_offloads(struct sk_buff *skb, bool csum) { + if (csum && skb_checksum_start(skb) < skb->data) + return -EINVAL; return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } From 24d34768233fba8613064c93234774c5c97e0f50 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Aug 2021 09:35:23 +0200 Subject: [PATCH 16/34] xgene-v2: Fix a resource leak in the error handling path of 'xge_probe()' [ Upstream commit 5ed74b03eb4d08f5dd281dcb5f1c9bb92b363a8d ] A successful 'xge_mdio_config()' call should be balanced by a corresponding 'xge_mdio_remove()' call in the error handling path of the probe, as already done in the remove function. Update the error handling path accordingly. Fixes: ea8ab16ab225 ("drivers: net: xgene-v2: Add MDIO support") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/apm/xgene-v2/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 0f2ad50f3bd7..7f37e7cb687e 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -691,11 +691,13 @@ static int xge_probe(struct platform_device *pdev) ret = register_netdev(ndev); if (ret) { netdev_err(ndev, "Failed to register netdev\n"); - goto err; + goto err_mdio_remove; } return 0; +err_mdio_remove: + xge_mdio_remove(ndev); err: free_netdev(ndev); From 3acf84b8daaf1a1225b8fd43687ca1e6675c647d Mon Sep 17 00:00:00 2001 From: Maxim Kiselev Date: Fri, 20 Aug 2021 18:39:51 +0300 Subject: [PATCH 17/34] net: marvell: fix MVNETA_TX_IN_PRGRS bit number [ Upstream commit 359f4cdd7d78fdf8c098713b05fee950a730f131 ] According to Armada XP datasheet bit at 0 position is corresponding for TxInProg indication. Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Maxim Kiselev Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index fda5dd8c71eb..382d010e1294 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -100,7 +100,7 @@ #define MVNETA_DESC_SWAP BIT(6) #define MVNETA_TX_BRST_SZ_MASK(burst) ((burst) << 22) #define MVNETA_PORT_STATUS 0x2444 -#define MVNETA_TX_IN_PRGRS BIT(1) +#define MVNETA_TX_IN_PRGRS BIT(0) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c #define MVNETA_SERDES_CFG 0x24A0 From 3c2795525eb2540f10c0425329ef54b8c746170c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 26 Aug 2021 19:22:01 +0800 Subject: [PATCH 18/34] net: hns3: fix get wrong pfc_en when query PFC configuration [ Upstream commit 8c1671e0d13d4a0ba4fb3a0da932bf3736d7ff73 ] Currently, when query PFC configuration by dcbtool, driver will return PFC enable status based on TC. As all priorities are mapped to TC0 by default, if TC0 is enabled, then all priorities mapped to TC0 will be shown as enabled status when query PFC setting, even though some priorities have never been set. for example: $ dcb pfc show dev eth0 pfc-cap 4 macsec-bypass off delay 0 prio-pfc 0:off 1:off 2:off 3:off 4:off 5:off 6:off 7:off $ dcb pfc set dev eth0 prio-pfc 0:on 1:on 2:on 3:on $ dcb pfc show dev eth0 pfc-cap 4 macsec-bypass off delay 0 prio-pfc 0:on 1:on 2:on 3:on 4:on 5:on 6:on 7:on To fix this problem, just returns user's PFC config parameter saved in driver. Fixes: cacde272dd00 ("net: hns3: Add hclge_dcb module for the support of DCB feature") Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index a75d7c826fc2..dd935cd1fb44 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -204,21 +204,12 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC]; struct hclge_vport *vport = hclge_get_vport(h); struct hclge_dev *hdev = vport->back; - u8 i, j, pfc_map, *prio_tc; int ret; + u8 i; memset(pfc, 0, sizeof(*pfc)); pfc->pfc_cap = hdev->pfc_max; - prio_tc = hdev->tm_info.prio_tc; - pfc_map = hdev->tm_info.hw_pfc_map; - - /* Pfc setting is based on TC */ - for (i = 0; i < hdev->tm_info.num_tc; i++) { - for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { - if ((prio_tc[j] == i) && (pfc_map & BIT(i))) - pfc->pfc_en |= BIT(j); - } - } + pfc->pfc_en = hdev->tm_info.pfc_en; ret = hclge_pfc_tx_stats_get(hdev, requests); if (ret) From 6a600523e323e58dadc6322e21270d4ba8757685 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 27 Aug 2021 11:29:27 +0200 Subject: [PATCH 19/34] usb: gadget: u_audio: fix race condition on endpoint stop [ Upstream commit 068fdad20454f815e61e6f6eb9f051a8b3120e88 ] If the endpoint completion callback is call right after the ep_enabled flag is cleared and before usb_ep_dequeue() is call, we could do a double free on the request and the associated buffer. Fix this by clearing ep_enabled after all the endpoint requests have been dequeued. Fixes: 7de8681be2cd ("usb: gadget: u_audio: Free requests only after callback") Cc: stable Reported-by: Thinh Nguyen Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20210827092927.366482-1-jbrunet@baylibre.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/u_audio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 0cb0c638fd13..168303f21bf4 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -349,8 +349,6 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep) if (!prm->ep_enabled) return; - prm->ep_enabled = false; - audio_dev = uac->audio_dev; params = &audio_dev->params; @@ -368,11 +366,12 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep) } } + prm->ep_enabled = false; + if (usb_ep_disable(ep)) dev_err(uac->card->dev, "%s:%d Error!\n", __func__, __LINE__); } - int u_audio_start_capture(struct g_audio *audio_dev) { struct snd_uac_chip *uac = audio_dev->uac; From 5da47bf1612fbf2ce350b38fdf13ce49e8805ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 26 Jul 2021 10:30:56 +0200 Subject: [PATCH 20/34] opp: remove WARN when no valid OPPs remain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 335ffab3ef864539e814b9a2903b0ae420c1c067 ] This WARN can be triggered per-core and the stack trace is not useful. Replace it with plain dev_err(). Fix a comment while at it. Signed-off-by: Michał Mirosław Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/opp/of.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index d64a13d7881b..a53123356697 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -423,8 +423,9 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) } } - /* There should be one of more OPP defined */ - if (WARN_ON(!count)) { + /* There should be one or more OPPs defined */ + if (!count) { + dev_err(dev, "%s: no supported OPPs", __func__); ret = -ENOENT; goto put_opp_table; } From ae5e7146b541116efe5a2afd079f9df529a95cd2 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:45 +0300 Subject: [PATCH 21/34] virtio: Improve vq->broken access to avoid any compiler optimization [ Upstream commit 60f0779862e4ab943810187752c462e85f5fa371 ] Currently vq->broken field is read by virtqueue_is_broken() in busy loop in one context by virtnet_send_command(). vq->broken is set to true in other process context by virtio_break_device(). Reader and writer are accessing it without any synchronization. This may lead to a compiler optimization which may result to optimize reading vq->broken only once. Hence, force reading vq->broken on each invocation of virtqueue_is_broken() and also force writing it so that such update is visible to the readers. It is a theoretical fix that isn't yet encountered in the field. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_ring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index df7980aef927..0cc0cfd3a3cb 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1197,7 +1197,7 @@ bool virtqueue_is_broken(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - return vq->broken; + return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); @@ -1211,7 +1211,9 @@ void virtio_break_device(struct virtio_device *dev) list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); - vq->broken = true; + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, true); } } EXPORT_SYMBOL_GPL(virtio_break_device); From 68208dc42dd906fe626224000d85e9513dbe5199 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:48 +0300 Subject: [PATCH 22/34] virtio_pci: Support surprise removal of virtio pci device [ Upstream commit 43bb40c5b92659966bdf4bfe584fde0a3575a049 ] When a virtio pci device undergo surprise removal (aka async removal in PCIe spec), mark the device as broken so that any upper layer drivers can abort any outstanding operation. When a virtio net pci device undergo surprise removal which is used by a NetworkManager, a below call trace was observed. kernel:watchdog: BUG: soft lockup - CPU#1 stuck for 26s! [kworker/1:1:27059] watchdog: BUG: soft lockup - CPU#1 stuck for 52s! [kworker/1:1:27059] CPU: 1 PID: 27059 Comm: kworker/1:1 Tainted: G S W I L 5.13.0-hotplug+ #8 Hardware name: Dell Inc. PowerEdge R640/0H28RR, BIOS 2.9.4 11/06/2020 Workqueue: events linkwatch_event RIP: 0010:virtnet_send_command+0xfc/0x150 [virtio_net] Call Trace: virtnet_set_rx_mode+0xcf/0x2a7 [virtio_net] ? __hw_addr_create_ex+0x85/0xc0 __dev_mc_add+0x72/0x80 igmp6_group_added+0xa7/0xd0 ipv6_mc_up+0x3c/0x60 ipv6_find_idev+0x36/0x80 addrconf_add_dev+0x1e/0xa0 addrconf_dev_config+0x71/0x130 addrconf_notify+0x1f5/0xb40 ? rtnl_is_locked+0x11/0x20 ? __switch_to_asm+0x42/0x70 ? finish_task_switch+0xaf/0x2c0 ? raw_notifier_call_chain+0x3e/0x50 raw_notifier_call_chain+0x3e/0x50 netdev_state_change+0x67/0x90 linkwatch_do_dev+0x3c/0x50 __linkwatch_run_queue+0xd2/0x220 linkwatch_event+0x21/0x30 process_one_work+0x1c8/0x370 worker_thread+0x30/0x380 ? process_one_work+0x370/0x370 kthread+0x118/0x140 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 Hence, add the ability to abort the command on surprise removal which prevents infinite loop and system lockup. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-5-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_pci_common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 45b04bc91f24..b7cc63f556ee 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -579,6 +579,13 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); + /* + * Device is marked broken on surprise removal so that virtio upper + * layers can abort any ongoing operation. + */ + if (!pci_device_is_present(pci_dev)) + virtio_break_device(&vp_dev->vdev); + pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); From 691add905f329ffdf5940d7e1b9e00c8bc89a8b8 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 25 Jun 2021 08:55:02 +0530 Subject: [PATCH 23/34] vringh: Use wiov->used to check for read/write desc order [ Upstream commit e74cfa91f42c50f7f649b0eca46aa049754ccdbd ] As __vringh_iov() traverses a descriptor chain, it populates each descriptor entry into either read or write vring iov and increments that iov's ->used member. So, as we iterate over a descriptor chain, at any point, (riov/wriov)->used value gives the number of descriptor enteries available, which are to be read or written by the device. As all read iovs must precede the write iovs, wiov->used should be zero when we are traversing a read descriptor. Current code checks for wiov->i, to figure out whether any previous entry in the current descriptor chain was a write descriptor. However, iov->i is only incremented, when these vring iovs are consumed, at a later point, and remain 0 in __vringh_iov(). So, correct the check for read and write descriptor order, to use wiov->used. Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Signed-off-by: Neeraj Upadhyay Link: https://lore.kernel.org/r/1624591502-4827-1-git-send-email-neeraju@codeaurora.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vringh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 59c61744dcc1..97aa9b87e572 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -330,7 +330,7 @@ __vringh_iov(struct vringh *vrh, u16 i, iov = wiov; else { iov = riov; - if (unlikely(wiov && wiov->i)) { + if (unlikely(wiov && wiov->used)) { vringh_bad("Readable desc %p after writable", &descs[i]); err = -EINVAL; From 49d8b24e84a9b72c9a989c6d335f1bc1721b76c7 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Sun, 15 Aug 2021 14:05:08 +0300 Subject: [PATCH 24/34] qed: qed ll2 race condition fixes [ Upstream commit 37110237f31105d679fc0aa7b11cdec867750ea7 ] Avoiding qed ll2 race condition and NULL pointer dereference as part of the remove and recovery flows. Changes form V1: - Change (!p_rx->set_prod_addr). - qed_ll2.c checkpatch fixes. Change from V2: - Revert "qed_ll2.c checkpatch fixes". Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 2847509a183d..cb3569ac85f7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -354,6 +354,9 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) unsigned long flags; int rc = -EINVAL; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_tx->lock, flags); if (p_tx->b_completing_packet) { rc = -EBUSY; @@ -527,7 +530,16 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) unsigned long flags = 0; int rc = 0; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_rx->lock, flags); + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) { + spin_unlock_irqrestore(&p_rx->lock, flags); + return 0; + } + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); @@ -848,6 +860,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) return 0; @@ -871,6 +886,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) u16 new_idx = 0, num_bds = 0; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; @@ -1628,6 +1646,8 @@ int qed_ll2_post_rx_buffer(void *cxt, if (!p_ll2_conn) return -EINVAL; p_rx = &p_ll2_conn->rx_queue; + if (!p_rx->set_prod_addr) + return -EIO; spin_lock_irqsave(&p_rx->lock, flags); if (!list_empty(&p_rx->free_descq)) From e7f5aefd15d9d020065f6f076e2b6e054198317a Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Sun, 15 Aug 2021 14:06:39 +0300 Subject: [PATCH 25/34] qed: Fix null-pointer dereference in qed_rdma_create_qp() [ Upstream commit d33d19d313d3466abdf8b0428be7837aff767802 ] Fix a possible null-pointer dereference in qed_rdma_create_qp(). Changes from V2: - Revert checkpatch fixes. Reported-by: TOTE Robot Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 909422d93903..3392982ff374 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1244,8 +1244,7 @@ qed_rdma_create_qp(void *rdma_cxt, if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info->active) { - DP_ERR(p_hwfn->cdev, - "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + pr_err("qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", rdma_cxt, in_params, out_params); return NULL; } From a78f93b9bba115e9c2a33529e28d4e12251e01e8 Mon Sep 17 00:00:00 2001 From: Mark Yacoub Date: Thu, 12 Aug 2021 15:49:17 -0400 Subject: [PATCH 26/34] drm: Copy drm_wait_vblank to user before returning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fa0b1ef5f7a694f48e00804a391245f3471aa155 ] [Why] Userspace should get back a copy of drm_wait_vblank that's been modified even when drm_wait_vblank_ioctl returns a failure. Rationale: drm_wait_vblank_ioctl modifies the request and expects the user to read it back. When the type is RELATIVE, it modifies it to ABSOLUTE and updates the sequence to become current_vblank_count + sequence (which was RELATIVE), but now it became ABSOLUTE. drmWaitVBlank (in libdrm) expects this to be the case as it modifies the request to be Absolute so it expects the sequence to would have been updated. The change is in compat_drm_wait_vblank, which is called by drm_compat_ioctl. This change of copying the data back regardless of the return number makes it en par with drm_ioctl, which always copies the data before returning. [How] Return from the function after everything has been copied to user. Fixes IGT:kms_flip::modeset-vs-vblank-race-interruptible Tested on ChromeOS Trogdor(msm) Reviewed-by: Michel Dänzer Signed-off-by: Mark Yacoub Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210812194917.1703356-1-markyacoub@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_ioc32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index ab8847c7dd96..87e13bcd7a67 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -855,8 +855,6 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED); - if (err) - return err; req32.reply.type = req.reply.type; req32.reply.sequence = req.reply.sequence; @@ -865,7 +863,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, if (copy_to_user(argp, &req32, sizeof(req32))) return -EFAULT; - return 0; + return err; } #if defined(CONFIG_X86) From d386a4b54607cf6f76e23815c2c9a3abc1d66882 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Aug 2021 16:40:48 +1000 Subject: [PATCH 27/34] drm/nouveau/disp: power down unused DP links during init [ Upstream commit 6eaa1f3c59a707332e921e32782ffcad49915c5e ] When booted with multiple displays attached, the EFI GOP driver on (at least) Ampere, can leave DP links powered up that aren't being used to display anything. This confuses our tracking of SOR routing, with the likely result being a failed modeset and display engine hang. Fix this by (ab?)using the DisableLT IED script to power-down the link, restoring HW to a state the driver expects. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 818d21bd28d3..1d2837c5a8f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -419,7 +419,7 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps) return ret; } -static void +void nvkm_dp_disable(struct nvkm_outp *outp, struct nvkm_ior *ior) { struct nvkm_dp *dp = nvkm_dp(outp); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h index 495f665a0ee6..12d6ff4cfa95 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h @@ -32,6 +32,7 @@ struct nvkm_dp { int nvkm_dp_new(struct nvkm_disp *, int index, struct dcb_output *, struct nvkm_outp **); +void nvkm_dp_disable(struct nvkm_outp *, struct nvkm_ior *); /* DPCD Receiver Capabilities */ #define DPCD_RC00_DPCD_REV 0x00000 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index c62030c96fba..4b1c72fd8f03 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "outp.h" +#include "dp.h" #include "ior.h" #include @@ -216,6 +217,14 @@ nvkm_outp_init_route(struct nvkm_outp *outp) if (!ior->arm.head || ior->arm.proto != proto) { OUTP_DBG(outp, "no heads (%x %d %d)", ior->arm.head, ior->arm.proto, proto); + + /* The EFI GOP driver on Ampere can leave unused DP links routed, + * which we don't expect. The DisableLT IED script *should* get + * us back to where we need to be. + */ + if (ior->func->route.get && !ior->arm.head && outp->info.type == DCB_OUTPUT_DP) + nvkm_dp_disable(outp, ior); + return; } From b36556947ad5a184e8567fec6b7df3a6ae4206fc Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 17 Aug 2021 10:04:37 -0700 Subject: [PATCH 28/34] net/rds: dma_map_sg is entitled to merge entries [ Upstream commit fb4b1373dcab086d0619c29310f0466a0b2ceb8a ] Function "dma_map_sg" is entitled to merge adjacent entries and return a value smaller than what was passed as "nents". Subsequently "ib_map_mr_sg" needs to work with this value ("sg_dma_len") rather than the original "nents" parameter ("sg_len"). This old RDS bug was exposed and reliably causes kernel panics (using RDMA operations "rds-stress -D") on x86_64 starting with: commit c588072bba6b ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Simply put: Linux 5.11 and later. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/60efc69f-1f35-529d-a7ef-da0549cad143@oracle.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/rds/ib_frmr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 6431a023ac89..46988c009a3e 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -111,9 +111,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, &off, PAGE_SIZE); - if (unlikely(ret != ibmr->sg_len)) + if (unlikely(ret != ibmr->sg_dma_len)) return ret < 0 ? ret : -EINVAL; /* Perform a WR for the fast_reg_mr. Each individual page From 0776c1a20babb4ad0b7ce7f2f4e0806a97663187 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 30 Aug 2021 08:55:18 -0700 Subject: [PATCH 29/34] vt_kdsetmode: extend console locking commit 2287a51ba822384834dafc1c798453375d1107c7 upstream. As per the long-suffering comment. Reported-by: Minh Yuan Cc: Greg Kroah-Hartman Cc: Jiri Slaby Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index ce6c7dd7bc12..076b8a3f8e7a 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -484,16 +484,19 @@ int vt_ioctl(struct tty_struct *tty, ret = -EINVAL; goto out; } - /* FIXME: this needs the console lock extending */ - if (vc->vc_mode == (unsigned char) arg) + console_lock(); + if (vc->vc_mode == (unsigned char) arg) { + console_unlock(); break; + } vc->vc_mode = (unsigned char) arg; - if (console != fg_console) + if (console != fg_console) { + console_unlock(); break; + } /* * explicitly blank/unblank the screen if switching modes */ - console_lock(); if (arg == KD_TEXT) do_unblank_screen(1); else From 6be10fb6c143608a7c7ab3901a096e272233bf64 Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Tue, 7 Jul 2020 15:26:03 -0400 Subject: [PATCH 30/34] fbmem: add margin check to fb_check_caps() commit a49145acfb975d921464b84fe00279f99827d816 upstream. A fb_ioctl() FBIOPUT_VSCREENINFO call with invalid xres setting or yres setting in struct fb_var_screeninfo will result in a KASAN: vmalloc-out-of-bounds failure in bitfill_aligned() as the margins are being cleared. The margins are cleared in chunks and if the xres setting or yres setting is a value of zero upto the chunk size, the failure will occur. Add a margin check to validate xres and yres settings. Signed-off-by: George Kennedy Reported-by: syzbot+e5fd3e65515b48c02a30@syzkaller.appspotmail.com Reviewed-by: Dan Carpenter Cc: Dhaval Giani Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/1594149963-13801-1-git-send-email-george.kennedy@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbmem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 84845275dbef..de04c097d67c 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -991,6 +991,10 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) goto done; } + /* bitfill_aligned() assumes that it's at least 8x8 */ + if (var->xres < 8 || var->yres < 8) + return -EINVAL; + ret = info->fbops->fb_check_var(var, info); if (ret) From 3db3ec8f3b414fa76d3a9ae864781ebbb1709a36 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 22 Jun 2021 10:56:47 -0700 Subject: [PATCH 31/34] KVM: x86/mmu: Treat NX as used (not reserved) for all !TDP shadow MMUs commit 112022bdb5bc372e00e6e43cb88ee38ea67b97bd upstream Mark NX as being used for all non-nested shadow MMUs, as KVM will set the NX bit for huge SPTEs if the iTLB mutli-hit mitigation is enabled. Checking the mitigation itself is not sufficient as it can be toggled on at any time and KVM doesn't reset MMU contexts when that happens. KVM could reset the contexts, but that would require purging all SPTEs in all MMUs, for no real benefit. And, KVM already forces EFER.NX=1 when TDP is disabled (for WP=0, SMEP=1, NX=0), so technically NX is never reserved for shadow MMUs. Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210622175739.3610207-3-seanjc@google.com> Signed-off-by: Paolo Bonzini [sudip: use old path and adjust context] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 762baba4ecd5..0cb82172c06c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4557,7 +4557,16 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - bool uses_nx = context->nx || context->base_role.smep_andnot_wp; + /* + * KVM uses NX when TDP is disabled to handle a variety of scenarios, + * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and + * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0. + * The iTLB multi-hit workaround can be toggled at any time, so assume + * NX can be used by any non-nested shadow MMU to avoid having to reset + * MMU contexts. Note, KVM forces EFER.NX=1 when TDP is disabled. + */ + bool uses_nx = context->nx || !tdp_enabled || + context->base_role.smep_andnot_wp; struct rsvd_bits_validate *shadow_zero_check; int i; From 61b224e29ec37cbf39111270f2c7143bfe222b20 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Sat, 7 Aug 2021 10:37:02 +0300 Subject: [PATCH 32/34] Revert "floppy: reintroduce O_NDELAY fix" commit c7e9d0020361f4308a70cdfd6d5335e273eb8717 upstream. The patch breaks userspace implementations (e.g. fdutils) and introduces regressions in behaviour. Previously, it was possible to O_NDELAY open a floppy device with no media inserted or with write protected media without an error. Some userspace tools use this particular behavior for probing. It's not the first time when we revert this patch. Previous revert is in commit f2791e7eadf4 (Revert "floppy: refactor open() flags handling"). This reverts commit 8a0c014cd20516ade9654fc13b51345ec58e7be8. Link: https://lore.kernel.org/linux-block/de10cb47-34d1-5a88-7751-225ca380f735@compro.net/ Reported-by: Mark Hounschell Cc: Jiri Kosina Cc: Wim Osterholt Cc: Kurt Garloff Cc: Signed-off-by: Denis Efremov Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 04383f14c74a..8f444b375761 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4074,22 +4074,21 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - if (mode & (FMODE_READ|FMODE_WRITE)) { - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + if (!(mode & FMODE_NDELAY)) { + if (mode & (FMODE_READ|FMODE_WRITE)) { + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; + } + res = -EROFS; + if ((mode & FMODE_WRITE) && + !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) goto out; } - - res = -EROFS; - - if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) - goto out; - mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; From e9544276b3e60800a150f27fe5d031d133c77eea Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 26 Aug 2021 12:46:01 -0700 Subject: [PATCH 33/34] net: don't unconditionally copy_from_user a struct ifreq for socket ioctls commit d0efb16294d145d157432feda83877ae9d7cdf37 upstream. A common implementation of isatty(3) involves calling a ioctl passing a dummy struct argument and checking whether the syscall failed -- bionic and glibc use TCGETS (passing a struct termios), and musl uses TIOCGWINSZ (passing a struct winsize). If the FD is a socket, we will copy sizeof(struct ifreq) bytes of data from the argument and return -EFAULT if that fails. The result is that the isatty implementations may return a non-POSIX-compliant value in errno in the case where part of the dummy struct argument is inaccessible, as both struct termios and struct winsize are smaller than struct ifreq (at least on arm64). Although there is usually enough stack space following the argument on the stack that this did not present a practical problem up to now, with MTE stack instrumentation it's more likely for the copy to fail, as the memory following the struct may have a different tag. Fix the problem by adding an early check for whether the ioctl is a valid socket ioctl, and return -ENOTTY if it isn't. Fixes: 44c02a2c3dc5 ("dev_ioctl(): move copyin/copyout to callers") Link: https://linux-review.googlesource.com/id/I869da6cf6daabc3e4b7b82ac979683ba05e27d4d Signed-off-by: Peter Collingbourne Cc: # 4.19 Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 4 ++++ net/socket.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5f053c6b66..fbd689c15974 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3594,6 +3594,10 @@ int netdev_rx_handler_register(struct net_device *dev, void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); +static inline bool is_socket_ioctl_cmd(unsigned int cmd) +{ + return _IOC_TYPE(cmd) == SOCK_IOC_TYPE; +} int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf *, int); diff --git a/net/socket.c b/net/socket.c index f14bca00ff01..e5cc9f2b981e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1030,7 +1030,7 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, rtnl_unlock(); if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) err = -EFAULT; - } else { + } else if (is_socket_ioctl_cmd(cmd)) { struct ifreq ifr; bool need_copyout; if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) @@ -1039,6 +1039,8 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, if (!err && need_copyout) if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; + } else { + err = -ENOTTY; } return err; } @@ -3064,6 +3066,8 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct ifreq ifreq; u32 data32; + if (!is_socket_ioctl_cmd(cmd)) + return -ENOTTY; if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) return -EFAULT; if (get_user(data32, &u_ifreq32->ifr_data)) From b172b44fcb1771e083aad806fa96f3f60e2ddfac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Sep 2021 09:58:03 +0200 Subject: [PATCH 34/34] Linux 4.19.206 Link: https://lore.kernel.org/r/20210901122250.752620302@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Hulk Robot Tested-by: Sudip Mukherjee Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index abc35829f47b..3a3eea3ab10a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 205 +SUBLEVEL = 206 EXTRAVERSION = NAME = "People's Front"