From 39e12695c54ab1595012862d200b8efa057fd322 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Tue, 14 May 2019 19:42:57 -0700 Subject: [PATCH 001/152] UPSTREAM: bpf: relax inode permission check for retrieving bpf program For iptable module to load a bpf program from a pinned location, it only retrieve a loaded program and cannot change the program content so requiring a write permission for it might not be necessary. Also when adding or removing an unrelated iptable rule, it might need to flush and reload the xt_bpf related rules as well and triggers the inode permission check. It might be better to remove the write premission check for the inode so we won't need to grant write access to all the processes that flush and restore iptables rules. Signed-off-by: Chenbo Feng Signed-off-by: Alexei Starovoitov (cherry picked from commit e547ff3f803e779a3898f1f48447b29f43c54085) Bug: 129650054 Change-Id: I71487ad6f4d22e0a8be3757d9b72d1c04c92104d (cherry picked from commit 9e74c1b9e8418aa0209b15db24f0b3d4876f52aa) --- kernel/bpf/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 4a8f390a2b82..dc9d7ac8228d 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -518,7 +518,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) { struct bpf_prog *prog; - int ret = inode_permission(inode, MAY_READ | MAY_WRITE); + int ret = inode_permission(inode, MAY_READ); if (ret) return ERR_PTR(ret); From 7483147643b9d13f9bf245336345e5961865c563 Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Mon, 15 Apr 2019 11:18:33 -0700 Subject: [PATCH 002/152] UPSTREAM: module: add stubs for within_module functions Provide stubs for within_module_core(), within_module_init(), and within_module() to prevent build errors when !CONFIG_MODULES. Suggested-by: Matthew Wilcox Reported-by: Randy Dunlap Reported-by: kbuild test robot Link: https://marc.info/?l=linux-mm&m=155384681109231&w=2 Signed-off-by: Tri Vo Signed-off-by: Jessica Yu (cherry picked from commit dadec066d8fa7da227f623f632ea114690fecaf8) Bug: 132997968 Test: defconfig + CONFIG_GCOV_KERNEL records coverage Change-Id: I69448d0721a5174d4f39fff0774a86361f37b8ef --- include/linux/module.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 904f94628132..2f13c831a0ce 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -683,6 +683,23 @@ static inline bool is_module_text_address(unsigned long addr) return false; } +static inline bool within_module_core(unsigned long addr, + const struct module *mod) +{ + return false; +} + +static inline bool within_module_init(unsigned long addr, + const struct module *mod) +{ + return false; +} + +static inline bool within_module(unsigned long addr, const struct module *mod) +{ + return false; +} + /* Get/put a kernel symbol (calls should be symmetric) */ #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) #define symbol_put(x) do { } while (0) From ff433dd9c0bc3dc40354161186bf3cd57f433caf Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 14 May 2019 15:45:25 -0700 Subject: [PATCH 003/152] UPSTREAM: gcov: clang: move common GCC code into gcc_base.c Patch series "gcov: add Clang support", v4. This patch (of 3): base.c contains a few callbacks specific to GCC's gcov implementation. Move these into their own module in preparation for Clang support. Link: http://lkml.kernel.org/r/20190318025411.98014-2-trong@android.com Signed-off-by: Greg Hackmann Signed-off-by: Nick Desaulniers Signed-off-by: Tri Vo Tested-by: Trilok Soni Tested-by: Prasad Sodagudi Tested-by: Tri Vo Reviewed-by: Peter Oberparleiter Cc: Daniel Mentz Cc: Petri Gynther Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 826eba0d77bc74c4d1c611374b76abfe251e8538) Bug: 132997968 Test: defconfig + CONFIG_GCOV_KERNEL records coverage Change-Id: Ice0533106d634b22aeee49b6f542917f45c681f0 --- kernel/gcov/Makefile | 4 +- kernel/gcov/base.c | 84 +---------------------------------------- kernel/gcov/gcc_base.c | 86 ++++++++++++++++++++++++++++++++++++++++++ kernel/gcov/gcov.h | 3 ++ 4 files changed, 93 insertions(+), 84 deletions(-) create mode 100644 kernel/gcov/gcc_base.c diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index ff06d64df397..45431ed679d1 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -2,5 +2,5 @@ ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' obj-y := base.o fs.o -obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_3_4.o -obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_4_7.o +obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_base.o gcc_3_4.o +obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_base.o gcc_4_7.o diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 9c7c8d5c18f2..799d42072727 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -22,88 +22,8 @@ #include #include "gcov.h" -static int gcov_events_enabled; -static DEFINE_MUTEX(gcov_lock); - -/* - * __gcov_init is called by gcc-generated constructor code for each object - * file compiled with -fprofile-arcs. - */ -void __gcov_init(struct gcov_info *info) -{ - static unsigned int gcov_version; - - mutex_lock(&gcov_lock); - if (gcov_version == 0) { - gcov_version = gcov_info_version(info); - /* - * Printing gcc's version magic may prove useful for debugging - * incompatibility reports. - */ - pr_info("version magic: 0x%x\n", gcov_version); - } - /* - * Add new profiling data structure to list and inform event - * listener. - */ - gcov_info_link(info); - if (gcov_events_enabled) - gcov_event(GCOV_ADD, info); - mutex_unlock(&gcov_lock); -} -EXPORT_SYMBOL(__gcov_init); - -/* - * These functions may be referenced by gcc-generated profiling code but serve - * no function for kernel profiling. - */ -void __gcov_flush(void) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_flush); - -void __gcov_merge_add(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_add); - -void __gcov_merge_single(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_single); - -void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_delta); - -void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_ior); - -void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_time_profile); - -void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_merge_icall_topn); - -void __gcov_exit(void) -{ - /* Unused. */ -} -EXPORT_SYMBOL(__gcov_exit); +int gcov_events_enabled; +DEFINE_MUTEX(gcov_lock); /** * gcov_enable_events - enable event reporting through gcov_event() diff --git a/kernel/gcov/gcc_base.c b/kernel/gcov/gcc_base.c new file mode 100644 index 000000000000..3cf736b9f880 --- /dev/null +++ b/kernel/gcov/gcc_base.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include "gcov.h" + +/* + * __gcov_init is called by gcc-generated constructor code for each object + * file compiled with -fprofile-arcs. + */ +void __gcov_init(struct gcov_info *info) +{ + static unsigned int gcov_version; + + mutex_lock(&gcov_lock); + if (gcov_version == 0) { + gcov_version = gcov_info_version(info); + /* + * Printing gcc's version magic may prove useful for debugging + * incompatibility reports. + */ + pr_info("version magic: 0x%x\n", gcov_version); + } + /* + * Add new profiling data structure to list and inform event + * listener. + */ + gcov_info_link(info); + if (gcov_events_enabled) + gcov_event(GCOV_ADD, info); + mutex_unlock(&gcov_lock); +} +EXPORT_SYMBOL(__gcov_init); + +/* + * These functions may be referenced by gcc-generated profiling code but serve + * no function for kernel profiling. + */ +void __gcov_flush(void) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_flush); + +void __gcov_merge_add(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_add); + +void __gcov_merge_single(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_single); + +void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_delta); + +void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_ior); + +void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_time_profile); + +void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_merge_icall_topn); + +void __gcov_exit(void) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_exit); diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h index de118ad4a024..0ecf1d664ec3 100644 --- a/kernel/gcov/gcov.h +++ b/kernel/gcov/gcov.h @@ -83,4 +83,7 @@ struct gcov_link { }; extern const struct gcov_link gcov_link[]; +extern int gcov_events_enabled; +extern struct mutex gcov_lock; + #endif /* GCOV_H */ From b3af390f82f5774a73691f4d1afbe46eff34d8e8 Mon Sep 17 00:00:00 2001 From: Tri Vo Date: Tue, 14 May 2019 15:45:28 -0700 Subject: [PATCH 004/152] UPSTREAM: gcov: docs: add a note on GCC vs Clang differences Document some things of note to gcov users: 1. GCC gcov and Clang llvm-cov tools are not compatible. 2. The use of GCC vs Clang is transparent at build-time. Also adjust the documentation to account for the removal of config symbol CONFIG_GCOV_FORMAT_AUTODETECT by commit 6a61b70b43c9 ("gcov: remove CONFIG_GCOV_FORMAT_AUTODETECT"). Link: http://lkml.kernel.org/r/20190318025411.98014-4-trong@android.com Signed-off-by: Tri Vo Reviewed-by: Peter Oberparleiter Cc: Daniel Mentz Cc: Greg Hackmann Cc: Nick Desaulniers Cc: Petri Gynther Cc: Prasad Sodagudi Cc: Trilok Soni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit aa069a23a220e9ab00d6837b76917952d0fb587e) Bug: 132997968 Test: defconfig + CONFIG_GCOV_KERNEL records coverage Change-Id: Id9a248d5e6d8fc643300230b2ab2ce902050a6ae --- Documentation/dev-tools/gcov.rst | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst index 69a7d90c320a..46aae52a41d0 100644 --- a/Documentation/dev-tools/gcov.rst +++ b/Documentation/dev-tools/gcov.rst @@ -34,10 +34,6 @@ Configure the kernel with:: CONFIG_DEBUG_FS=y CONFIG_GCOV_KERNEL=y -select the gcc's gcov format, default is autodetect based on gcc version:: - - CONFIG_GCOV_FORMAT_AUTODETECT=y - and to get coverage data for the entire kernel:: CONFIG_GCOV_PROFILE_ALL=y @@ -169,6 +165,20 @@ b) gcov is run on the BUILD machine [user@build] gcov -o /tmp/coverage/tmp/out/init main.c +Note on compilers +----------------- + +GCC and LLVM gcov tools are not necessarily compatible. Use gcov_ to work with +GCC-generated .gcno and .gcda files, and use llvm-cov_ for Clang. + +.. _gcov: http://gcc.gnu.org/onlinedocs/gcc/Gcov.html +.. _llvm-cov: https://llvm.org/docs/CommandGuide/llvm-cov.html + +Build differences between GCC and Clang gcov are handled by Kconfig. It +automatically selects the appropriate gcov format depending on the detected +toolchain. + + Troubleshooting --------------- From 61cf23cf322d843cd4b225b35f111115b27f442f Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 14 May 2019 15:45:31 -0700 Subject: [PATCH 005/152] UPSTREAM: gcov: clang support LLVM uses profiling data that's deliberately similar to GCC, but has a very different way of exporting that data. LLVM calls llvm_gcov_init() once per module, and provides a couple of callbacks that we can use to ask for more data. We care about the "writeout" callback, which in turn calls back into compiler-rt/this module to dump all the gathered coverage data to disk: llvm_gcda_start_file() llvm_gcda_emit_function() llvm_gcda_emit_arcs() llvm_gcda_emit_function() llvm_gcda_emit_arcs() [... repeats for each function ...] llvm_gcda_summary_info() llvm_gcda_end_file() This design is much more stateless and unstructured than gcc's, and is intended to run at process exit. This forces us to keep some local state about which module we're dealing with at the moment. On the other hand, it also means we don't depend as much on how LLVM represents profiling data internally. See LLVM's lib/Transforms/Instrumentation/GCOVProfiling.cpp for more details on how this works, particularly GCOVProfiler::emitProfileArcs(), GCOVProfiler::insertCounterWriteout(), and GCOVProfiler::insertFlush(). [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20190417225328.208129-1-trong@android.com Signed-off-by: Greg Hackmann Signed-off-by: Nick Desaulniers Signed-off-by: Tri Vo Co-developed-by: Nick Desaulniers Co-developed-by: Tri Vo Tested-by: Trilok Soni Tested-by: Prasad Sodagudi Tested-by: Tri Vo Tested-by: Daniel Mentz Tested-by: Petri Gynther Reviewed-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e178a5beb36960902379040ee0b667fb0a8eee93) Bug: 132997968 Test: defconfig + CONFIG_GCOV_KERNEL records coverage Change-Id: Id72b3ee32571f0e555d5a7407d3f1e933850b390 --- kernel/gcov/Kconfig | 3 +- kernel/gcov/Makefile | 1 + kernel/gcov/base.c | 2 +- kernel/gcov/clang.c | 581 ++++++++++++++++++++++++++++++++++++++++++ kernel/gcov/gcc_3_4.c | 12 + kernel/gcov/gcc_4_7.c | 12 + kernel/gcov/gcov.h | 2 + 7 files changed, 611 insertions(+), 2 deletions(-) create mode 100644 kernel/gcov/clang.c diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 1e3823fa799b..f71c1adcff31 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -53,6 +53,7 @@ config GCOV_PROFILE_ALL choice prompt "Specify GCOV format" depends on GCOV_KERNEL + depends on CC_IS_GCC ---help--- The gcov format is usually determined by the GCC version, and the default is chosen according to your GCC version. However, there are @@ -62,7 +63,7 @@ choice config GCOV_FORMAT_3_4 bool "GCC 3.4 format" - depends on CC_IS_GCC && GCC_VERSION < 40700 + depends on GCC_VERSION < 40700 ---help--- Select this option to use the format defined by GCC 3.4. diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index 45431ed679d1..d66a74b0f100 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -4,3 +4,4 @@ ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' obj-y := base.o fs.o obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_base.o gcc_3_4.o obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_base.o gcc_4_7.o +obj-$(CONFIG_CC_IS_CLANG) += clang.o diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 799d42072727..0ffe9f194080 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -64,7 +64,7 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, /* Remove entries located in module from linked list. */ while ((info = gcov_info_next(info))) { - if (within_module((unsigned long)info, mod)) { + if (gcov_info_within_module(info, mod)) { gcov_info_unlink(prev, info); if (gcov_events_enabled) gcov_event(GCOV_REMOVE, info); diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c new file mode 100644 index 000000000000..c94b820a1b62 --- /dev/null +++ b/kernel/gcov/clang.c @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Google, Inc. + * modified from kernel/gcov/gcc_4_7.c + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * LLVM uses profiling data that's deliberately similar to GCC, but has a + * very different way of exporting that data. LLVM calls llvm_gcov_init() once + * per module, and provides a couple of callbacks that we can use to ask for + * more data. + * + * We care about the "writeout" callback, which in turn calls back into + * compiler-rt/this module to dump all the gathered coverage data to disk: + * + * llvm_gcda_start_file() + * llvm_gcda_emit_function() + * llvm_gcda_emit_arcs() + * llvm_gcda_emit_function() + * llvm_gcda_emit_arcs() + * [... repeats for each function ...] + * llvm_gcda_summary_info() + * llvm_gcda_end_file() + * + * This design is much more stateless and unstructured than gcc's, and is + * intended to run at process exit. This forces us to keep some local state + * about which module we're dealing with at the moment. On the other hand, it + * also means we don't depend as much on how LLVM represents profiling data + * internally. + * + * See LLVM's lib/Transforms/Instrumentation/GCOVProfiling.cpp for more + * details on how this works, particularly GCOVProfiler::emitProfileArcs(), + * GCOVProfiler::insertCounterWriteout(), and + * GCOVProfiler::insertFlush(). + */ + +#define pr_fmt(fmt) "gcov: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include "gcov.h" + +typedef void (*llvm_gcov_callback)(void); + +struct gcov_info { + struct list_head head; + + const char *filename; + unsigned int version; + u32 checksum; + + struct list_head functions; +}; + +struct gcov_fn_info { + struct list_head head; + + u32 ident; + u32 checksum; + u8 use_extra_checksum; + u32 cfg_checksum; + + u32 num_counters; + u64 *counters; + const char *function_name; +}; + +static struct gcov_info *current_info; + +static LIST_HEAD(clang_gcov_list); + +void llvm_gcov_init(llvm_gcov_callback writeout, llvm_gcov_callback flush) +{ + struct gcov_info *info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return; + + INIT_LIST_HEAD(&info->head); + INIT_LIST_HEAD(&info->functions); + + mutex_lock(&gcov_lock); + + list_add_tail(&info->head, &clang_gcov_list); + current_info = info; + writeout(); + current_info = NULL; + if (gcov_events_enabled) + gcov_event(GCOV_ADD, info); + + mutex_unlock(&gcov_lock); +} +EXPORT_SYMBOL(llvm_gcov_init); + +void llvm_gcda_start_file(const char *orig_filename, const char version[4], + u32 checksum) +{ + current_info->filename = orig_filename; + memcpy(¤t_info->version, version, sizeof(current_info->version)); + current_info->checksum = checksum; +} +EXPORT_SYMBOL(llvm_gcda_start_file); + +void llvm_gcda_emit_function(u32 ident, const char *function_name, + u32 func_checksum, u8 use_extra_checksum, u32 cfg_checksum) +{ + struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return; + + INIT_LIST_HEAD(&info->head); + info->ident = ident; + info->checksum = func_checksum; + info->use_extra_checksum = use_extra_checksum; + info->cfg_checksum = cfg_checksum; + if (function_name) + info->function_name = kstrdup(function_name, GFP_KERNEL); + + list_add_tail(&info->head, ¤t_info->functions); +} +EXPORT_SYMBOL(llvm_gcda_emit_function); + +void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters) +{ + struct gcov_fn_info *info = list_last_entry(¤t_info->functions, + struct gcov_fn_info, head); + + info->num_counters = num_counters; + info->counters = counters; +} +EXPORT_SYMBOL(llvm_gcda_emit_arcs); + +void llvm_gcda_summary_info(void) +{ +} +EXPORT_SYMBOL(llvm_gcda_summary_info); + +void llvm_gcda_end_file(void) +{ +} +EXPORT_SYMBOL(llvm_gcda_end_file); + +/** + * gcov_info_filename - return info filename + * @info: profiling data set + */ +const char *gcov_info_filename(struct gcov_info *info) +{ + return info->filename; +} + +/** + * gcov_info_version - return info version + * @info: profiling data set + */ +unsigned int gcov_info_version(struct gcov_info *info) +{ + return info->version; +} + +/** + * gcov_info_next - return next profiling data set + * @info: profiling data set + * + * Returns next gcov_info following @info or first gcov_info in the chain if + * @info is %NULL. + */ +struct gcov_info *gcov_info_next(struct gcov_info *info) +{ + if (!info) + return list_first_entry_or_null(&clang_gcov_list, + struct gcov_info, head); + if (list_is_last(&info->head, &clang_gcov_list)) + return NULL; + return list_next_entry(info, head); +} + +/** + * gcov_info_link - link/add profiling data set to the list + * @info: profiling data set + */ +void gcov_info_link(struct gcov_info *info) +{ + list_add_tail(&info->head, &clang_gcov_list); +} + +/** + * gcov_info_unlink - unlink/remove profiling data set from the list + * @prev: previous profiling data set + * @info: profiling data set + */ +void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) +{ + /* Generic code unlinks while iterating. */ + __list_del_entry(&info->head); +} + +/** + * gcov_info_within_module - check if a profiling data set belongs to a module + * @info: profiling data set + * @mod: module + * + * Returns true if profiling data belongs module, false otherwise. + */ +bool gcov_info_within_module(struct gcov_info *info, struct module *mod) +{ + return within_module((unsigned long)info->filename, mod); +} + +/* Symbolic links to be created for each profiling data file. */ +const struct gcov_link gcov_link[] = { + { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ + { 0, NULL}, +}; + +/** + * gcov_info_reset - reset profiling data to zero + * @info: profiling data set + */ +void gcov_info_reset(struct gcov_info *info) +{ + struct gcov_fn_info *fn; + + list_for_each_entry(fn, &info->functions, head) + memset(fn->counters, 0, + sizeof(fn->counters[0]) * fn->num_counters); +} + +/** + * gcov_info_is_compatible - check if profiling data can be added + * @info1: first profiling data set + * @info2: second profiling data set + * + * Returns non-zero if profiling data can be added, zero otherwise. + */ +int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2) +{ + struct gcov_fn_info *fn_ptr1 = list_first_entry_or_null( + &info1->functions, struct gcov_fn_info, head); + struct gcov_fn_info *fn_ptr2 = list_first_entry_or_null( + &info2->functions, struct gcov_fn_info, head); + + if (info1->checksum != info2->checksum) + return false; + if (!fn_ptr1) + return fn_ptr1 == fn_ptr2; + while (!list_is_last(&fn_ptr1->head, &info1->functions) && + !list_is_last(&fn_ptr2->head, &info2->functions)) { + if (fn_ptr1->checksum != fn_ptr2->checksum) + return false; + if (fn_ptr1->use_extra_checksum != fn_ptr2->use_extra_checksum) + return false; + if (fn_ptr1->use_extra_checksum && + fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum) + return false; + fn_ptr1 = list_next_entry(fn_ptr1, head); + fn_ptr2 = list_next_entry(fn_ptr2, head); + } + return list_is_last(&fn_ptr1->head, &info1->functions) && + list_is_last(&fn_ptr2->head, &info2->functions); +} + +/** + * gcov_info_add - add up profiling data + * @dest: profiling data set to which data is added + * @source: profiling data set which is added + * + * Adds profiling counts of @source to @dest. + */ +void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) +{ + struct gcov_fn_info *dfn_ptr; + struct gcov_fn_info *sfn_ptr = list_first_entry_or_null(&src->functions, + struct gcov_fn_info, head); + + list_for_each_entry(dfn_ptr, &dst->functions, head) { + u32 i; + + for (i = 0; i < sfn_ptr->num_counters; i++) + dfn_ptr->counters[i] += sfn_ptr->counters[i]; + } +} + +static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) +{ + size_t cv_size; /* counter values size */ + struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn), + GFP_KERNEL); + if (!fn_dup) + return NULL; + INIT_LIST_HEAD(&fn_dup->head); + + fn_dup->function_name = kstrdup(fn->function_name, GFP_KERNEL); + if (!fn_dup->function_name) + goto err_name; + + cv_size = fn->num_counters * sizeof(fn->counters[0]); + fn_dup->counters = vmalloc(cv_size); + if (!fn_dup->counters) + goto err_counters; + memcpy(fn_dup->counters, fn->counters, cv_size); + + return fn_dup; + +err_counters: + kfree(fn_dup->function_name); +err_name: + kfree(fn_dup); + return NULL; +} + +/** + * gcov_info_dup - duplicate profiling data set + * @info: profiling data set to duplicate + * + * Return newly allocated duplicate on success, %NULL on error. + */ +struct gcov_info *gcov_info_dup(struct gcov_info *info) +{ + struct gcov_info *dup; + struct gcov_fn_info *fn; + + dup = kmemdup(info, sizeof(*dup), GFP_KERNEL); + if (!dup) + return NULL; + INIT_LIST_HEAD(&dup->head); + INIT_LIST_HEAD(&dup->functions); + dup->filename = kstrdup(info->filename, GFP_KERNEL); + if (!dup->filename) + goto err; + + list_for_each_entry(fn, &info->functions, head) { + struct gcov_fn_info *fn_dup = gcov_fn_info_dup(fn); + + if (!fn_dup) + goto err; + list_add_tail(&fn_dup->head, &dup->functions); + } + + return dup; + +err: + gcov_info_free(dup); + return NULL; +} + +/** + * gcov_info_free - release memory for profiling data set duplicate + * @info: profiling data set duplicate to free + */ +void gcov_info_free(struct gcov_info *info) +{ + struct gcov_fn_info *fn, *tmp; + + list_for_each_entry_safe(fn, tmp, &info->functions, head) { + kfree(fn->function_name); + vfree(fn->counters); + list_del(&fn->head); + kfree(fn); + } + kfree(info->filename); + kfree(info); +} + +#define ITER_STRIDE PAGE_SIZE + +/** + * struct gcov_iterator - specifies current file position in logical records + * @info: associated profiling data + * @buffer: buffer containing file data + * @size: size of buffer + * @pos: current position in file + */ +struct gcov_iterator { + struct gcov_info *info; + void *buffer; + size_t size; + loff_t pos; +}; + +/** + * store_gcov_u32 - store 32 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't + * store anything. + */ +static size_t store_gcov_u32(void *buffer, size_t off, u32 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + *data = v; + } + + return sizeof(*data); +} + +/** + * store_gcov_u64 - store 64 bit number in gcov format to buffer + * @buffer: target buffer or NULL + * @off: offset into the buffer + * @v: value to be stored + * + * Number format defined by gcc: numbers are recorded in the 32 bit + * unsigned binary form of the endianness of the machine generating the + * file. 64 bit numbers are stored as two 32 bit numbers, the low part + * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store + * anything. + */ +static size_t store_gcov_u64(void *buffer, size_t off, u64 v) +{ + u32 *data; + + if (buffer) { + data = buffer + off; + + data[0] = (v & 0xffffffffUL); + data[1] = (v >> 32); + } + + return sizeof(*data) * 2; +} + +/** + * convert_to_gcda - convert profiling data set to gcda file format + * @buffer: the buffer to store file data or %NULL if no data should be stored + * @info: profiling data set to be converted + * + * Returns the number of bytes that were/would have been stored into the buffer. + */ +static size_t convert_to_gcda(char *buffer, struct gcov_info *info) +{ + struct gcov_fn_info *fi_ptr; + size_t pos = 0; + + /* File header. */ + pos += store_gcov_u32(buffer, pos, GCOV_DATA_MAGIC); + pos += store_gcov_u32(buffer, pos, info->version); + pos += store_gcov_u32(buffer, pos, info->checksum); + + list_for_each_entry(fi_ptr, &info->functions, head) { + u32 i; + u32 len = 2; + + if (fi_ptr->use_extra_checksum) + len++; + + pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION); + pos += store_gcov_u32(buffer, pos, len); + pos += store_gcov_u32(buffer, pos, fi_ptr->ident); + pos += store_gcov_u32(buffer, pos, fi_ptr->checksum); + if (fi_ptr->use_extra_checksum) + pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); + + pos += store_gcov_u32(buffer, pos, GCOV_TAG_COUNTER_BASE); + pos += store_gcov_u32(buffer, pos, fi_ptr->num_counters * 2); + for (i = 0; i < fi_ptr->num_counters; i++) + pos += store_gcov_u64(buffer, pos, fi_ptr->counters[i]); + } + + return pos; +} + +/** + * gcov_iter_new - allocate and initialize profiling data iterator + * @info: profiling data set to be iterated + * + * Return file iterator on success, %NULL otherwise. + */ +struct gcov_iterator *gcov_iter_new(struct gcov_info *info) +{ + struct gcov_iterator *iter; + + iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL); + if (!iter) + goto err_free; + + iter->info = info; + /* Dry-run to get the actual buffer size. */ + iter->size = convert_to_gcda(NULL, info); + iter->buffer = vmalloc(iter->size); + if (!iter->buffer) + goto err_free; + + convert_to_gcda(iter->buffer, info); + + return iter; + +err_free: + kfree(iter); + return NULL; +} + + +/** + * gcov_iter_get_info - return profiling data set for given file iterator + * @iter: file iterator + */ +void gcov_iter_free(struct gcov_iterator *iter) +{ + vfree(iter->buffer); + kfree(iter); +} + +/** + * gcov_iter_get_info - return profiling data set for given file iterator + * @iter: file iterator + */ +struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter) +{ + return iter->info; +} + +/** + * gcov_iter_start - reset file iterator to starting position + * @iter: file iterator + */ +void gcov_iter_start(struct gcov_iterator *iter) +{ + iter->pos = 0; +} + +/** + * gcov_iter_next - advance file iterator to next logical record + * @iter: file iterator + * + * Return zero if new position is valid, non-zero if iterator has reached end. + */ +int gcov_iter_next(struct gcov_iterator *iter) +{ + if (iter->pos < iter->size) + iter->pos += ITER_STRIDE; + + if (iter->pos >= iter->size) + return -EINVAL; + + return 0; +} + +/** + * gcov_iter_write - write data for current pos to seq_file + * @iter: file iterator + * @seq: seq_file handle + * + * Return zero on success, non-zero otherwise. + */ +int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq) +{ + size_t len; + + if (iter->pos >= iter->size) + return -EINVAL; + + len = ITER_STRIDE; + if (iter->pos + len > iter->size) + len = iter->size - iter->pos; + + seq_write(seq, iter->buffer + iter->pos, len); + + return 0; +} diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c index 1e32e66c9563..64d2dd9f7c64 100644 --- a/kernel/gcov/gcc_3_4.c +++ b/kernel/gcov/gcc_3_4.c @@ -137,6 +137,18 @@ void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) gcov_info_head = info->next; } +/** + * gcov_info_within_module - check if a profiling data set belongs to a module + * @info: profiling data set + * @mod: module + * + * Returns true if profiling data belongs module, false otherwise. + */ +bool gcov_info_within_module(struct gcov_info *info, struct module *mod) +{ + return within_module((unsigned long)info, mod); +} + /* Symbolic links to be created for each profiling data file. */ const struct gcov_link gcov_link[] = { { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index ca5e5c0ef853..ec37563674d6 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -150,6 +150,18 @@ void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info) gcov_info_head = info->next; } +/** + * gcov_info_within_module - check if a profiling data set belongs to a module + * @info: profiling data set + * @mod: module + * + * Returns true if profiling data belongs module, false otherwise. + */ +bool gcov_info_within_module(struct gcov_info *info, struct module *mod) +{ + return within_module((unsigned long)info, mod); +} + /* Symbolic links to be created for each profiling data file. */ const struct gcov_link gcov_link[] = { { OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */ diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h index 0ecf1d664ec3..6ab2c1808c9d 100644 --- a/kernel/gcov/gcov.h +++ b/kernel/gcov/gcov.h @@ -15,6 +15,7 @@ #ifndef GCOV_H #define GCOV_H GCOV_H +#include #include /* @@ -46,6 +47,7 @@ unsigned int gcov_info_version(struct gcov_info *info); struct gcov_info *gcov_info_next(struct gcov_info *info); void gcov_info_link(struct gcov_info *info); void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info); +bool gcov_info_within_module(struct gcov_info *info, struct module *mod); /* Base interface. */ enum gcov_action { From 4c3e84fccd814f6530984f0db89d85ecdfc8b82a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 20 Mar 2019 10:15:46 -0700 Subject: [PATCH 006/152] ANDROID: kbuild: add CONFIG_LD_IS_LLD Similarly to the CC_IS_CLANG config, add LD_IS_LLD to simplify feature selection based on the linker. Bug: 133186739 Change-Id: I175ee4b33832afd1369d48dbd131fc5c39c2f71a Signed-off-by: Sami Tolvanen --- init/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 4d6062311fae..0332548d7fa7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -19,6 +19,9 @@ config GCC_VERSION config CC_IS_CLANG def_bool $(success,$(CC) --version | head -n 1 | grep -q clang) +config LD_IS_LLD + def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD) + config CLANG_VERSION int default $(shell,$(srctree)/scripts/clang-version.sh $(CC)) From 0e0752ebd2b5865c38cb46d61609b5bdb8863505 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 28 Nov 2017 08:48:49 -0800 Subject: [PATCH 007/152] ANDROID: kbuild: add support for clang LTO This change adds the configuration option CONFIG_LTO_CLANG, and build system support for clang's Link Time Optimization (LTO). In preparation for LTO support for other compilers, potentially common parts of the changes are gated behind CONFIG_LTO instead. With -flto, instead of object files, clang produces LLVM bitcode, which is compiled into a native object at link time, allowing the final binary to be optimized globally. For more details, see: https://llvm.org/docs/LinkTimeOptimization.html While the kernel normally uses GNU ld for linking, LLVM supports LTO only with lld or GNU gold linkers. This patch set assumes lld will be used. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: Ibcd9fc7ec501b4f30b43b4877897615645f8655f Signed-off-by: Sami Tolvanen --- Makefile | 29 +++++++++- arch/Kconfig | 38 +++++++++++++ include/asm-generic/vmlinux.lds.h | 2 +- scripts/Makefile.build | 94 +++++++++++++++++++++++++++++-- scripts/Makefile.modpost | 34 +++++++++-- scripts/link-vmlinux.sh | 64 +++++++++++++++++---- scripts/mod/modpost.c | 7 +++ 7 files changed, 246 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 840bde10c9b4..f70ef17a3d2f 100644 --- a/Makefile +++ b/Makefile @@ -608,6 +608,16 @@ ifdef CONFIG_FUNCTION_TRACER CC_FLAGS_FTRACE := -pg endif +# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure +# ar/cc/ld-* macros return correct values. +ifdef CONFIG_LTO_CLANG +# use llvm-ar for building symbol tables from IR files, and llvm-nm instead +# of objdump for processing symbol versions and exports +LLVM_AR := llvm-ar +LLVM_NM := llvm-nm +export LLVM_AR LLVM_NM +endif + # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default # values of the respective KBUILD_* variables ARCH_CPPFLAGS := @@ -795,6 +805,22 @@ KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections LDFLAGS_vmlinux += --gc-sections endif +ifdef CONFIG_LTO_CLANG +lto-clang-flags := -flto -fvisibility=hidden + +# allow disabling only clang LTO where needed +DISABLE_LTO_CLANG := -fno-lto -fvisibility=default +export DISABLE_LTO_CLANG +endif + +ifdef CONFIG_LTO +LTO_CFLAGS := $(lto-clang-flags) +KBUILD_CFLAGS += $(LTO_CFLAGS) + +DISABLE_LTO := $(DISABLE_LTO_CLANG) +export LTO_CFLAGS DISABLE_LTO +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) @@ -1583,7 +1609,8 @@ clean: $(clean-dirs) -o -name modules.builtin -o -name '.tmp_*.o.*' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.gcno' \) -type f -print | xargs rm -f + -o -name '*.gcno' \ + -o -name '*.*.symversions' \) -type f -print | xargs rm -f # Generate tags for editors # --------------------------------------------------------------------------- diff --git a/arch/Kconfig b/arch/Kconfig index 6801123932a5..4572791825b6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -474,6 +474,44 @@ config STACKPROTECTOR_STRONG about 20% of all kernel functions, which increases the kernel code size by about 2%. +config LTO + def_bool n + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option if it supports: + - compiling with clang, + - compiling inline assembly with clang's integrated assembler, + - and linking with LLD. + +choice + prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)" + default LTO_NONE + help + This option turns on Link-Time Optimization (LTO). + +config LTO_NONE + bool "None" + +config LTO_CLANG + bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)" + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !FTRACE_MCOUNT_RECORD + depends on CC_IS_CLANG && LD_IS_LLD + select LTO + help + This option enables clang's Link Time Optimization (LTO), which allows + the compiler to optimize the kernel globally at link time. If you + enable this option, the compiler generates LLVM IR instead of object + files, and the actual compilation from IR occurs at the LTO link step, + which may take several minutes. + + If you select this option, you must compile the kernel with clang and + LLD. + +endchoice + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index dd38c97933f1..65cbde21a9f4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -66,7 +66,7 @@ * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 64fac0ad32d6..30b9860ece7b 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -185,6 +185,23 @@ else cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< +ifdef CONFIG_LTO_CLANG +# Generate .o.symversions files for each .o with exported symbols, and link these +# to the kernel and/or modules at the end. +cmd_modversions_c = \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/$(@F).symversions; \ + fi; \ + else \ + if $(LLVM_NM) $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/$(@F).symversions; \ + fi; \ + fi; \ + mv -f $(@D)/.tmp_$(@F) $@; +else cmd_modversions_c = \ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ @@ -197,6 +214,7 @@ cmd_modversions_c = \ mv -f $(@D)/.tmp_$(@F) $@; \ fi; endif +endif ifdef CONFIG_FTRACE_MCOUNT_RECORD ifndef CC_USING_RECORD_MCOUNT @@ -429,6 +447,26 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler # To build objects in subdirs, we need to descend into the directories $(sort $(subdir-obj-y)): $(subdir-ym) ; +ifdef CONFIG_LTO_CLANG + ifdef CONFIG_MODVERSIONS + # combine symversions for later processing + update_lto_symversions = \ + rm -f $@.symversions; \ + for i in $(filter-out FORCE,$^); do \ + if [ -f $$i.symversions ]; then \ + cat $$i.symversions \ + >> $@.symversions; \ + fi; \ + done; + endif + # rebuild the symbol table with llvm-ar to include IR files + update_lto_symtable = ; \ + mv -f $@ $@.tmp; \ + $(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ \ + $$($(AR) t $@.tmp); \ + rm -f $@.tmp +endif + # # Rule to compile a set of .o files into one .o file # @@ -439,7 +477,8 @@ ifdef builtin-target # scripts/link-vmlinux.sh builds an aggregate built-in.a with a symbol # table and index. quiet_cmd_ar_builtin = AR $@ - cmd_ar_builtin = rm -f $@; \ + cmd_ar_builtin = $(update_lto_symversions) \ + rm -f $@; \ $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(filter $(real-obj-y), $^) $(builtin-target): $(real-obj-y) FORCE @@ -468,7 +507,11 @@ ifdef lib-target quiet_cmd_link_l_target = AR $@ # lib target archives do get a symbol table and index -cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) +cmd_link_l_target = \ + $(update_lto_symversions) \ + rm -f $@; \ + $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) \ + $(update_lto_symtable) $(lib-target): $(lib-y) FORCE $(call if_changed,link_l_target) @@ -479,13 +522,34 @@ dummy-object = $(obj)/.lib_exports.o ksyms-lds = $(dot-target).lds quiet_cmd_export_list = EXPORTS $@ -cmd_export_list = $(OBJDUMP) -h $< | \ - sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\ - rm -f $(dummy-object);\ +filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' +link_export_list = rm -f $(dummy-object); \ echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\ $(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\ rm $(dummy-object) $(ksyms-lds) +ifdef CONFIG_LTO_CLANG +# objdump doesn't understand IR files and llvm-nm doesn't support archives, +# so we'll walk through each file in the archive separately +cmd_export_list = \ + rm -f $(ksyms-lds); \ + for o in $$($(AR) t $<); do \ + if $(OBJDUMP) -h $$o >/dev/null 2>/dev/null; then \ + $(OBJDUMP) -h $$o | \ + $(filter_export_list) \ + >>$(ksyms-lds); \ + else \ + $(LLVM_NM) $$o | \ + $(filter_export_list) \ + >>$(ksyms-lds); \ + fi; \ + done; \ + $(link_export_list) +else +cmd_export_list = $(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds); \ + $(link_export_list) +endif + $(obj)/lib-ksyms.o: $(lib-target) FORCE $(call if_changed,export_list) @@ -509,13 +573,31 @@ $($(subst $(obj)/,,$(@:.o=-objs))) \ $($(subst $(obj)/,,$(@:.o=-y))) \ $($(subst $(obj)/,,$(@:.o=-m)))), $^) +cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis) + +quiet_cmd_link_multi-y = AR $@ +cmd_link_multi-y = $(update_lto_symversions) \ + rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \ + $(update_lto_symtable) + quiet_cmd_link_multi-m = LD [M] $@ -cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis) + +ifdef CONFIG_LTO_CLANG + # don't compile IR until needed + cmd_link_multi-m = $(cmd_link_multi-y) +else + cmd_link_multi-m = $(cmd_link_multi-link) +endif + +$(multi-used-y): FORCE + $(call if_changed,link_multi-y) $(multi-used-m): FORCE $(call if_changed,link_multi-m) @{ echo $(@:.o=.ko); echo $(link_multi_deps); \ $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod) + +$(call multi_depend, $(multi-used-y), .o, -objs -y) $(call multi_depend, $(multi-used-m), .o, -objs -y -m) targets += $(multi-used-m) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 7d4af0d0accb..dc2d4be47ac8 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -83,12 +83,28 @@ modpost = scripts/mod/modpost \ MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) +# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we +# need to link them into actual objects before passing them to modpost +modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,) + +ifdef CONFIG_LTO_CLANG +quiet_cmd_cc_lto_link_modules = LD [M] $@ +cmd_cc_lto_link_modules = \ + $(LD) $(ld_flags) -r -o $(@) \ + $(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \ + echo -T $(@:$(modpost-ext).o=.o.symversions)) \ + --whole-archive $(filter-out FORCE,$^) + +$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE + $(call if_changed,cc_lto_link_modules) +endif + # We can go over command line length here, so be careful. quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules - cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T - + cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T - PHONY += __modpost -__modpost: $(modules:.ko=.o) FORCE +__modpost: $(modules:.ko=$(modpost-ext).o) FORCE $(call cmd,modpost) $(wildcard vmlinux) quiet_cmd_kernel-mod = MODPOST $@ @@ -100,7 +116,6 @@ vmlinux.o: FORCE # Declare generated files as targets for modpost $(modules:.ko=.mod.c): __modpost ; - # Step 5), compile all *.mod.c files # modname is set to make c_flags define KBUILD_MODNAME @@ -119,13 +134,24 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Step 6), final link of the modules with optional arch pass after final link quiet_cmd_ld_ko_o = LD [M] $@ + +ifdef CONFIG_LTO_CLANG + cmd_ld_ko_o = \ + $(LD) -r $(LDFLAGS) \ + $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ + $(shell [ -s $(@:.ko=.o.symversions) ] && \ + echo -T $(@:.ko=.o.symversions)) \ + -o $@ --whole-archive \ + $(filter-out FORCE,$(^:$(modpost-ext).o=.o)) +else cmd_ld_ko_o = \ $(LD) -r $(KBUILD_LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ -o $@ $(filter-out FORCE,$^) ; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) +endif -$(modules): %.ko :%.o %.mod.o FORCE +$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE +$(call if_changed,ld_ko_o) targets += $(modules) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index c8cf45362bd6..a64e3fd412ab 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -60,6 +60,38 @@ archive_builtin() ${AR} rcsTP${KBUILD_ARFLAGS} built-in.a \ ${KBUILD_VMLINUX_INIT} \ ${KBUILD_VMLINUX_MAIN} + + # rebuild with llvm-ar to update the symbol table + if [ -n "${CONFIG_LTO_CLANG}" ]; then + mv -f built-in.a built-in.a.tmp + ${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.a $(${AR} t built-in.a.tmp) + rm -f built-in.a.tmp + fi +} + +# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into +# .tmp_symversions +modversions() +{ + if [ -z "${CONFIG_LTO_CLANG}" ]; then + return + fi + + if [ -z "${CONFIG_MODVERSIONS}" ]; then + return + fi + + rm -f .tmp_symversions + + for a in built-in.a ${KBUILD_VMLINUX_LIBS}; do + for o in $(${AR} t $a); do + if [ -f ${o}.symversions ]; then + cat ${o}.symversions >> .tmp_symversions + fi + done + done + + echo "-T .tmp_symversions" } # Link of vmlinux.o used for section mismatch analysis @@ -75,7 +107,13 @@ modpost_link() ${KBUILD_VMLINUX_LIBS} \ --end-group" - ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects} + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # This might take a while, so indicate that we're doing + # an LTO link + info LTO vmlinux.o + fi + + ${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects} } # Link of vmlinux @@ -87,13 +125,20 @@ vmlinux_link() local objects if [ "${SRCARCH}" != "um" ]; then - objects="--whole-archive \ - built-in.a \ - --no-whole-archive \ - --start-group \ - ${KBUILD_VMLINUX_LIBS} \ - --end-group \ - ${1}" + if [ -z "${CONFIG_LTO_CLANG}" ]; then + objects="--whole-archive \ + built-in.a \ + --no-whole-archive \ + --start-group \ + ${KBUILD_VMLINUX_LIBS} \ + --end-group \ + ${1}" + else + objects="--start-group \ + vmlinux.o \ + --end-group \ + ${1}" + fi ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ -T ${lds} ${objects} @@ -114,7 +159,6 @@ vmlinux_link() fi } - # Create ${2} .o file with all symbols from the ${1} object file kallsyms() { @@ -159,6 +203,7 @@ cleanup() { rm -f .tmp_System.map rm -f .tmp_kallsyms* + rm -f .tmp_symversions rm -f .tmp_vmlinux* rm -f built-in.a rm -f System.map @@ -220,7 +265,6 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init archive_builtin #link vmlinux.o -info LD vmlinux.o modpost_link vmlinux.o # modpost vmlinux.o to check for section mismatches diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 858cbe56b100..ca5bb33d7460 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -145,6 +145,9 @@ static struct module *new_module(const char *modname) p[strlen(p) - 2] = '\0'; mod->is_dot_o = 1; } + /* strip trailing .lto */ + if (strends(p, ".lto")) + p[strlen(p) - 4] = '\0'; /* add to list */ mod->name = p; @@ -1927,6 +1930,10 @@ static char *remove_dot(char *s) size_t m = strspn(s + n + 1, "0123456789"); if (m && (s[n + m] == '.' || s[n + m] == 0)) s[n] = 0; + + /* strip trailing .lto */ + if (strends(s, ".lto")) + s[strlen(s) - 4] = '\0'; } return s; } From c274f8308e6c6519fffa34d0247a3e4d71ce7c49 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 8 May 2019 13:22:12 -0700 Subject: [PATCH 008/152] FROMLIST: kbuild: fix dynamic ftrace with clang LTO With CONFIG_LTO_CLANG enabled, LLVM IR won't be compiled into object files until modpost_link. This change postpones calls to recordmcount until after this step. In order to exclude ftrace_process_locs from inspection, we add a new code section .text..ftrace, which we tell recordmcount to ignore, and a __norecordmcount attribute for moving functions to this section. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: Iba2c053968206acf533fadab1eb34a743b5088ee (am from https://patchwork.kernel.org/patch/10060327/) Signed-off-by: Sami Tolvanen --- arch/Kconfig | 2 +- include/asm-generic/vmlinux.lds.h | 1 + include/linux/compiler-clang.h | 7 +++++++ include/linux/compiler_types.h | 4 ++++ kernel/trace/ftrace.c | 6 +++--- scripts/Makefile.build | 14 +++++++++++++- scripts/Makefile.modpost | 4 ++++ scripts/link-vmlinux.sh | 18 ++++++++++++++++++ scripts/recordmcount.c | 3 ++- 9 files changed, 53 insertions(+), 6 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 4572791825b6..0b4e93b91568 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -497,7 +497,7 @@ config LTO_NONE config LTO_CLANG bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)" depends on ARCH_SUPPORTS_LTO_CLANG - depends on !FTRACE_MCOUNT_RECORD + depends on !FTRACE_MCOUNT_RECORD || HAVE_C_RECORDMCOUNT depends on CC_IS_CLANG && LD_IS_LLD select LTO help diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 65cbde21a9f4..f12e43993d66 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -492,6 +492,7 @@ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ *(.text..refcount) \ + *(.text..ftrace) \ *(.ref.text) \ MEM_KEEP(init.text*) \ MEM_KEEP(exit.text*) \ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d756f2318efe..64eb811a11e4 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -43,3 +43,10 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #define __assume_aligned(a, ...) \ __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) + +#ifdef CONFIG_LTO_CLANG +#ifdef CONFIG_FTRACE_MCOUNT_RECORD +#define __norecordmcount \ + __attribute__((__section__(".text..ftrace"))) +#endif +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index db192becfec4..4f498e3cc760 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -248,6 +248,10 @@ struct ftrace_likely_data { # define __gnu_inline #endif +#ifndef __norecordmcount +#define __norecordmcount +#endif + /* * Force always-inline if the user requests it so via the .config. * GCC does not warn about unused static inline functions for diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1688782f3dfb..8edbec4ec3fe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5500,9 +5500,9 @@ static int ftrace_cmp_ips(const void *a, const void *b) return 0; } -static int ftrace_process_locs(struct module *mod, - unsigned long *start, - unsigned long *end) +static int __norecordmcount ftrace_process_locs(struct module *mod, + unsigned long *start, + unsigned long *end) { struct ftrace_page *start_pg; struct ftrace_page *pg; diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 30b9860ece7b..585ea170818f 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -223,6 +223,12 @@ ifdef BUILD_C_RECORDMCOUNT ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") RECORDMCOUNT_FLAGS = -w endif + +ifdef CONFIG_LTO_CLANG +# With LTO, we postpone running recordmcount until after the LTO link step, so +# let's export the parameters for the link script. +export RECORDMCOUNT_FLAGS +else # Due to recursion, we must skip empty.o. # The empty.o file is created in the make process in order to determine # the target endianness and word size. It is made before all other C @@ -231,22 +237,28 @@ sub_cmd_record_mcount = \ if [ $(@) != "scripts/mod/empty.o" ]; then \ $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)"; \ fi; +endif + recordmcount_source := $(srctree)/scripts/recordmcount.c \ $(srctree)/scripts/recordmcount.h -else +else # !BUILD_C_RECORDMCOUNT sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)" \ "$(LD) $(KBUILD_LDFLAGS)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; + recordmcount_source := $(srctree)/scripts/recordmcount.pl endif # BUILD_C_RECORDMCOUNT + +ifndef CONFIG_LTO_CLANG cmd_record_mcount = \ if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ "$(CC_FLAGS_FTRACE)" ]; then \ $(sub_cmd_record_mcount) \ fi; +endif # CONFIG_LTO_CLANG endif # CC_USING_RECORD_MCOUNT endif # CONFIG_FTRACE_MCOUNT_RECORD diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index dc2d4be47ac8..1771a311ad66 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -143,6 +143,10 @@ ifdef CONFIG_LTO_CLANG echo -T $(@:.ko=.o.symversions)) \ -o $@ --whole-archive \ $(filter-out FORCE,$(^:$(modpost-ext).o=.o)) + + ifdef CONFIG_FTRACE_MCOUNT_RECORD + cmd_ld_ko_o += ; $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) $@ + endif else cmd_ld_ko_o = \ $(LD) -r $(KBUILD_LDFLAGS) \ diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index a64e3fd412ab..b6473a89bd4b 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -116,6 +116,19 @@ modpost_link() ${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects} } +# If CONFIG_LTO_CLANG is selected, we postpone running recordmcount until +# we have compiled LLVM IR to an object file. +recordmcount() +{ + if [ -z "${CONFIG_LTO_CLANG}" ]; then + return + fi + + if [ -n "${CONFIG_FTRACE_MCOUNT_RECORD}" ]; then + scripts/recordmcount ${RECORDMCOUNT_FLAGS} $* + fi +} + # Link of vmlinux # ${1} - optional extra .o files # ${2} - output file @@ -270,6 +283,11 @@ modpost_link vmlinux.o # modpost vmlinux.o to check for section mismatches ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o +if [ -n "${CONFIG_LTO_CLANG}" ]; then + # Call recordmcount if needed + recordmcount vmlinux.o +fi + kallsymso="" kallsyms_vmlinux="" if [ -n "${CONFIG_KALLSYMS}" ]; then diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 895c40e8679f..7a9ad239da57 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -406,7 +406,8 @@ is_mcounted_section_name(char const *const txtname) strcmp(".softirqentry.text", txtname) == 0 || strcmp(".kprobes.text", txtname) == 0 || strcmp(".cpuidle.text", txtname) == 0 || - strcmp(".text.unlikely", txtname) == 0; + (strncmp(".text.", txtname, 6) == 0 && + strcmp(".text..ftrace", txtname) != 0); } /* 32 bit and 64 bit are very similar */ From af85e9f502888cf65548a0367553e9a6be894846 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 14 Sep 2018 09:17:55 -0700 Subject: [PATCH 009/152] ANDROID: kbuild: disable LTO_CLANG with KASAN Using LTO with KASAN currently results in "inlinable function call in a function with debug info must have a !dbg location" errors for memset and several of the __asan_report_* functions. As combining these options doesn't provide significant benefits, this change disables LTO_CLANG when KASAN is selected. Bug: 113246877 Bug: 133186739 Change-Id: I06cd27d1e9ab74627de4771548453abe3593fcb5 Signed-off-by: Sami Tolvanen --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index 0b4e93b91568..cb8ef5e2b322 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -498,6 +498,7 @@ config LTO_CLANG bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)" depends on ARCH_SUPPORTS_LTO_CLANG depends on !FTRACE_MCOUNT_RECORD || HAVE_C_RECORDMCOUNT + depends on !KASAN depends on CC_IS_CLANG && LD_IS_LLD select LTO help From e8e77de9b437502634129774fd761092e0c4b787 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 10 Apr 2017 12:32:24 -0700 Subject: [PATCH 010/152] FROMLIST: scripts/mod: disable LTO for empty.c With CONFIG_LTO_CLANG, clang generates LLVM IR instead of ELF object files. As empty.o is used for probing target properties, disable LTO for it to produce an object file instead. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: I0c7ac7ee0134465cac4a8c3a9c7e8b6347076a2b (am from https://patchwork.kernel.org/patch/10060317/) Signed-off-by: Sami Tolvanen --- scripts/mod/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 42c5d50f2bcc..e014b2fdd069 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD := y +CFLAGS_empty.o += $(DISABLE_LTO) hostprogs-y := modpost mk_elfconfig always := $(hostprogs-y) empty.o From 33f52ffaf9e603508abb14e7b63cbee48b7eca45 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 1 May 2017 11:08:05 -0700 Subject: [PATCH 011/152] FROMLIST: efi/libstub: disable LTO With CONFIG_LTO_CLANG, we produce LLVM IR instead of object files. Since LTO is not really needed here and the Makefile assumes we produce an object file, disable LTO for libstub. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: Ieaa3d7e2c694655788f480f4351bf7c4d3fce090 (am from https://patchwork.kernel.org/patch/10060309/) Acked-by: Ard Biesheuvel Signed-off-by: Sami Tolvanen --- drivers/firmware/efi/libstub/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index d9845099635e..818d399245e5 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,8 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) \ - -D__DISABLE_EXPORTS + -D__DISABLE_EXPORTS \ + $(DISABLE_LTO) GCOV_PROFILE := n KASAN_SANITIZE := n From acd5c72821936b2f3a621e67c7e45dbd49122902 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 5 Dec 2017 12:54:13 -0800 Subject: [PATCH 012/152] ANDROID: drivers/misc: disable LTO for lkdtm_rodata.o Disable LTO for lkdtm_rodata.o to allow objcopy to be used to manipulate sections. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: Iedd1a3a2a9b06f44e7ceb6ac287ea764eaf5ef0a Signed-off-by: Sami Tolvanen --- drivers/misc/lkdtm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 3370a4138e94..1578589091a8 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -10,6 +10,7 @@ lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o lkdtm-$(CONFIG_LKDTM) += usercopy.o KCOV_INSTRUMENT_rodata.o := n +CFLAGS_lkdtm_rodata.o += $(DISABLE_LTO) OBJCOPYFLAGS := OBJCOPYFLAGS_rodata_objcopy.o := \ From 9730a6289c31ff03f15671ca7dea848fc14b9bfe Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 8 Dec 2017 15:49:24 -0800 Subject: [PATCH 013/152] ANDROID: init: ensure initcall ordering with LTO With LTO, LLVM sorts initcalls in a single translation unit alphabetically based on the name of the function (or actually, the variable stored in the initcall section). Use __COUNTER__ in the variable name in an attempt to preserve the intended order. In addition, LTO requires all initcall variables to have unique names. Use __LINE__ in the name to reduce the chance of name collisions. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: I4fa3cb93cba967a1440ac53328eb6b8ac649ff36 Signed-off-by: Sami Tolvanen --- include/linux/init.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index 2538d176dd1f..892fe8e04c82 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -167,6 +167,20 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ +#ifdef CONFIG_LTO_CLANG + /* + * Use __COUNTER__ prefix in the variable to help ensure ordering + * inside a compilation unit that defines multiple initcalls, and + * __LINE__ to help prevent naming collisions. + */ + #define ___initcall_name2(c, l, fn, id) __initcall_##c##_##l##_##fn##id + #define ___initcall_name1(c, l, fn, id) ___initcall_name2(c, l, fn, id) + #define __initcall_name(fn, id) \ + ___initcall_name1(__COUNTER__, __LINE__, fn, id) +#else + #define __initcall_name(fn, id) __initcall_##fn##id +#endif + /* * initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined @@ -187,12 +201,12 @@ extern bool initcall_debug; #define ___define_initcall(fn, id, __sec) \ __ADDRESSABLE(fn) \ asm(".section \"" #__sec ".init\", \"a\" \n" \ - "__initcall_" #fn #id ": \n" \ + __stringify(__initcall_name(fn, id)) ": \n" \ ".long " #fn " - . \n" \ ".previous \n"); #else #define ___define_initcall(fn, id, __sec) \ - static initcall_t __initcall_##fn##id __used \ + static initcall_t __initcall_name(fn, id) __used \ __attribute__((__section__(#__sec ".init"))) = fn; #endif From 1d7afc15c7390fa3f800b07e9fa56ee346520f22 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 26 Apr 2019 14:55:29 -0700 Subject: [PATCH 014/152] BACKPORT: arm64: sysreg: Make mrs_s and msr_s macros work with Clang and LTO Clang's integrated assembler does not allow assembly macros defined in one inline asm block using the .macro directive to be used across separate asm blocks. LLVM developers consider this a feature and not a bug, recommending code refactoring: https://bugs.llvm.org/show_bug.cgi?id=19749 As binutils doesn't allow macros to be redefined, this change uses UNDEFINE_MRS_S and UNDEFINE_MSR_S to define corresponding macros in-place and workaround gcc and clang limitations on redefining macros across different assembler blocks. Specifically, the current state after preprocessing looks like this: asm volatile(".macro mXX_s ... .endm"); void f() { asm volatile("mXX_s a, b"); } With GCC, it gives macro redefinition error because sysreg.h is included in multiple source files, and assembler code for all of them is later combined for LTO (I've seen an intermediate file with hundreds of identical definitions). With clang, it gives macro undefined error because clang doesn't allow sharing macros between inline asm statements. I also seem to remember catching another sort of undefined error with GCC due to reordering of macro definition asm statement and generated asm code for function that uses the macro. The solution with defining and undefining for each use, while certainly not elegant, satisfies both GCC and clang, LTO and non-LTO. Co-developed-by: Alex Matveev Co-developed-by: Yury Norov Co-developed-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Will Deacon (cherry picked from commit be604c616ca71cbf5c860d0cfa4595128ab74189) Change-Id: I803fff57f639b0921ef81f90ec4befe802e7eecf Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/kvm_hyp.h | 4 +-- arch/arm64/include/asm/sysreg.h | 45 +++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 384c34397619..f150c0f44bbf 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -28,7 +28,7 @@ ({ \ u64 reg; \ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ - "mrs_s %0, " __stringify(r##vh),\ + __mrs_s("%0", r##vh), \ ARM64_HAS_VIRT_HOST_EXTN) \ : "=r" (reg)); \ reg; \ @@ -38,7 +38,7 @@ do { \ u64 __val = (u64)(v); \ asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ - "msr_s " __stringify(r##vh) ", %x0",\ + __msr_s(r##vh, "%x0"), \ ARM64_HAS_VIRT_HOST_EXTN) \ : : "rZ" (__val)); \ } while (0) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c1470931b897..d1f69b467e64 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -684,20 +684,39 @@ #include #include -asm( -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" -" .equ .L__reg_num_x\\num, \\num\n" -" .endr\n" +#define __DEFINE_MRS_MSR_S_REGNUM \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__reg_num_x\\num, \\num\n" \ +" .endr\n" \ " .equ .L__reg_num_xzr, 31\n" -"\n" -" .macro mrs_s, rt, sreg\n" - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MRS_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro mrs_s, rt, sreg\n" \ + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ " .endm\n" -"\n" -" .macro msr_s, sreg, rt\n" - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MSR_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro msr_s, sreg, rt\n" \ + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ " .endm\n" -); + +#define UNDEFINE_MRS_S \ +" .purgem mrs_s\n" + +#define UNDEFINE_MSR_S \ +" .purgem msr_s\n" + +#define __mrs_s(v, r) \ + DEFINE_MRS_S \ +" mrs_s " v ", " __stringify(r) "\n" \ + UNDEFINE_MRS_S + +#define __msr_s(r, v) \ + DEFINE_MSR_S \ +" msr_s " __stringify(r) ", " v "\n" \ + UNDEFINE_MSR_S /* * Unlike read_cpuid, calls to read_sysreg are never expected to be @@ -725,13 +744,13 @@ asm( */ #define read_sysreg_s(r) ({ \ u64 __val; \ - asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ + asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ __val; \ }) #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ - asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ + asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) /* From 723ef158f0694fe99a59061aea37e00e6f22ec57 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 13 Nov 2017 08:19:17 -0800 Subject: [PATCH 015/152] FROMLIST: arm64: kvm: use -fno-jump-tables with clang Starting with LLVM r308050, clang generates a jump table with EL1 virtual addresses in __init_stage2_translation, which results in a kernel panic when booting at EL2: Kernel panic - not syncing: HYP panic: PS:800003c9 PC:ffff0000089e6fd8 ESR:86000004 FAR:ffff0000089e6fd8 HPFAR:0000000009825000 PAR:0000000000000000 VCPU:000804fc20001221 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.0-rc7-dirty #3 Hardware name: ARM Juno development board (r1) (DT) Call trace: [] dump_backtrace+0x0/0x34c [] show_stack+0x18/0x20 [] dump_stack+0xc4/0xfc [] panic+0x138/0x2b4 [] panic+0x0/0x2b4 SMP: stopping secondary CPUs SMP: failed to stop secondary CPUs 0-3,5 Kernel Offset: disabled CPU features: 0x002086 Memory Limit: none ---[ end Kernel panic - not syncing: HYP panic: PS:800003c9 PC:ffff0000089e6fd8 ESR:86000004 FAR:ffff0000089e6fd8 HPFAR:0000000009825000 PAR:0000000000000000 VCPU:000804fc20001221 This change adds -fno-jump-tables to arm64/hyp to work around the bug. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: I1257be1febdcbfcc886fe6183c698b7a98d2a153 (am from https://patchwork.kernel.org/patch/10060301/) Suggested-by: AKASHI Takahiro Signed-off-by: Sami Tolvanen --- arch/arm64/kvm/hyp/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 2fabc2dc1966..78e8a37fba9a 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,6 +6,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) +ifeq ($(cc-name),clang) +ccflags-y += -fno-jump-tables +endif + KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o From 864f2bc0e4950b1ad9f18556815752c42beab9bb Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 19 Dec 2017 08:55:23 -0800 Subject: [PATCH 016/152] ANDROID: arm64: vdso: disable LTO Due to a bug in clang, vdso fails to build when both LTO_CLANG and CC_OPTIMIZE_FOR_SIZE are enabled: https://bugs.llvm.org/show_bug.cgi?id=32155 Disable LTO for vdso to work around the problem. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: I1d0279535fd389db4c829e4556f9ef728f240a34 Signed-off-by: Sami Tolvanen --- arch/arm64/kernel/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b215c712d897..ef3f9d9d4062 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -15,6 +15,7 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ccflags-y += $(DISABLE_LTO) # Disable gcov profiling for VDSO code GCOV_PROFILE := n From 3bd6dd6a79706abe1bef1fd7d8bf3b04e530ef17 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 6 Feb 2019 12:51:14 -0800 Subject: [PATCH 017/152] ANDROID: arm64: lse: fix LSE atomics with LTO LLVM's integrated assembler is always used for inline assembly with CONFIG_LTO_CLANG. Unlike gcc, LLVM considers each inline assembly block to be independent and therefore, any preambles that enable features must be included in each block. This change adds the necessary preamble to ARM64_LSE_ATOMIC_INSN to allow CONFIG_ARM64_LSE_ATOMICS to be enabled with LTO. Bug: 117299373 Bug: 133186739 Change-Id: Icc06361dc2a2dba0f5f967d7f540cac2753b3e9c Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/lse.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 8262325e2fc6..8be3b0f85182 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -20,7 +20,12 @@ #else /* __ASSEMBLER__ */ +#ifdef CONFIG_LTO_CLANG +#define __LSE_PREAMBLE ".arch armv8-a+lse\n" +#else __asm__(".arch_extension lse"); +#define __LSE_PREAMBLE +#endif /* Move the ll/sc atomics out-of-line */ #define __LL_SC_INLINE notrace @@ -33,7 +38,7 @@ __asm__(".arch_extension lse"); /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ - ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) + ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) #endif /* __ASSEMBLER__ */ #else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ From 8108aab7a0ee7739c106d2612dd55734a7985f0e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 25 Apr 2019 16:08:55 -0700 Subject: [PATCH 018/152] ANDROID: arm64: add atomic_ll_sc.o to obj-y if using lld __ll_sc_* functions are only referenced in inline assembly, which means lld won't see them when CONFIG_LTO_CLANG is enabled due to a bug in LLVM: https://bugs.llvm.org/show_bug.cgi?id=35841 When LTO and CONFIG_ARM64_LSE_ATOMICS are both enabled, linking fails with the following type of errors: ld.lld: error: relocation R_AARCH64_CALL26 cannot refer to absolute symbol: __ll_sc_atomic64_add_return_acquire >>> defined in vmlinux.o >>> referenced by atomic_lse.h:299 (arch/arm64/include/asm/atomic_lse.h:299) >>> vmlinux.o:(kernel_init_freeable) ... This change works around the problem by always linking in atomic_ll_sc.o to vmlinux, instead of placing it in arch/arm64/lib/lib.a. Bug: 63740206 Bug: 117299373 Bug: 124318741 Bug: 133186739 Change-Id: Idf2211034cce9cb0b6b2007bbdb1dfc1c1a1053d Signed-off-by: Sami Tolvanen --- arch/arm64/lib/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 5df2d611b77d..33ce9a5c324b 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -11,7 +11,12 @@ lib-y := clear_user.o delay.o copy_from_user.o \ # patching of the bl instruction in the caller with an atomic instruction # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. +ifeq ($(CONFIG_LD_IS_LLD), y) +# https://bugs.llvm.org/show_bug.cgi?id=35841 +obj-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +else lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +endif CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ From 2410c05496d8f69902d8cac39f5d405e2ebab5f3 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 25 Apr 2019 14:05:11 -0700 Subject: [PATCH 019/152] ANDROID: arm64: disable HAVE_ARCH_PREL32_RELOCATIONS with LTO_CLANG Disable HAVE_ARCH_PREL32_RELOCATIONS to stop LLVM from reordering initcalls. Bug: 133186739 Change-Id: I8fd9e24c89986b5a41813e23d28778839dd3abdf Signed-off-by: Sami Tolvanen --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b8aa0b81f4b2..5a2c779bc778 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -108,7 +108,7 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT - select HAVE_ARCH_PREL32_RELOCATIONS + select HAVE_ARCH_PREL32_RELOCATIONS if !LTO_CLANG select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST From 2e449d9900d29d5714becea0ec981709729f6dc7 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 2 Nov 2017 09:34:42 -0700 Subject: [PATCH 020/152] FROMLIST: arm64: select ARCH_SUPPORTS_LTO_CLANG Allow CONFIG_LTO_CLANG to be enabled for the architecture. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: Id8e06b49877c4de2f15b51fc432d601b83b2c68f (am from https://patchwork.kernel.org/patch/10060333/) Signed-off-by: Sami Tolvanen --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5a2c779bc778..2edce73ee611 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -57,6 +57,7 @@ config ARM64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_SUPPORTS_MEMORY_FAILURE + select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG select ARCH_SUPPORTS_NUMA_BALANCING From 4976b0dad5a8737b228074ffaa9807b3a0610511 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 25 Apr 2019 16:09:05 -0700 Subject: [PATCH 021/152] ANDROID: add support for clang Control Flow Integrity (CFI) This change adds the CONFIG_CFI_CLANG option, CFI error handling, and a faster look-up table for cross module CFI checks. Bug: 67506682 Bug: 133186739 Change-Id: Ic009f0a629b552a0eb16e6d89808c7029e91447d Signed-off-by: Sami Tolvanen --- Makefile | 26 +++ arch/Kconfig | 28 +++ include/asm-generic/vmlinux.lds.h | 3 + include/linux/cfi.h | 38 ++++ include/linux/compiler-clang.h | 2 + include/linux/compiler_types.h | 4 + include/linux/init.h | 2 +- include/linux/module.h | 5 + init/Kconfig | 2 +- kernel/Makefile | 4 + kernel/cfi.c | 306 ++++++++++++++++++++++++++++++ kernel/module.c | 27 +++ net/Kconfig | 1 + 13 files changed, 446 insertions(+), 2 deletions(-) create mode 100644 include/linux/cfi.h create mode 100644 kernel/cfi.c diff --git a/Makefile b/Makefile index f70ef17a3d2f..709c6c0b3d1f 100644 --- a/Makefile +++ b/Makefile @@ -821,6 +821,32 @@ DISABLE_LTO := $(DISABLE_LTO_CLANG) export LTO_CFLAGS DISABLE_LTO endif +ifdef CONFIG_CFI_CLANG +cfi-clang-flags += -fsanitize=cfi $(call cc-option, -fsplit-lto-unit) +DISABLE_CFI_CLANG := -fno-sanitize=cfi +ifdef CONFIG_MODULES +cfi-clang-flags += -fsanitize-cfi-cross-dso +DISABLE_CFI_CLANG += -fno-sanitize-cfi-cross-dso +endif +ifdef CONFIG_CFI_PERMISSIVE +cfi-clang-flags += -fsanitize-recover=cfi -fno-sanitize-trap=cfi +endif + +# also disable CFI when LTO is disabled +DISABLE_LTO_CLANG += $(DISABLE_CFI_CLANG) +# allow disabling only clang CFI where needed +export DISABLE_CFI_CLANG +endif + +ifdef CONFIG_CFI +CFI_CFLAGS := $(cfi-clang-flags) +KBUILD_CFLAGS += $(CFI_CFLAGS) + +DISABLE_CFI := $(DISABLE_CFI_CLANG) +DISABLE_LTO += $(DISABLE_CFI) +export CFI_CFLAGS DISABLE_CFI +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) diff --git a/arch/Kconfig b/arch/Kconfig index cb8ef5e2b322..30346f3d86be 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -513,6 +513,34 @@ config LTO_CLANG endchoice +config CFI + bool + +config CFI_PERMISSIVE + bool "Use CFI in permissive mode" + depends on CFI + help + When selected, Control Flow Integrity (CFI) violations result in a + warning instead of a kernel panic. This option is useful for finding + CFI violations in drivers during development. + +config CFI_CLANG + bool "Use clang Control Flow Integrity (CFI) (EXPERIMENTAL)" + depends on LTO_CLANG + depends on KALLSYMS + select CFI + help + This option enables clang Control Flow Integrity (CFI), which adds + runtime checking for indirect function calls. + +config CFI_CLANG_SHADOW + bool "Use CFI shadow to speed up cross-module checks" + default y + depends on CFI_CLANG + help + If you select this option, the kernel builds a fast look-up table of + CFI check functions in loaded modules to reduce overhead. + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f12e43993d66..8667538d0b4b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -68,6 +68,7 @@ */ #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#define TEXT_CFI_MAIN .text.[0-9a-zA-Z_]*.cfi #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* @@ -75,6 +76,7 @@ #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text +#define TEXT_CFI_MAIN .text.cfi #define DATA_MAIN .data #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata @@ -491,6 +493,7 @@ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + *(TEXT_CFI_MAIN) \ *(.text..refcount) \ *(.text..ftrace) \ *(.ref.text) \ diff --git a/include/linux/cfi.h b/include/linux/cfi.h new file mode 100644 index 000000000000..e27033d5dd53 --- /dev/null +++ b/include/linux/cfi.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_CFI_H +#define _LINUX_CFI_H + +#include + +#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_MODULES + +typedef void (*cfi_check_fn)(uint64_t, void *, void *); + +/* Compiler-generated function in each module, and the kernel */ +#define CFI_CHECK_FN __cfi_check +#define CFI_CHECK_FN_NAME __stringify(CFI_CHECK_FN) + +extern void CFI_CHECK_FN(uint64_t, void *, void *); + +#ifdef CONFIG_CFI_CLANG_SHADOW +extern void cfi_module_add(struct module *mod, unsigned long min_addr, + unsigned long max_addr); + +extern void cfi_module_remove(struct module *mod, unsigned long min_addr, + unsigned long max_addr); +#else +static inline void cfi_module_add(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ +} + +static inline void cfi_module_remove(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ +} +#endif /* CONFIG_CFI_CLANG_SHADOW */ + +#endif /* CONFIG_MODULES */ +#endif /* CONFIG_CFI_CLANG */ + +#endif /* _LINUX_CFI_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 64eb811a11e4..ed0f2ad51ee2 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -49,4 +49,6 @@ #define __norecordmcount \ __attribute__((__section__(".text..ftrace"))) #endif + +#define __nocfi __attribute__((no_sanitize("cfi"))) #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4f498e3cc760..fcfd54fe235d 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -252,6 +252,10 @@ struct ftrace_likely_data { #define __norecordmcount #endif +#ifndef __nocfi +#define __nocfi +#endif + /* * Force always-inline if the user requests it so via the .config. * GCC does not warn about unused static inline functions for diff --git a/include/linux/init.h b/include/linux/init.h index 892fe8e04c82..847e8536228f 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline +#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline __nocfi #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) diff --git a/include/linux/module.h b/include/linux/module.h index 2f13c831a0ce..7fe9c296ee0b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -348,6 +349,10 @@ struct module { const s32 *crcs; unsigned int num_syms; +#ifdef CONFIG_CFI_CLANG + cfi_check_fn cfi_check; +#endif + /* Kernel parameters. */ #ifdef CONFIG_SYSFS struct mutex param_lock; diff --git a/init/Kconfig b/init/Kconfig index 0332548d7fa7..8a03d8b466a8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2043,7 +2043,7 @@ endif # MODULES config MODULES_TREE_LOOKUP def_bool y - depends on PERF_EVENTS || TRACING + depends on PERF_EVENTS || TRACING || CFI_CLANG config INIT_ALL_POSSIBLE bool diff --git a/kernel/Makefile b/kernel/Makefile index 7a63d567fdb5..7d146967402a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -34,6 +34,9 @@ KASAN_SANITIZE_kcov.o := n # cond_syscall is currently not LTO compatible CFLAGS_sys_ni.o = $(DISABLE_LTO) +# Don't instrument error handlers +CFLAGS_cfi.o = $(DISABLE_CFI_CLANG) + obj-y += sched/ obj-y += locking/ obj-y += power/ @@ -103,6 +106,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ +obj-$(CONFIG_CFI_CLANG) += cfi.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/cfi.c b/kernel/cfi.c new file mode 100644 index 000000000000..9e9992258b48 --- /dev/null +++ b/kernel/cfi.c @@ -0,0 +1,306 @@ +/* + * CFI (Control Flow Integrity) error and slowpath handling + * + * Copyright (C) 2017 Google, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Compiler-defined handler names */ +#ifdef CONFIG_CFI_PERMISSIVE +#define cfi_failure_handler __ubsan_handle_cfi_check_fail +#define cfi_slowpath_handler __cfi_slowpath_diag +#else /* enforcing */ +#define cfi_failure_handler __ubsan_handle_cfi_check_fail_abort +#define cfi_slowpath_handler __cfi_slowpath +#endif /* CONFIG_CFI_PERMISSIVE */ + +static inline void handle_cfi_failure(void *ptr) +{ +#ifdef CONFIG_CFI_PERMISSIVE + WARN_RATELIMIT(1, "CFI failure (target: %pF):\n", ptr); +#else + pr_err("CFI failure (target: %pF):\n", ptr); + BUG(); +#endif +} + +#ifdef CONFIG_MODULES +#ifdef CONFIG_CFI_CLANG_SHADOW +struct shadow_range { + /* Module address range */ + unsigned long mod_min_addr; + unsigned long mod_max_addr; + /* Module page range */ + unsigned long min_page; + unsigned long max_page; +}; + +#define SHADOW_ORDER 1 +#define SHADOW_PAGES (1 << SHADOW_ORDER) +#define SHADOW_SIZE \ + ((SHADOW_PAGES * PAGE_SIZE - sizeof(struct shadow_range)) / sizeof(u16)) +#define SHADOW_INVALID 0xFFFF + +struct cfi_shadow { + /* Page range covered by the shadow */ + struct shadow_range r; + /* Page offsets to __cfi_check functions in modules */ + u16 shadow[SHADOW_SIZE]; +}; + +static DEFINE_SPINLOCK(shadow_update_lock); +static struct cfi_shadow __rcu *cfi_shadow __read_mostly = NULL; + +static inline int ptr_to_shadow(const struct cfi_shadow *s, unsigned long ptr) +{ + unsigned long index; + unsigned long page = ptr >> PAGE_SHIFT; + + if (unlikely(page < s->r.min_page)) + return -1; /* Outside of module area */ + + index = page - s->r.min_page; + + if (index >= SHADOW_SIZE) + return -1; /* Cannot be addressed with shadow */ + + return (int)index; +} + +static inline unsigned long shadow_to_ptr(const struct cfi_shadow *s, + int index) +{ + BUG_ON(index < 0 || index >= SHADOW_SIZE); + + if (unlikely(s->shadow[index] == SHADOW_INVALID)) + return 0; + + return (s->r.min_page + s->shadow[index]) << PAGE_SHIFT; +} + +static inline unsigned long shadow_to_page(const struct cfi_shadow *s, + int index) +{ + BUG_ON(index < 0 || index >= SHADOW_SIZE); + + return (s->r.min_page + index) << PAGE_SHIFT; +} + +static void prepare_next_shadow(const struct cfi_shadow __rcu *prev, + struct cfi_shadow *next) +{ + int i, index, check; + + /* Mark everything invalid */ + memset(next->shadow, 0xFF, sizeof(next->shadow)); + + if (!prev) + return; /* No previous shadow */ + + /* If the base address didn't change, update is not needed */ + if (prev->r.min_page == next->r.min_page) { + memcpy(next->shadow, prev->shadow, sizeof(next->shadow)); + return; + } + + /* Convert the previous shadow to the new address range */ + for (i = 0; i < SHADOW_SIZE; ++i) { + if (prev->shadow[i] == SHADOW_INVALID) + continue; + + index = ptr_to_shadow(next, shadow_to_page(prev, i)); + if (index < 0) + continue; + + check = ptr_to_shadow(next, + shadow_to_ptr(prev, prev->shadow[i])); + if (check < 0) + continue; + + next->shadow[index] = (u16)check; + } +} + +static void add_module_to_shadow(struct cfi_shadow *s, struct module *mod) +{ + unsigned long ptr; + unsigned long min_page_addr; + unsigned long max_page_addr; + unsigned long check = (unsigned long)mod->cfi_check; + int check_index = ptr_to_shadow(s, check); + + BUG_ON((check & PAGE_MASK) != check); /* Must be page aligned */ + + if (check_index < 0) + return; /* Module not addressable with shadow */ + + min_page_addr = (unsigned long)mod->core_layout.base & PAGE_MASK; + max_page_addr = (unsigned long)mod->core_layout.base + + mod->core_layout.text_size; + max_page_addr &= PAGE_MASK; + + /* For each page, store the check function index in the shadow */ + for (ptr = min_page_addr; ptr <= max_page_addr; ptr += PAGE_SIZE) { + int index = ptr_to_shadow(s, ptr); + if (index >= 0) { + /* Assume a page only contains code for one module */ + BUG_ON(s->shadow[index] != SHADOW_INVALID); + s->shadow[index] = (u16)check_index; + } + } +} + +static void remove_module_from_shadow(struct cfi_shadow *s, struct module *mod) +{ + unsigned long ptr; + unsigned long min_page_addr; + unsigned long max_page_addr; + + min_page_addr = (unsigned long)mod->core_layout.base & PAGE_MASK; + max_page_addr = (unsigned long)mod->core_layout.base + + mod->core_layout.text_size; + max_page_addr &= PAGE_MASK; + + for (ptr = min_page_addr; ptr <= max_page_addr; ptr += PAGE_SIZE) { + int index = ptr_to_shadow(s, ptr); + if (index >= 0) + s->shadow[index] = SHADOW_INVALID; + } +} + +typedef void (*update_shadow_fn)(struct cfi_shadow *, struct module *); + +static void update_shadow(struct module *mod, unsigned long min_addr, + unsigned long max_addr, update_shadow_fn fn) +{ + struct cfi_shadow *prev; + struct cfi_shadow *next = (struct cfi_shadow *) + __get_free_pages(GFP_KERNEL, SHADOW_ORDER); + + BUG_ON(!next); + + next->r.mod_min_addr = min_addr; + next->r.mod_max_addr = max_addr; + next->r.min_page = min_addr >> PAGE_SHIFT; + next->r.max_page = max_addr >> PAGE_SHIFT; + + spin_lock(&shadow_update_lock); + prev = rcu_dereference_protected(cfi_shadow, 1); + prepare_next_shadow(prev, next); + + fn(next, mod); + set_memory_ro((unsigned long)next, SHADOW_PAGES); + rcu_assign_pointer(cfi_shadow, next); + + spin_unlock(&shadow_update_lock); + synchronize_rcu(); + + if (prev) { + set_memory_rw((unsigned long)prev, SHADOW_PAGES); + free_pages((unsigned long)prev, SHADOW_ORDER); + } +} + +void cfi_module_add(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ + update_shadow(mod, min_addr, max_addr, add_module_to_shadow); +} +EXPORT_SYMBOL(cfi_module_add); + +void cfi_module_remove(struct module *mod, unsigned long min_addr, + unsigned long max_addr) +{ + update_shadow(mod, min_addr, max_addr, remove_module_from_shadow); +} +EXPORT_SYMBOL(cfi_module_remove); + +static inline cfi_check_fn ptr_to_check_fn(const struct cfi_shadow __rcu *s, + unsigned long ptr) +{ + int index; + + if (unlikely(!s)) + return NULL; /* No shadow available */ + + if (ptr < s->r.mod_min_addr || ptr > s->r.mod_max_addr) + return NULL; /* Not in a mapped module */ + + index = ptr_to_shadow(s, ptr); + if (index < 0) + return NULL; /* Cannot be addressed with shadow */ + + return (cfi_check_fn)shadow_to_ptr(s, index); +} +#endif /* CONFIG_CFI_CLANG_SHADOW */ + +static inline cfi_check_fn find_module_cfi_check(void *ptr) +{ + struct module *mod; + + preempt_disable(); + mod = __module_address((unsigned long)ptr); + preempt_enable(); + + if (mod) + return mod->cfi_check; + + return CFI_CHECK_FN; +} + +static inline cfi_check_fn find_cfi_check(void *ptr) +{ +#ifdef CONFIG_CFI_CLANG_SHADOW + cfi_check_fn f; + + if (!rcu_access_pointer(cfi_shadow)) + return CFI_CHECK_FN; /* No loaded modules */ + + /* Look up the __cfi_check function to use */ + rcu_read_lock(); + f = ptr_to_check_fn(rcu_dereference(cfi_shadow), (unsigned long)ptr); + rcu_read_unlock(); + + if (f) + return f; + + /* + * Fall back to find_module_cfi_check, which works also for a larger + * module address space, but is slower. + */ +#endif /* CONFIG_CFI_CLANG_SHADOW */ + + return find_module_cfi_check(ptr); +} + +void cfi_slowpath_handler(uint64_t id, void *ptr, void *diag) +{ + cfi_check_fn check = find_cfi_check(ptr); + + if (likely(check)) + check(id, ptr, diag); + else /* Don't allow unchecked modules */ + handle_cfi_failure(ptr); +} +EXPORT_SYMBOL(cfi_slowpath_handler); +#endif /* CONFIG_MODULES */ + +void cfi_failure_handler(void *data, void *ptr, void *vtable) +{ + handle_cfi_failure(ptr); +} +EXPORT_SYMBOL(cfi_failure_handler); + +void __cfi_check_fail(void *data, void *ptr) +{ + handle_cfi_failure(ptr); +} diff --git a/kernel/module.c b/kernel/module.c index 38bf28b5cc20..e1793038823b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2125,6 +2125,8 @@ void __weak module_arch_freeing_init(struct module *mod) { } +static void cfi_cleanup(struct module *mod); + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -2166,6 +2168,10 @@ static void free_module(struct module *mod) /* This may be empty, but that's OK */ disable_ro_nx(&mod->init_layout); + + /* Clean up CFI for the module. */ + cfi_cleanup(mod); + module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); kfree(mod->args); @@ -3353,6 +3359,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr, return 0; } +static void cfi_init(struct module *mod); + static int post_relocation(struct module *mod, const struct load_info *info) { /* Sort exception table now relocations are done. */ @@ -3365,6 +3373,9 @@ static int post_relocation(struct module *mod, const struct load_info *info) /* Setup kallsyms-specific fields. */ add_kallsyms(mod, info); + /* Setup CFI for the module. */ + cfi_init(mod); + /* Arch-specific module finalizing. */ return module_finalize(info->hdr, info->sechdrs, mod); } @@ -4134,6 +4145,22 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, } #endif /* CONFIG_KALLSYMS */ +static void cfi_init(struct module *mod) +{ +#ifdef CONFIG_CFI_CLANG + mod->cfi_check = + (cfi_check_fn)mod_find_symname(mod, CFI_CHECK_FN_NAME); + cfi_module_add(mod, module_addr_min, module_addr_max); +#endif +} + +static void cfi_cleanup(struct module *mod) +{ +#ifdef CONFIG_CFI_CLANG + cfi_module_remove(mod, module_addr_min, module_addr_max); +#endif +} + /* Maximum number of characters written by module_flags() */ #define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4) diff --git a/net/Kconfig b/net/Kconfig index 228dfa382eec..8cc61e392256 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -287,6 +287,7 @@ config BPF_JIT bool "enable BPF Just In Time compiler" depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT depends on MODULES + depends on !CFI ---help--- Berkeley Packet Filter filtering capabilities are normally handled by an interpreter. This option allows kernel to generate a native From 05fb7fe5f3c3d0d24a39f9510108f73c9ee8e3d6 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 14 Feb 2018 10:26:35 -0800 Subject: [PATCH 022/152] ANDROID: kallsyms: strip the .cfi postfix from symbols with CONFIG_CFI_CLANG With CFI enabled, LLVM appends .cfi to most function names, which potentially breaks user space tools. While stripping the postfix is not optimal either, this should at least create less confusion. Bug: 67506682 Bug: 73328469 Bug: 133186739 Change-Id: I253f34a562629032ddd792b8498e171109ea7cbc Signed-off-by: Sami Tolvanen --- kernel/kallsyms.c | 49 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 02a0b01380d8..672ed40e60c2 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -268,6 +268,24 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } +#ifdef CONFIG_CFI_CLANG +/* + * LLVM appends .cfi to function names when CONFIG_CFI_CLANG is enabled, + * which causes confusion and potentially breaks user space tools, so we + * will strip the postfix from expanded symbol names. + */ +static inline void cleanup_symbol_name(char *s) +{ + char *res; + + res = strrchr(s, '.'); + if (res && !strcmp(res, ".cfi")) + *res = '\0'; +} +#else +static inline void cleanup_symbol_name(char *s) {} +#endif + /* * Lookup an address * - modname is set to NULL if it's in the kernel. @@ -294,7 +312,9 @@ const char *kallsyms_lookup(unsigned long addr, namebuf, KSYM_NAME_LEN); if (modname) *modname = NULL; - return namebuf; + + ret = namebuf; + goto found; } /* See if it's in a module or a BPF JITed image. */ @@ -307,11 +327,16 @@ const char *kallsyms_lookup(unsigned long addr, if (!ret) ret = ftrace_mod_address_lookup(addr, symbolsize, offset, modname, namebuf); + +found: + cleanup_symbol_name(namebuf); return ret; } int lookup_symbol_name(unsigned long addr, char *symname) { + int res; + symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; @@ -322,15 +347,23 @@ int lookup_symbol_name(unsigned long addr, char *symname) /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), symname, KSYM_NAME_LEN); - return 0; + goto found; } /* See if it's in a module. */ - return lookup_module_symbol_name(addr, symname); + res = lookup_module_symbol_name(addr, symname); + if (res) + return res; + +found: + cleanup_symbol_name(symname); + return 0; } int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) { + int res; + name[0] = '\0'; name[KSYM_NAME_LEN - 1] = '\0'; @@ -342,10 +375,16 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, kallsyms_expand_symbol(get_symbol_offset(pos), name, KSYM_NAME_LEN); modname[0] = '\0'; - return 0; + goto found; } /* See if it's in a module. */ - return lookup_module_symbol_attrs(addr, size, offset, modname, name); + res = lookup_module_symbol_attrs(addr, size, offset, modname, name); + if (res) + return res; + +found: + cleanup_symbol_name(name); + return 0; } /* Look up a kernel symbol and return it in a text buffer. */ From c4759b6ea8dc4bf5a08775750c798f803a6fe263 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 26 Feb 2019 13:07:52 -0800 Subject: [PATCH 023/152] UPSTREAM: netfilter: xt_IDLETIMER: fix sysfs callback function type Use struct device_attribute instead of struct idletimer_tg_attr, and the correct callback function type to avoid indirect call mismatches with Control Flow Integrity checking. Bug: 117299373 Change-Id: I5cc50171e01a5162ef504a726fe3ed4b51941b34 (cherry-picked from commit 20fdaf6e1e313b4efa48243d5250526eb43ea70d) Signed-off-by: Sami Tolvanen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_IDLETIMER.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 559acfaec0e9..f5e165e018d9 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -51,19 +51,13 @@ #include #include -struct idletimer_tg_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); -}; - struct idletimer_tg { struct list_head entry; struct timer_list timer; struct work_struct work; struct kobject *kobj; - struct idletimer_tg_attr attr; + struct device_attribute attr; struct timespec delayed_timer_trigger; struct timespec last_modified_timer; @@ -184,8 +178,8 @@ struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) return NULL; } -static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr, - char *buf) +static ssize_t idletimer_tg_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct idletimer_tg *timer; unsigned long expires = 0; @@ -193,7 +187,7 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr, mutex_lock(&list_mutex); - timer = __idletimer_tg_find_by_label(attr->name); + timer = __idletimer_tg_find_by_label(attr->attr.name); if (timer) expires = timer->timer.expires; From 2bf50b5a8e1d83537dafb34f614d50eadc02cfbb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 May 2019 09:26:37 -0400 Subject: [PATCH 024/152] FROMLIST: mm: don't cast ->readpage to filler_t for do_read_cache_page We can just pass a NULL filler and do the right thing inside of do_read_cache_page based on the NULL parameter. Signed-off-by: Christoph Hellwig Bug: 133186739 Change-Id: I720afb2e0c28d5649b62ed3852b48ac63dc0d7a8 Link: https://lkml.org/lkml/2019/5/1/294 Signed-off-by: Sami Tolvanen --- include/linux/pagemap.h | 3 +-- mm/filemap.c | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cce0b126c8f0..78cac3fc3ce5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -399,8 +399,7 @@ extern int read_cache_pages(struct address_space *mapping, static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, void *data) { - filler_t *filler = (filler_t *)mapping->a_ops->readpage; - return read_cache_page(mapping, index, filler, data); + return read_cache_page(mapping, index, NULL, data); } /* diff --git a/mm/filemap.c b/mm/filemap.c index b60294923142..99aa6ae0bf19 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2875,7 +2875,11 @@ static struct page *do_read_cache_page(struct address_space *mapping, } filler: - err = filler(data, page); + if (filler) + err = filler(data, page); + else + err = mapping->a_ops->readpage(data, page); + if (err < 0) { put_page(page); return ERR_PTR(err); @@ -2982,9 +2986,7 @@ struct page *read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { - filler_t *filler = (filler_t *)mapping->a_ops->readpage; - - return do_read_cache_page(mapping, index, filler, NULL, gfp); + return do_read_cache_page(mapping, index, NULL, NULL, gfp); } EXPORT_SYMBOL(read_cache_page_gfp); From 49ded33701f584eac1c1c0cea68037db34707433 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 May 2019 09:31:53 -0400 Subject: [PATCH 025/152] UPSTREAM: nfs: pass the correct prototype to read_cache_page Fix the callbacks NFS passes to read_cache_page to actually have the proper type expected. Casting around function pointers can easily hide typing bugs, and defeats control flow protection. Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker Change-Id: Ia245fc02d550d34d80f4e8ebdeb6b744115ffdb3 (cherry picked from commit a46126ccc77e764429d63bf958d117f607f4b6c6) Signed-off-by: Sami Tolvanen --- fs/nfs/dir.c | 7 ++++--- fs/nfs/symlink.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8bfaa658b2c1..a4ea9ab28096 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -664,8 +664,9 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, * We only need to convert from xdr once so future lookups are much simpler */ static -int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) +int nfs_readdir_filler(void *data, struct page* page) { + nfs_readdir_descriptor_t *desc = data; struct inode *inode = file_inode(desc->file); int ret; @@ -697,8 +698,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc) static struct page *get_cache_page(nfs_readdir_descriptor_t *desc) { - return read_cache_page(desc->file->f_mapping, - desc->page_index, (filler_t *)nfs_readdir_filler, desc); + return read_cache_page(desc->file->f_mapping, desc->page_index, + nfs_readdir_filler, desc); } /* diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 06eb44b47885..25ba299fdac2 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -26,8 +26,9 @@ * and straight-forward than readdir caching. */ -static int nfs_symlink_filler(struct inode *inode, struct page *page) +static int nfs_symlink_filler(void *data, struct page *page) { + struct inode *inode = data; int error; error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); @@ -65,8 +66,8 @@ static const char *nfs_get_link(struct dentry *dentry, err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) return err; - page = read_cache_page(&inode->i_data, 0, - (filler_t *)nfs_symlink_filler, inode); + page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler, + inode); if (IS_ERR(page)) return ERR_CAST(page); } From ca3858e6b04b3785e38e1cff5bf2e0659f59cade Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 May 2019 09:35:03 -0400 Subject: [PATCH 026/152] FROMLIST: jffs2: pass the correct prototype to read_cache_page Fix the callback jffs2 passes to read_cache_page to actually have the proper type expected. Casting around function pointers can easily hide typing bugs, and defeats control flow protection. Signed-off-by: Christoph Hellwig Bug: 133186739 Change-Id: Ie5bfeff009a555c1894784458d2fe79a87f1be03 Link: https://lkml.org/lkml/2019/5/1/295 Signed-off-by: Sami Tolvanen --- fs/jffs2/file.c | 4 ++-- fs/jffs2/fs.c | 2 +- fs/jffs2/os-linux.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 7d8654a1472e..f8fb89b10227 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -109,9 +109,9 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) return ret; } -int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) +int jffs2_do_readpage_unlock(void *data, struct page *pg) { - int ret = jffs2_do_readpage_nolock(inode, pg); + int ret = jffs2_do_readpage_nolock(data, pg); unlock_page(pg); return ret; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index eab04eca95a3..7fbe8a7843b9 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -686,7 +686,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct page *pg; pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT, - (void *)jffs2_do_readpage_unlock, inode); + jffs2_do_readpage_unlock, inode); if (IS_ERR(pg)) return (void *)pg; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index a2dbbb3f4c74..bd3d5f0ddc34 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -155,7 +155,7 @@ extern const struct file_operations jffs2_file_operations; extern const struct inode_operations jffs2_file_inode_operations; extern const struct address_space_operations jffs2_file_address_operations; int jffs2_fsync(struct file *, loff_t, loff_t, int); -int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); +int jffs2_do_readpage_unlock(void *data, struct page *pg); /* ioctl.c */ long jffs2_ioctl(struct file *, unsigned int, unsigned long); From 7da4aafc5edab874cf66447ce532290dd78aeb9b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 30 Apr 2019 12:57:04 -0700 Subject: [PATCH 027/152] FROMLIST: 9p: pass the correct prototype to read_cache_page Fix the callback 9p passes to read_cache_page to actually have the proper type expected. Casting around function pointers can easily hide typing bugs, and defeats control flow protection. Signed-off-by: Christoph Hellwig Change-Id: I3738b7c0c7f77f85cd7007141400c36516dac6b8 Bug: 133186739 Link: https://lkml.org/lkml/2019/5/1/374 Signed-off-by: Sami Tolvanen --- fs/9p/vfs_addr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index e1cbdfdb7c68..197069303510 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -50,8 +50,9 @@ * @page: structure to page * */ -static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) +static int v9fs_fid_readpage(void *data, struct page *page) { + struct p9_fid *fid = data; struct inode *inode = page->mapping->host; struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE}; struct iov_iter to; @@ -122,7 +123,8 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) return ret; - ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); + ret = read_cache_pages(mapping, pages, v9fs_fid_readpage, + filp->private_data); p9_debug(P9_DEBUG_VFS, " = %d\n", ret); return ret; } From b4a47b3387c7e24a60d7f732f5dc7ca5f2e96645 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 10 May 2018 14:56:41 -0700 Subject: [PATCH 028/152] ANDROID: ftrace: fix function type mismatches This change fixes indirect call mismatches with function and function graph tracing, which trip Control-Flow Integrity (CFI) checking. Bug: 79510107 Bug: 67506682 Bug: 133186739 Change-Id: I5de08c113fb970ffefedce93c58e0161f22c7ca2 Signed-off-by: Sami Tolvanen --- include/linux/ftrace.h | 8 ++++++++ kernel/trace/ftrace.c | 17 +++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index dd16e8218db3..677a5eab83c5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -224,8 +224,16 @@ extern enum ftrace_tracing_type_t ftrace_tracing_type; int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); +#ifdef CONFIG_CFI_CLANG +/* Use a C stub with the correct type for CFI */ +static inline void ftrace_stub(unsigned long a0, unsigned long a1, + struct ftrace_ops *op, struct pt_regs *regs) +{ +} +#else extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); +#endif #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8edbec4ec3fe..a6bff0c07032 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -124,8 +124,9 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); #else /* See comment below, where ftrace_ops_list_func is defined */ -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); -#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs); +#define ftrace_ops_list_func ftrace_ops_no_ops #endif /* @@ -6313,7 +6314,8 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, } NOKPROBE_SYMBOL(ftrace_ops_list_func); #else -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } @@ -6776,14 +6778,17 @@ void ftrace_graph_graph_time_control(bool enable) fgraph_graph_time = enable; } +void ftrace_graph_return_stub(struct ftrace_graph_ret *trace) +{ +} + int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { return 0; } /* The callbacks that hook a function */ -trace_func_graph_ret_t ftrace_graph_return = - (trace_func_graph_ret_t)ftrace_stub; +trace_func_graph_ret_t ftrace_graph_return = ftrace_graph_return_stub; trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; @@ -7013,7 +7018,7 @@ void unregister_ftrace_graph(void) goto out; ftrace_graph_active--; - ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; + ftrace_graph_return = ftrace_graph_return_stub; ftrace_graph_entry = ftrace_graph_entry_stub; __ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET); From fb0e5d877bcda62edbd9339125bfc8a0a84330ae Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 4 Oct 2018 08:56:16 -0700 Subject: [PATCH 029/152] ANDROID: modpost: add an exception for CFI stubs When CONFIG_CFI_CLANG is enabled, LLVM renames all address taken functions by appending a .cfi postfix to their names, and creates function stubs with the original names. The compiler always injects these stubs to the text section, even if the function itself is placed into init or exit sections, which creates modpost warnings. This commit adds a modpost exception for CFI stubs to prevent the warnings. Bug: 117237524 Bug: 133186739 Change-Id: Ieb8bf20d0c3ad7b7295c535f598370220598cdb0 Signed-off-by: Sami Tolvanen --- scripts/mod/modpost.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index ca5bb33d7460..b16c893e377c 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -945,6 +945,7 @@ static const char *const head_sections[] = { ".head.text*", NULL }; static const char *const linker_symbols[] = { "__init_begin", "_sinittext", "_einittext", NULL }; static const char *const optim_symbols[] = { "*.constprop.*", NULL }; +static const char *const cfi_symbols[] = { "*.cfi", NULL }; enum mismatch { TEXT_TO_ANY_INIT, @@ -1166,6 +1167,16 @@ static const struct sectioncheck *section_mismatch( * fromsec = text section * refsymname = *.constprop.* * + * Pattern 6: + * With CONFIG_CFI_CLANG, clang appends .cfi to all indirectly called + * functions and creates a function stub with the original name. This + * stub is always placed in .text, even if the actual function with the + * .cfi postfix is in .init.text or .exit.text. + * This pattern is identified by + * tosec = init or exit section + * fromsec = text section + * tosym = *.cfi + * **/ static int secref_whitelist(const struct sectioncheck *mismatch, const char *fromsec, const char *fromsym, @@ -1204,6 +1215,12 @@ static int secref_whitelist(const struct sectioncheck *mismatch, match(fromsym, optim_symbols)) return 0; + /* Check for pattern 6 */ + if (match(fromsec, text_sections) && + match(tosec, init_exit_sections) && + match(tosym, cfi_symbols)) + return 0; + return 1; } From 568a3e2eeedb5672578a09ec193791c7d34b99ac Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 25 Apr 2019 14:06:13 -0700 Subject: [PATCH 030/152] FROMLIST: arm64: fix syscall_fn_t type Syscall wrappers in use const struct pt_regs * as the argument type. Use const in syscall_fn_t as well to fix indirect call type mismatches with Control-Flow Integrity checking. Bug: 133186739 Change-Id: I23d26aef5107985631a9aac500f2ceefc0b25419 Link: https://lkml.org/lkml/2019/5/3/689 Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ad8be16a39c9..58102652bf9e 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -20,7 +20,7 @@ #include #include -typedef long (*syscall_fn_t)(struct pt_regs *regs); +typedef long (*syscall_fn_t)(const struct pt_regs *regs); extern const syscall_fn_t sys_call_table[]; From d09a3e0de2a7bb94b7b6742720136b17d90217c6 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 25 Apr 2019 16:02:04 -0700 Subject: [PATCH 031/152] FROMLIST: arm64: use the correct function type in SYSCALL_DEFINE0 Although a syscall defined using SYSCALL_DEFINE0 doesn't accept parameters, use the correct function type to avoid indirect call type mismatches with Control-Flow Integrity checking. Bug: 133186739 Change-Id: I45d1235f4b93c69451ed3610b676d0948cef5ee1 Link: https://lkml.org/lkml/2019/5/3/691 Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/syscall_wrapper.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index a4477e515b79..507d0ee6bc69 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -30,10 +30,10 @@ } \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#define COMPAT_SYSCALL_DEFINE0(sname) \ - asmlinkage long __arm64_compat_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__arm64_compat_sys_##sname, ERRNO); \ - asmlinkage long __arm64_compat_sys_##sname(void) +#define COMPAT_SYSCALL_DEFINE0(sname) \ + asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused); \ + ALLOW_ERROR_INJECTION(__arm64_compat_sys_##sname, ERRNO); \ + asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL_COMPAT(name) \ cond_syscall(__arm64_compat_sys_##name); @@ -62,11 +62,11 @@ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __arm64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ - asmlinkage long __arm64_sys_##sname(void) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \ + ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ + asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) #endif #ifndef COND_SYSCALL From f904253e0ec351f566d0577161e040b588042941 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 3 May 2019 08:51:26 -0700 Subject: [PATCH 032/152] FROMLIST: arm64: use the correct function type for __arm64_sys_ni_syscall Calling sys_ni_syscall through a syscall_fn_t pointer trips indirect call Control-Flow Integrity checking due to a function type mismatch. Use SYSCALL_DEFINE0 for __arm64_sys_ni_syscall instead and remove the now unnecessary casts. Bug: 133186739 Change-Id: Id01f505b333b864477335e87e0c3100fa60be1eb Link: https://lkml.org/lkml/2019/5/3/690 Signed-off-by: Sami Tolvanen --- arch/arm64/kernel/sys.c | 14 +++++++++----- arch/arm64/kernel/sys32.c | 12 ++++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index b44065fb1616..4f8e8a7237a8 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -47,22 +47,26 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) return ksys_personality(personality); } +asmlinkage long sys_ni_syscall(void); + +SYSCALL_DEFINE0(ni_syscall) +{ + return sys_ni_syscall(); +} + /* * Wrappers to pass the pt_regs argument. */ #define sys_personality sys_arm64_personality -asmlinkage long sys_ni_syscall(const struct pt_regs *); -#define __arm64_sys_ni_syscall sys_ni_syscall - #undef __SYSCALL #define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); #include #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, +#define __SYSCALL(nr, sym) [nr] = __arm64_##sym, const syscall_fn_t sys_call_table[__NR_syscalls] = { - [0 ... __NR_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, + [0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall, #include }; diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index 0f8bcb7de700..f8f6c26cfd32 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -133,17 +133,21 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode, return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len)); } -asmlinkage long sys_ni_syscall(const struct pt_regs *); -#define __arm64_sys_ni_syscall sys_ni_syscall +asmlinkage long sys_ni_syscall(void); + +COMPAT_SYSCALL_DEFINE0(ni_syscall) +{ + return sys_ni_syscall(); +} #undef __SYSCALL #define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); #include #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, +#define __SYSCALL(nr, sym) [nr] = __arm64_##sym, const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = { - [0 ... __NR_compat_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, + [0 ... __NR_compat_syscalls - 1] = __arm64_sys_ni_syscall, #include }; From 04464eb227fb5bbc6b815fd66c1bb1840eb8f074 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 18 Aug 2017 14:31:23 -0700 Subject: [PATCH 033/152] ANDROID: arm64: disable CFI for cpu_replace_ttbr1 Disable CFI to allow an indirect call to a physical address. Bug: 67506682 Bug: 133186739 Change-Id: I0ec38f34245a4ad52f508f6989093526d3bf442f Signed-off-by: Sami Tolvanen --- arch/arm64/include/asm/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 39ec0b8a689e..1b7aa97b9c9a 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -141,7 +141,7 @@ static inline void cpu_install_idmap(void) * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ -static inline void cpu_replace_ttbr1(pgd_t *pgdp) +static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp) { typedef void (ttbr_replace_func)(phys_addr_t); extern ttbr_replace_func idmap_cpu_replace_ttbr1; From f31c0b1dba7bd3a3e258b875dbfdfe1431d07a5d Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 9 Apr 2018 13:48:49 -0700 Subject: [PATCH 034/152] ANDROID: arm64: mark kpti_install_ng_mappings as __nocfi 4.9.93 panics on boot when CFI_CLANG and UNMAP_KERNEL_AT_EL0 are both enabled. From Sami Tolvanen: "kpti_install_ng_mappings makes an indirect call to a physical address, which trips CFI. Adding the __nocfi attribute to this function should fix the problem." Bug: 77811249 Bug: 133186739 Change-Id: I87d1ceb29f1ba2caee8954547596f4236bdfc31f Reported-by: Jean-Baptiste Theou Signed-off-by: Greg Hackmann --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 93f69d82225d..f780d9688856 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -916,7 +916,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return !has_cpuid_feature(entry, scope); } -static void +static void __nocfi kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t); From b53a16001edbea99b52bce817e944f6cf8b8a4c6 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 23 Apr 2018 12:52:07 -0700 Subject: [PATCH 035/152] ANDROID: arm64: kvm: disable CFI Disable CFI for code that runs at EL2 because __cfi_check only understands EL1 addresses. Bug: 67506682 Bug: 133186739 Change-Id: Ia582943be0b31669d88464fd99228a5368b1aa6a Signed-off-by: Sami Tolvanen --- arch/arm64/kvm/hyp/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 78e8a37fba9a..c769fe580daf 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -4,7 +4,8 @@ # ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) + $(DISABLE_STACKLEAK_PLUGIN) \ + $(DISABLE_CFI) ifeq ($(cc-name),clang) ccflags-y += -fno-jump-tables From 8c9fc78531b6620294d0faf75dd9951ca98d1895 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 10 Aug 2017 09:39:53 -0700 Subject: [PATCH 036/152] ANDROID: crypto: arm64/sha: fix CFI in SHA CE Calling a stand-alone assembly function trips clang's Control-Flow Integrity checking. Add static inline stubs to convert indirect calls to direct calls for sha[12]_ce_transform Bug: 67506682 Bug: 133186739 Change-Id: If872f30095994206bc768eee13670be552b2a247 Signed-off-by: Sami Tolvanen --- arch/arm64/crypto/sha1-ce-glue.c | 8 ++++++++ arch/arm64/crypto/sha2-ce-glue.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 17fac2889f56..c3d572af201c 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -29,6 +29,14 @@ struct sha1_ce_state { asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, int blocks); +#ifdef CONFIG_CFI_CLANG +static inline void __cfi_sha1_ce_transform(struct sha1_state *sst, + u8 const *src, int blocks) +{ + sha1_ce_transform((struct sha1_ce_state *)sst, src, blocks); +} +#define sha1_ce_transform __cfi_sha1_ce_transform +#endif const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize); diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 261f5195cab7..db37282ca060 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -29,6 +29,14 @@ struct sha256_ce_state { asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, int blocks); +#ifdef CONFIG_CFI_CLANG +static inline void __cfi_sha2_ce_transform(struct sha256_state *sst, + u8 const *src, int blocks) +{ + sha2_ce_transform((struct sha256_ce_state *)sst, src, blocks); +} +#define sha2_ce_transform __cfi_sha2_ce_transform +#endif const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, sst.count); From 59dec116361fe9afb50fb93f1679e7359ab67a49 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 15 May 2019 15:07:52 -0700 Subject: [PATCH 037/152] ANDROID: crypto: arm64/ghash: fix CFI for GHASH CE Calling a stand-alone assembly function trips clang's Control-Flow Integrity checking. Add a static inline stub to convert indirect calls to direct calls for pmull_ghash_update_p(64|8). Bug: 133186739 Change-Id: Ib4899cdae60d83961f78ed3451a919f7c9c713bc Signed-off-by: Sami Tolvanen --- arch/arm64/crypto/ghash-ce-glue.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 067d8937d5af..b2003e2077e0 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -60,6 +60,22 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, struct ghash_key const *k, const char *head); +#ifdef CONFIG_CFI_CLANG +static inline void __cfi_pmull_ghash_update_p64(int blocks, u64 dg[], + const char *src, struct ghash_key const *k, const char *head) +{ + return pmull_ghash_update_p64(blocks, dg, src, k, head); +} +#define pmull_ghash_update_p64 __cfi_pmull_ghash_update_p64 + +static inline void __cfi_pmull_ghash_update_p8(int blocks, u64 dg[], + const char *src, struct ghash_key const *k, const char *head) +{ + return pmull_ghash_update_p8(blocks, dg, src, k, head); +} +#define pmull_ghash_update_p8 __cfi_pmull_ghash_update_p8 +#endif + static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src, struct ghash_key const *k, const char *head); From 2b42717b28ddd8f249be5daa37d974ccbc275b40 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 4 Apr 2019 14:59:57 -0700 Subject: [PATCH 038/152] UPSTREAM: x86/build: Keep local relocations with ld.lld The LLVM linker (ld.lld) defaults to removing local relocations, which causes KASLR boot failures. ld.bfd and ld.gold already handle this correctly. This adds the explicit instruction "--discard-none" during the link phase. There is no change in output for ld.bfd and ld.gold, but ld.lld now produces an image with all the needed relocations. Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Nick Desaulniers Cc: Thomas Gleixner Cc: clang-built-linux@googlegroups.com Cc: x86-ml Link: https://lkml.kernel.org/r/20190404214027.GA7324@beast Link: https://github.com/ClangBuiltLinux/linux/issues/404 Change-Id: If16af606aee95770bee98fc662867115b3a8572a (cherry picked from commit 7c21383f3429dd70da39c0c7f1efa12377a47ab6) Signed-off-by: Sami Tolvanen --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 7aae8cde1dea..ff18580c0445 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -47,7 +47,7 @@ export REALMODE_CFLAGS export BITS ifdef CONFIG_X86_NEED_RELOCS - LDFLAGS_vmlinux := --emit-relocs + LDFLAGS_vmlinux := --emit-relocs --discard-none endif # From cdd93e0809dd5482e60c5b450d114e171b4775ef Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 2 Apr 2019 11:59:46 -0700 Subject: [PATCH 039/152] ANDROID: x86/cpu/vmware: use the full form of inl in VMWARE_PORT LLVM's assembler doesn't accept the short form inl (%%dx) instruction, but instead insists on the output register to be explicitly specified: :1:7: error: invalid operand for instruction inl (%dx) ^ LLVM ERROR: Error parsing inline asm Bug: 133186739 Change-Id: I0519034f4a66bd72f23d206d4638578836a49ff5 Signed-off-by: Sami Tolvanen --- arch/x86/kernel/cpu/vmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d805202c63cd..917840ed5fe4 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -45,7 +45,7 @@ #define VMWARE_PORT_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ - __asm__("inl (%%dx)" : \ + __asm__("inl (%%dx), %%eax" : \ "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ "0"(VMWARE_HYPERVISOR_MAGIC), \ "1"(VMWARE_PORT_CMD_##cmd), \ From 5f8f071ece87a6b185db64365cd0adda466c89d0 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 16 Apr 2019 14:40:00 -0700 Subject: [PATCH 040/152] ANDROID: x86/vdso: disable LTO only for VDSO Instead of disabling LTO for all the code in the directory, only disable it for the VDSO itself. Bug: 133186739 Change-Id: I02e34b75e022982f9884e238aec89486890dd4bb Signed-off-by: Sami Tolvanen --- arch/x86/entry/vdso/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 5bfe2243a08f..c66b333a08ac 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,7 +3,6 @@ # Building vDSO images for x86. # -KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y @@ -68,7 +67,7 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(DISABLE_LTO) ifdef CONFIG_RETPOLINE ifneq ($(RETPOLINE_VDSO_CFLAGS),) @@ -139,6 +138,8 @@ KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(LTO_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(CFI_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) From e854ce2c299dfbb3eb9b4a6398fce8096d7d57a1 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 19 Apr 2019 14:10:19 -0700 Subject: [PATCH 041/152] ANDROID: x86: disable HAVE_ARCH_PREL32_RELOCATIONS with LTO_CLANG Disable HAVE_ARCH_PREL32_RELOCATIONS to stop LLVM from reordering initcalls. Bug: 133186739 Change-Id: Ie8f58b53fe8dccb961573f3aa80ab6d8aae16f4d Signed-off-by: Sami Tolvanen --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e76d16ac2776..feef7a28cd71 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -124,7 +124,7 @@ config X86 select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT - select HAVE_ARCH_PREL32_RELOCATIONS + select HAVE_ARCH_PREL32_RELOCATIONS if !LTO_CLANG select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK From 3e1624332ad69171ea8f997ed45d355d5245e50f Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 30 Apr 2019 13:23:12 -0700 Subject: [PATCH 042/152] ANDROID: x86: disable STACK_VALIDATION with LTO_CLANG Disable CONFIG_STACK_VALIDATION with LTO, because objtool doesn't understand LLVM IR. Bug: 133186739 Change-Id: I3f2bd3f0ce6c80f88bd00394453b543f884188f2 Signed-off-by: Sami Tolvanen --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index feef7a28cd71..bfc4a817bd3f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -185,7 +185,7 @@ config X86 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR - select HAVE_STACK_VALIDATION if X86_64 + select HAVE_STACK_VALIDATION if X86_64 && !LTO_CLANG select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK From 0a90c27ba0dca14d61a33fe0bd3c22f700370bf5 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 10 Apr 2019 14:17:20 -0700 Subject: [PATCH 043/152] ANDROID: x86: add support for CONFIG_LTO_CLANG Bug: 133186739 Change-Id: I8e7a8976f9381233f28badc324145b9c4c7eacb0 Signed-off-by: Sami Tolvanen --- arch/x86/Kconfig | 1 + arch/x86/Makefile | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bfc4a817bd3f..f85253f01cdb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -78,6 +78,7 @@ config X86 select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_LTO_CLANG if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ff18580c0445..723bffd979e9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -214,6 +214,11 @@ ifdef CONFIG_X86_64 KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000) endif +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ + -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif + # Speed up the build KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release From 2d96fcbd8b379773f26079c4e416ab94e2c84f67 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 16 Apr 2019 13:18:39 -0700 Subject: [PATCH 044/152] ANDROID: x86: use the correct function type in SYSCALL_DEFINE0 Although a syscall defined using SYSCALL_DEFINE0 doesn't accept parameters, use the correct function type to avoid type mismatches with Control-Flow Integrity checking. Bug: 133186739 Change-Id: I246d27e4418695231979f4f9a6d98c8eb6776167 Signed-off-by: Sami Tolvanen --- arch/x86/include/asm/syscall_wrapper.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d..5509c1afa7d9 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -48,12 +48,13 @@ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias * named __ia32_sys_*() */ -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(void) + +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL(name) \ cond_syscall(__x64_sys_##name); \ @@ -183,9 +184,9 @@ #ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(void) + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) #endif #ifndef COND_SYSCALL From 5dda676ccdd33604ffc7dd47625f8358eee0ed5e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 16 Apr 2019 14:08:52 -0700 Subject: [PATCH 045/152] ANDROID: x86: use the correct function type for native_set_fixmap Bug: 133186739 Change-Id: Iae48f393231e53f390bb0bfd5ebbe4bb3c0fd463 Signed-off-by: Sami Tolvanen --- arch/x86/include/asm/fixmap.h | 2 +- arch/x86/mm/pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6390bd8c141b..5e12b2319d7a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -159,7 +159,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 59274e2c1ac4..619101a4c7fc 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -660,7 +660,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) fixmaps_set++; } -void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { /* Sanitize 'prot' against any unsupported bits: */ From f78329ce400c17339b392c5642a7a7157152c651 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 22 Apr 2019 12:42:48 -0700 Subject: [PATCH 046/152] ANDROID: x86: use the correct function type for sys32_(rt_)sigreturn Use the correct function type to avoid tripping Control-Flow Integrity checking. Bug: 133186739 Change-Id: I6a145ba075f8902a5883e6374328643b3771b1eb Signed-off-by: Sami Tolvanen --- arch/x86/ia32/ia32_signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 86b1341cba9a..7aa46538d53e 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -113,7 +113,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, return err; } -asmlinkage long sys32_sigreturn(void) +asmlinkage long sys32_sigreturn(const struct pt_regs *__unused) { struct pt_regs *regs = current_pt_regs(); struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); @@ -139,7 +139,7 @@ asmlinkage long sys32_sigreturn(void) return 0; } -asmlinkage long sys32_rt_sigreturn(void) +asmlinkage long sys32_rt_sigreturn(const struct pt_regs *__unused) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_ia32 __user *frame; From 3215ed434bb8ef5aa12bdee11875c711e88f60c9 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 7 May 2019 13:19:37 -0700 Subject: [PATCH 047/152] ANDROID: x86: use the correct function type for sys_ni_syscall Bug: 133186739 Change-Id: I0d43dc716fad3b987e7e29357dbf84e26c753367 Signed-off-by: Sami Tolvanen --- arch/x86/entry/syscall_32.c | 8 +++----- arch/x86/entry/syscall_64.c | 12 +++++++++--- arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index aa3336a7cb15..7d17b3addbbb 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,13 +10,11 @@ #ifdef CONFIG_IA32_EMULATION /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); - -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); - +#define __sys_ni_syscall __ia32_sys_ni_syscall #else /* CONFIG_IA32_EMULATION */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __sys_ni_syscall sys_ni_syscall #endif /* CONFIG_IA32_EMULATION */ #include @@ -29,6 +27,6 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_compat_max] = &sys_ni_syscall, + [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, #include }; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index d5252bc1e380..eb82ad912f82 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -4,11 +4,17 @@ #include #include #include +#include #include #include -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); +extern asmlinkage long sys_ni_syscall(void); + +SYSCALL_DEFINE0(ni_syscall) +{ + return sys_ni_syscall(); +} + #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); #include #undef __SYSCALL_64 @@ -20,6 +26,6 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, + [0 ... __NR_syscall_max] = &__x64_sys_ni_syscall, #include }; diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3cf7b533b3d1..13c795b38f15 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -124,7 +124,7 @@ 110 i386 iopl sys_iopl __ia32_sys_iopl 111 i386 vhangup sys_vhangup __ia32_sys_vhangup 112 i386 idle -113 i386 vm86old sys_vm86old sys_ni_syscall +113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall 114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4 115 i386 swapoff sys_swapoff __ia32_sys_swapoff 116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo @@ -177,7 +177,7 @@ 163 i386 mremap sys_mremap __ia32_sys_mremap 164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16 165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16 -166 i386 vm86 sys_vm86 sys_ni_syscall +166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall 167 i386 query_module 168 i386 poll sys_poll __ia32_sys_poll 169 i386 nfsservctl From 8f83e6e6b2727102902cd2e9418b7baebe0fbded Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Thu, 24 Jan 2019 16:52:59 -0800 Subject: [PATCH 048/152] BACKPORT: drm/amd/display: add -msse2 to prevent Clang from emitting libcalls to undefined SW FP routines arch/x86/Makefile disables SSE and SSE2 for the whole kernel. The AMDGPU drivers modified in this patch re-enable SSE but not SSE2. Turn on SSE2 to support emitting double precision floating point instructions rather than calls to non-existent (usually available from gcc_s or compiler_rt) floating point helper routines. Link: https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html Link: https://github.com/ClangBuiltLinux/linux/issues/327 Cc: stable@vger.kernel.org # 4.19 Reported-by: S, Shirish Reported-by: Matthias Kaehlcke Suggested-by: James Y Knight Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Tested-by: Guenter Roeck Tested-by: Matthias Kaehlcke Tested-by: Nathan Chancellor Reviewed-by: Harry Wentland Signed-off-by: Harry Wentland Signed-off-by: Alex Deucher Change-Id: I653676f209c85d1c27c91af1c9347ee59107b04f (cherry picked from commit 10117450735c7a7c0858095fb46a860e7037cb9a) [ added CONFIG_CC_IS_CLANG ] Link: https://bugs.freedesktop.org/show_bug.cgi?id=109487 Signed-off-by: Sami Tolvanen --- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 6 ++++++ drivers/gpu/drm/amd/display/dc/dml/Makefile | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 95f332ee3e7e..e4a8b3337f5c 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -32,6 +32,12 @@ endif calcs_ccflags := -mhard-float -msse $(cc_stack_align) +# Use -msse2 only with clang: +# https://bugs.freedesktop.org/show_bug.cgi?id=109487 +ifdef CONFIG_CC_IS_CLANG +calcs_cc_flags += -msse2 +endif + CFLAGS_dcn_calcs.o := $(calcs_ccflags) CFLAGS_dcn_calc_auto.o := $(calcs_ccflags) CFLAGS_dcn_calc_math.o := $(calcs_ccflags) -Wno-tautological-compare diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index d97ca6528f9d..648c80c21680 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -32,6 +32,12 @@ endif dml_ccflags := -mhard-float -msse $(cc_stack_align) +# Use -msse2 only with clang: +# https://bugs.freedesktop.org/show_bug.cgi?id=109487 +ifdef CONFIG_CC_IS_CLANG +dml_ccflags += -msse2 +endif + CFLAGS_display_mode_lib.o := $(dml_ccflags) CFLAGS_display_pipe_clocks.o := $(dml_ccflags) CFLAGS_dml1_display_rq_dlg_calc.o := $(dml_ccflags) From e6e368c99975511451b447bfbb59cf708de6a8ec Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 21 May 2019 14:56:39 -0700 Subject: [PATCH 049/152] fs: sdcardfs: Add missing option to show_options unshared_obb was missing from show_options bug: 133257717 Change-Id: I1bc49d1b4098052382a518540e5965e037aa39f1 --- fs/sdcardfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 5e0c39a17251..1240ef2f9d74 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -313,6 +313,8 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, seq_printf(m, ",reserved=%uMB", opts->reserved_mb); if (opts->nocache) seq_printf(m, ",nocache"); + if (opts->unshared_obb) + seq_printf(m, ",unshared_obb"); return 0; }; From 7761dbf58d2255601c2f71d853f1802810919a31 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 28 Apr 2019 17:25:38 -0400 Subject: [PATCH 050/152] locking/rwsem: Prevent decrement of reader count before increment [ Upstream commit a9e9bcb45b1525ba7aea26ed9441e8632aeeda58 ] During my rwsem testing, it was found that after a down_read(), the reader count may occasionally become 0 or even negative. Consequently, a writer may steal the lock at that time and execute with the reader in parallel thus breaking the mutual exclusion guarantee of the write lock. In other words, both readers and writer can become rwsem owners simultaneously. The current reader wakeup code does it in one pass to clear waiter->task and put them into wake_q before fully incrementing the reader count. Once waiter->task is cleared, the corresponding reader may see it, finish the critical section and do unlock to decrement the count before the count is incremented. This is not a problem if there is only one reader to wake up as the count has been pre-incremented by 1. It is a problem if there are more than one readers to be woken up and writer can steal the lock. The wakeup was actually done in 2 passes before the following v4.9 commit: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once") To fix this problem, the wakeup is now done in two passes again. In the first pass, we collect the readers and count them. The reader count is then fully incremented. In the second pass, the waiter->task is then cleared and they are put into wake_q to be woken up later. Signed-off-by: Waiman Long Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Fixes: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once") Link: http://lkml.kernel.org/r/20190428212557.13482-2-longman@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/locking/rwsem-xadd.c | 44 +++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index ef909357b84e..e41e4b4b5267 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, { struct rwsem_waiter *waiter, *tmp; long oldcount, woken = 0, adjustment = 0; + struct list_head wlist; /* * Take a peek at the queue head waiter such that we can determine @@ -188,18 +189,42 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, * of the queue. We know that woken will be at least 1 as we accounted * for above. Note we increment the 'active part' of the count by the * number of readers before waking any processes up. + * + * We have to do wakeup in 2 passes to prevent the possibility that + * the reader count may be decremented before it is incremented. It + * is because the to-be-woken waiter may not have slept yet. So it + * may see waiter->task got cleared, finish its critical section and + * do an unlock before the reader count increment. + * + * 1) Collect the read-waiters in a separate list, count them and + * fully increment the reader count in rwsem. + * 2) For each waiters in the new list, clear waiter->task and + * put them into wake_q to be woken up later. */ - list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { - struct task_struct *tsk; - + list_for_each_entry(waiter, &sem->wait_list, list) { if (waiter->type == RWSEM_WAITING_FOR_WRITE) break; woken++; - tsk = waiter->task; + } + list_cut_before(&wlist, &sem->wait_list, &waiter->list); + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; + if (list_empty(&sem->wait_list)) { + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; + } + + if (adjustment) + atomic_long_add(adjustment, &sem->count); + + /* 2nd pass */ + list_for_each_entry_safe(waiter, tmp, &wlist, list) { + struct task_struct *tsk; + + tsk = waiter->task; get_task_struct(tsk); - list_del(&waiter->list); + /* * Ensure calling get_task_struct() before setting the reader * waiter to nil such that rwsem_down_read_failed() cannot @@ -215,15 +240,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, /* wake_q_add() already take the task ref */ put_task_struct(tsk); } - - adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; - if (list_empty(&sem->wait_list)) { - /* hit end of list above */ - adjustment -= RWSEM_WAITING_BIAS; - } - - if (adjustment) - atomic_long_add(adjustment, &sem->count); } /* From 393ca9ea37fb1c8cd74838282bf569825d18e3bf Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 14 May 2019 13:24:39 -0700 Subject: [PATCH 051/152] x86/speculation/mds: Revert CPU buffer clear on double fault exit commit 88640e1dcd089879530a49a8d212d1814678dfe7 upstream. The double fault ESPFIX path doesn't return to user mode at all -- it returns back to the kernel by simulating a #GP fault. prepare_exit_to_usermode() will run on the way out of general_protection before running user code. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: Greg Kroah-Hartman Cc: Jon Masters Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 04dcbdb80578 ("x86/speculation/mds: Clear CPU buffers on exit to user") Link: http://lkml.kernel.org/r/ac97612445c0a44ee10374f6ea79c222fe22a5c4.1557865329.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 7 ------- arch/x86/kernel/traps.c | 8 -------- 2 files changed, 15 deletions(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 534e9baa4e1d..0dc812bb9249 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -158,13 +158,6 @@ Mitigation points mitigated on the return from do_nmi() to provide almost complete coverage. - - Double fault (#DF): - - A double fault is usually fatal, but the ESPFIX workaround, which can - be triggered from user space through modify_ldt(2) is a recoverable - double fault. #DF uses the paranoid exit path, so explicit mitigation - in the double fault handler is required. - - Machine Check Exception (#MC): Another corner case is a #MC which hits between the CPU buffer clear diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0a5efd764914..e6db475164ed 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -388,13 +387,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; - /* - * This situation can be triggered by userspace via - * modify_ldt(2) and the return does not take the regular - * user space exit, so a CPU buffer clear is required when - * MDS mitigation is enabled. - */ - mds_user_clear_cpu_buffers(); return; } #endif From b185029f5c41b7c7e4ac5d0f294cc81e56b4c785 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 14 May 2019 13:24:40 -0700 Subject: [PATCH 052/152] x86/speculation/mds: Improve CPU buffer clear documentation commit 9d8d0294e78a164d407133dea05caf4b84247d6a upstream. On x86_64, all returns to usermode go through prepare_exit_to_usermode(), with the sole exception of do_nmi(). This even includes machine checks -- this was added several years ago to support MCE recovery. Update the documentation. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: Greg Kroah-Hartman Cc: Jon Masters Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 04dcbdb80578 ("x86/speculation/mds: Clear CPU buffers on exit to user") Link: http://lkml.kernel.org/r/999fa9e126ba6a48e9d214d2f18dbde5c62ac55c.1557865329.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 37 ++++++------------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 0dc812bb9249..5d4330be200f 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -142,38 +142,13 @@ Mitigation points mds_user_clear. The mitigation is invoked in prepare_exit_to_usermode() which covers - most of the kernel to user space transitions. There are a few exceptions - which are not invoking prepare_exit_to_usermode() on return to user - space. These exceptions use the paranoid exit code. + all but one of the kernel to user space transitions. The exception + is when we return from a Non Maskable Interrupt (NMI), which is + handled directly in do_nmi(). - - Non Maskable Interrupt (NMI): - - Access to sensible data like keys, credentials in the NMI context is - mostly theoretical: The CPU can do prefetching or execute a - misspeculated code path and thereby fetching data which might end up - leaking through a buffer. - - But for mounting other attacks the kernel stack address of the task is - already valuable information. So in full mitigation mode, the NMI is - mitigated on the return from do_nmi() to provide almost complete - coverage. - - - Machine Check Exception (#MC): - - Another corner case is a #MC which hits between the CPU buffer clear - invocation and the actual return to user. As this still is in kernel - space it takes the paranoid exit path which does not clear the CPU - buffers. So the #MC handler repopulates the buffers to some - extent. Machine checks are not reliably controllable and the window is - extremly small so mitigation would just tick a checkbox that this - theoretical corner case is covered. To keep the amount of special - cases small, ignore #MC. - - - Debug Exception (#DB): - - This takes the paranoid exit path only when the INT1 breakpoint is in - kernel space. #DB on a user space address takes the regular exit path, - so no extra mitigation required. + (The reason that NMI is special is that prepare_exit_to_usermode() can + enable IRQs. In NMI context, NMIs are blocked, and we don't want to + enable IRQs with NMIs blocked.) 2. C-State transition From 7b72ca6312abfd2b61aea45ad9f0d2c45bf2a4e9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 May 2019 12:01:32 -0500 Subject: [PATCH 053/152] objtool: Fix function fallthrough detection commit e6f393bc939d566ce3def71232d8013de9aaadde upstream. When a function falls through to the next function due to a compiler bug, objtool prints some obscure warnings. For example: drivers/regulator/core.o: warning: objtool: regulator_count_voltages()+0x95: return with modified stack frame drivers/regulator/core.o: warning: objtool: regulator_count_voltages()+0x0: stack state mismatch: cfa1=7+32 cfa2=7+8 Instead it should be printing: drivers/regulator/core.o: warning: objtool: regulator_supply_is_couple() falls through to next function regulator_count_voltages() This used to work, but was broken by the following commit: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions") The padding nops at the end of a function aren't actually part of the function, as defined by the symbol table. So the 'func' variable in validate_branch() is getting cleared to NULL when a padding nop is encountered, breaking the fallthrough detection. If the current instruction doesn't have a function associated with it, just consider it to be part of the previously detected function by not overwriting the previous value of 'func'. Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions") Link: http://lkml.kernel.org/r/546d143820cd08a46624ae8440d093dd6c902cae.1557766718.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ef152daccc33..46be34576620 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1805,7 +1805,8 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, return 1; } - func = insn->func ? insn->func->pfunc : NULL; + if (insn->func) + func = insn->func->pfunc; if (func && insn->ignore) { WARN_FUNC("BUG: why am I validating an ignored function?", From 8cf1bbca44672fc04bd711c053c0aeccdf1ce726 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Fri, 22 Mar 2019 12:38:06 +0100 Subject: [PATCH 054/152] arm64: dts: rockchip: Disable DCMDs on RK3399's eMMC controller. commit a3eec13b8fd2b9791a21fa16e38dfea8111579bf upstream. When using direct commands (DCMDs) on an RK3399, we get spurious CQE completion interrupts for the DCMD transaction slot (#31): [ 931.196520] ------------[ cut here ]------------ [ 931.201702] mmc1: cqhci: spurious TCN for tag 31 [ 931.206906] WARNING: CPU: 0 PID: 1433 at /usr/src/kernel/drivers/mmc/host/cqhci.c:725 cqhci_irq+0x2e4/0x490 [ 931.206909] Modules linked in: [ 931.206918] CPU: 0 PID: 1433 Comm: irq/29-mmc1 Not tainted 4.19.8-rt6-funkadelic #1 [ 931.206920] Hardware name: Theobroma Systems RK3399-Q7 SoM (DT) [ 931.206924] pstate: 40000005 (nZcv daif -PAN -UAO) [ 931.206927] pc : cqhci_irq+0x2e4/0x490 [ 931.206931] lr : cqhci_irq+0x2e4/0x490 [ 931.206933] sp : ffff00000e54bc80 [ 931.206934] x29: ffff00000e54bc80 x28: 0000000000000000 [ 931.206939] x27: 0000000000000001 x26: ffff000008f217e8 [ 931.206944] x25: ffff8000f02ef030 x24: ffff0000091417b0 [ 931.206948] x23: ffff0000090aa000 x22: ffff8000f008b000 [ 931.206953] x21: 0000000000000002 x20: 000000000000001f [ 931.206957] x19: ffff8000f02ef018 x18: ffffffffffffffff [ 931.206961] x17: 0000000000000000 x16: 0000000000000000 [ 931.206966] x15: ffff0000090aa6c8 x14: 0720072007200720 [ 931.206970] x13: 0720072007200720 x12: 0720072007200720 [ 931.206975] x11: 0720072007200720 x10: 0720072007200720 [ 931.206980] x9 : 0720072007200720 x8 : 0720072007200720 [ 931.206984] x7 : 0720073107330720 x6 : 00000000000005a0 [ 931.206988] x5 : ffff00000860d4b0 x4 : 0000000000000000 [ 931.206993] x3 : 0000000000000001 x2 : 0000000000000001 [ 931.206997] x1 : 1bde3a91b0d4d900 x0 : 0000000000000000 [ 931.207001] Call trace: [ 931.207005] cqhci_irq+0x2e4/0x490 [ 931.207009] sdhci_arasan_cqhci_irq+0x5c/0x90 [ 931.207013] sdhci_irq+0x98/0x930 [ 931.207019] irq_forced_thread_fn+0x2c/0xa0 [ 931.207023] irq_thread+0x114/0x1c0 [ 931.207027] kthread+0x128/0x130 [ 931.207032] ret_from_fork+0x10/0x20 [ 931.207035] ---[ end trace 0000000000000002 ]--- The driver shows this message only for the first spurious interrupt by using WARN_ONCE(). Changing this to WARN() shows, that this is happening quite frequently (up to once a second). Since the eMMC 5.1 specification, where CQE and CQHCI are specified, does not mention that spurious TCN interrupts for DCMDs can be simply ignored, we must assume that using this feature is not working reliably. The current implementation uses DCMD for REQ_OP_FLUSH only, and I could not see any performance/power impact when disabling this optional feature for RK3399. Therefore this patch disables DCMDs for RK3399. Signed-off-by: Christoph Muellner Signed-off-by: Philipp Tomsich Fixes: 84362d79f436 ("mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1") Cc: stable@vger.kernel.org [the corresponding code changes are queued for 5.2 so doing that as well] Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index c88e603396f6..df7e62d9a670 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -305,6 +305,7 @@ phys = <&emmc_phy>; phy-names = "phy_arasan"; power-domains = <&power RK3399_PD_EMMC>; + disable-cqe-dcmd; status = "disabled"; }; From abea1fb532667eeeb774d6bafb850b03b013226a Mon Sep 17 00:00:00 2001 From: Stuart Menefy Date: Tue, 19 Feb 2019 13:03:37 +0000 Subject: [PATCH 055/152] ARM: dts: exynos: Fix interrupt for shared EINTs on Exynos5260 commit b7ed69d67ff0788d8463e599dd5dd1b45c701a7e upstream. Fix the interrupt information for the GPIO lines with a shared EINT interrupt. Fixes: 16d7ff2642e7 ("ARM: dts: add dts files for exynos5260 SoC") Cc: stable@vger.kernel.org Signed-off-by: Stuart Menefy Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5260.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos5260.dtsi b/arch/arm/boot/dts/exynos5260.dtsi index 55167850619c..33a085ffc447 100644 --- a/arch/arm/boot/dts/exynos5260.dtsi +++ b/arch/arm/boot/dts/exynos5260.dtsi @@ -223,7 +223,7 @@ wakeup-interrupt-controller { compatible = "samsung,exynos4210-wakeup-eint"; interrupt-parent = <&gic>; - interrupts = ; + interrupts = ; }; }; From e2c436d9268f0d856c284a40ca7ecec17c69a2be Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 20 Mar 2019 10:59:50 +0100 Subject: [PATCH 056/152] ARM: dts: exynos: Fix audio (microphone) routing on Odroid XU3 commit 9b23e1a3e8fde76e8cc0e366ab1ed4ffb4440feb upstream. The name of CODEC input widget to which microphone is connected through the "Headphone" jack is "IN12" not "IN1". This fixes microphone support on Odroid XU3. Cc: # v4.14+ Signed-off-by: Sylwester Nawrocki Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi index e84544b220b9..b90cea8b7368 100644 --- a/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroidxu3-audio.dtsi @@ -22,7 +22,7 @@ "Headphone Jack", "HPL", "Headphone Jack", "HPR", "Headphone Jack", "MICBIAS", - "IN1", "Headphone Jack", + "IN12", "Headphone Jack", "Speakers", "SPKL", "Speakers", "SPKR"; From 6eaeee1e7845cb4ab68404583b3073517a5b0f00 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Fri, 22 Mar 2019 12:38:05 +0100 Subject: [PATCH 057/152] mmc: sdhci-of-arasan: Add DTS property to disable DCMDs. commit 7bda9482e7ed4d27d83c1f9cb5cbe3b34ddac3e8 upstream. Direct commands (DCMDs) are an optional feature of eMMC 5.1's command queue engine (CQE). The Arasan eMMC 5.1 controller uses the CQHCI, which exposes a control register bit to enable the feature. The current implementation sets this bit unconditionally. This patch allows to suppress the feature activation, by specifying the property disable-cqe-dcmd. Signed-off-by: Christoph Muellner Signed-off-by: Philipp Tomsich Acked-by: Adrian Hunter Fixes: 84362d79f436 ("mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-arasan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index a40bcc27f187..7fdac277e382 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -814,7 +814,10 @@ static int sdhci_arasan_probe(struct platform_device *pdev) host->mmc_host_ops.start_signal_voltage_switch = sdhci_arasan_voltage_switch; sdhci_arasan->has_cqe = true; - host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD; + host->mmc->caps2 |= MMC_CAP2_CQE; + + if (!of_property_read_bool(np, "disable-cqe-dcmd")) + host->mmc->caps2 |= MMC_CAP2_CQE_DCMD; } ret = sdhci_arasan_add_host(sdhci_arasan); From 921bc15462e21e274c20f766d4865f884f23f730 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Tue, 5 Mar 2019 19:33:54 +0800 Subject: [PATCH 058/152] ARM: exynos: Fix a leaked reference by adding missing of_node_put commit 629266bf7229cd6a550075f5961f95607b823b59 upstream. The call to of_get_next_child returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with warnings like: arch/arm/mach-exynos/firmware.c:201:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 193, but without a corresponding object release within this function. Cc: stable@vger.kernel.org Signed-off-by: Wen Yang Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-exynos/firmware.c | 1 + arch/arm/mach-exynos/suspend.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c index be1f20fe28f4..fbe1db61115f 100644 --- a/arch/arm/mach-exynos/firmware.c +++ b/arch/arm/mach-exynos/firmware.c @@ -196,6 +196,7 @@ void __init exynos_firmware_init(void) return; addr = of_get_address(nd, 0, NULL, NULL); + of_node_put(nd); if (!addr) { pr_err("%s: No address specified.\n", __func__); return; diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 7ead3acd6fa4..b1fe53e8b460 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -639,8 +639,10 @@ void __init exynos_pm_init(void) if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) { pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + of_node_put(np); return; } + of_node_put(np); pm_data = (const struct exynos_pm_data *) match->data; From 26eb5e7fa08db43fffe2d2333ece7f72a49122a7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 18 Mar 2019 11:14:39 -0500 Subject: [PATCH 059/152] power: supply: axp288_charger: Fix unchecked return value commit c3422ad5f84a66739ec6a37251ca27638c85b6be upstream. Currently there is no check on platform_get_irq() return value in case it fails, hence never actually reporting any errors and causing unexpected behavior when using such value as argument for function regmap_irq_get_virq(). Fix this by adding a proper check, a message reporting any errors and returning *pirq* Addresses-Coverity-ID: 1443940 ("Improper use of negative value") Fixes: 843735b788a4 ("power: axp288_charger: axp288 charger driver") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/axp288_charger.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index 735658ee1c60..c60659fb21de 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -832,6 +832,10 @@ static int axp288_charger_probe(struct platform_device *pdev) /* Register charger interrupts */ for (i = 0; i < CHRG_INTR_END; i++) { pirq = platform_get_irq(info->pdev, i); + if (pirq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", pirq); + return pirq; + } info->irq[i] = regmap_irq_get_virq(info->regmap_irqc, pirq); if (info->irq[i] < 0) { dev_warn(&info->pdev->dev, From 592127e9c1bbc14795292a273faa006d805c7fe8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 22 Apr 2019 22:43:01 +0200 Subject: [PATCH 060/152] power: supply: axp288_fuel_gauge: Add ACEPC T8 and T11 mini PCs to the blacklist commit 9274c78305e12c5f461bec15f49c38e0f32ca705 upstream. The ACEPC T8 and T11 Cherry Trail Z8350 mini PCs use an AXP288 and as PCs, rather then portables, they does not have a battery. Still for some reason the AXP288 not only thinks there is a battery, it actually thinks it is discharging while the PC is running, slowly going to 0% full, causing userspace to shutdown the system due to the battery being critically low after a while. This commit adds the ACEPC T8 and T11 to the axp288 fuel-gauge driver blacklist, so that we stop reporting bogus battery readings on this device. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1690852 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/axp288_fuel_gauge.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index 084c8ba9749d..ab0b6e78ca02 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -695,6 +695,26 @@ static void fuel_gauge_init_irq(struct axp288_fg_info *info) * detection reports one despite it not being there. */ static const struct dmi_system_id axp288_fuel_gauge_blacklist[] = { + { + /* ACEPC T8 Cherry Trail Z8350 mini PC */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "To be filled by O.E.M."), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "T8"), + /* also match on somewhat unique bios-version */ + DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1.000"), + }, + }, + { + /* ACEPC T11 Cherry Trail Z8350 mini PC */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "To be filled by O.E.M."), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "T11"), + /* also match on somewhat unique bios-version */ + DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1.000"), + }, + }, { /* Intel Cherry Trail Compute Stick, Windows version */ .matches = { From 222abad906ba7e4727270930c681173d7d4769aa Mon Sep 17 00:00:00 2001 From: Boyang Zhou Date: Mon, 29 Apr 2019 15:27:19 +0100 Subject: [PATCH 061/152] arm64: mmap: Ensure file offset is treated as unsigned commit f08cae2f28db24d95be5204046b60618d8de4ddc upstream. The file offset argument to the arm64 sys_mmap() implementation is scaled from bytes to pages by shifting right by PAGE_SHIFT. Unfortunately, the offset is passed in as a signed 'off_t' type and therefore large offsets (i.e. with the top bit set) are incorrectly sign-extended by the shift. This has been observed to cause false mmap() failures when mapping GPU doorbells on an arm64 server part. Change the type of the file offset argument to sys_mmap() from 'off_t' to 'unsigned long' so that the shifting scales the value as expected. Cc: Signed-off-by: Boyang Zhou [will: rewrote commit message] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index b44065fb1616..6f91e8116514 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -31,7 +31,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, off) + unsigned long, fd, unsigned long, off) { if (offset_in_page(off) != 0) return -EINVAL; From 6d696ceb15a31bedf89b3c572bf62b64fe5d69c4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Apr 2019 17:26:22 +0100 Subject: [PATCH 062/152] arm64: arch_timer: Ensure counter register reads occur with seqlock held commit 75a19a0202db21638a1c2b424afb867e1f9a2376 upstream. When executing clock_gettime(), either in the vDSO or via a system call, we need to ensure that the read of the counter register occurs within the seqlock reader critical section. This ensures that updates to the clocksource parameters (e.g. the multiplier) are consistent with the counter value and therefore avoids the situation where time appears to go backwards across multiple reads. Extend the vDSO logic so that the seqlock critical section covers the read of the counter register as well as accesses to the data page. Since reads of the counter system registers are not ordered by memory barrier instructions, introduce dependency ordering from the counter read to a subsequent memory access so that the seqlock memory barriers apply to the counter access in both the vDSO and the system call paths. Cc: Cc: Marc Zyngier Tested-by: Vincenzo Frascino Link: https://lore.kernel.org/linux-arm-kernel/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ Reported-by: Thomas Gleixner Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/arch_timer.h | 33 +++++++++++++++++++++++++-- arch/arm64/kernel/vdso/gettimeofday.S | 15 ++++++++---- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f2a234d6516c..93e07512b4b6 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -148,18 +148,47 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + static inline u64 arch_counter_get_cntpct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntpct_el0); + cnt = arch_timer_reg_read_stable(cntpct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } static inline u64 arch_counter_get_cntvct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntvct_el0); + cnt = arch_timer_reg_read_stable(cntvct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } +#undef arch_counter_enforce_ordering + static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index c39872a7b03c..e8f60112818f 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -73,6 +73,13 @@ x_tmp .req x8 movn x_tmp, #0xff00, lsl #48 and \res, x_tmp, \res mul \res, \res, \mult + /* + * Fake address dependency from the value computed from the counter + * register to subsequent data page accesses so that the sequence + * locking also orders the read of the counter. + */ + and x_tmp, \res, xzr + add vdso_data, vdso_data, x_tmp .endm /* @@ -147,12 +154,12 @@ ENTRY(__kernel_gettimeofday) /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=1b get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=1b get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -211,13 +218,13 @@ realtime: /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=realtime /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=realtime get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 clock_gettime_return, shift=1 @@ -231,7 +238,6 @@ monotonic: ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic /* All computations are done with left-shifted nsecs. */ lsl x4, x4, x12 @@ -239,6 +245,7 @@ monotonic: lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -253,13 +260,13 @@ monotonic_raw: /* w11 = cs_raw_mult, w12 = cs_shift */ ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic_raw get_ts_clock_raw res_sec=x10, res_nsec=x11, \ clock_nsec=x15, nsec_to_sec=x9 From 26e7d2ad97b9dcc1a4618d0803847a31fc013e86 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 1 Apr 2019 12:30:14 +0100 Subject: [PATCH 063/152] arm64: compat: Reduce address limit commit d263119387de9975d2acba1dfd3392f7c5979c18 upstream. Currently, compat tasks running on arm64 can allocate memory up to TASK_SIZE_32 (UL(0x100000000)). This means that mmap() allocations, if we treat them as returning an array, are not compliant with the sections 6.5.8 of the C standard (C99) which states that: "If the expression P points to an element of an array object and the expression Q points to the last element of the same array object, the pointer expression Q+1 compares greater than P". Redefine TASK_SIZE_32 to address the issue. Cc: Catalin Marinas Cc: Will Deacon Cc: Jann Horn Cc: Reported-by: Jann Horn Signed-off-by: Vincenzo Frascino [will: fixed typo in comment] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/processor.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 79657ad91397..def5a5e807f0 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -53,7 +53,15 @@ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. */ #ifdef CONFIG_COMPAT +#ifdef CONFIG_ARM64_64K_PAGES +/* + * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied + * by the compat vectors page. + */ #define TASK_SIZE_32 UL(0x100000000) +#else +#define TASK_SIZE_32 (UL(0x100000000) - PAGE_SIZE) +#endif /* CONFIG_ARM64_64K_PAGES */ #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ From f273cd16554a92ea746cf117a126ff5be8c54794 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 8 Apr 2019 18:17:18 +0100 Subject: [PATCH 064/152] arm64: Clear OSDLR_EL1 on CPU boot commit 6fda41bf12615ee7c3ddac88155099b1a8cf8d00 upstream. Some firmwares may reboot CPUs with OS Double Lock set. Make sure that it is unlocked, in order to use debug exceptions. Cc: Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/debug-monitors.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 06ca574495af..262925b98f42 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,6 +135,7 @@ NOKPROBE_SYMBOL(disable_debug_monitors); */ static int clear_os_lock(unsigned int cpu) { + write_sysreg(0, osdlr_el1); write_sysreg(0, oslar_el1); isb(); return 0; From d8d751efec2819ec6d4fdfc04b20e4f67f0c157e Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 8 Apr 2019 18:17:19 +0100 Subject: [PATCH 065/152] arm64: Save and restore OSDLR_EL1 across suspend/resume commit 827a108e354db633698f0b4a10c1ffd2b1f8d1d0 upstream. When the CPU comes out of suspend, the firmware may have modified the OS Double Lock Register. Save it in an unused slot of cpu_suspend_ctx, and restore it on resume. Cc: Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/proc.S | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 03646e6a2ef4..8cce091b6c21 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -70,24 +70,25 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, cpacr_el1 - mrs x6, tcr_el1 - mrs x7, vbar_el1 - mrs x8, mdscr_el1 - mrs x9, oslsr_el1 - mrs x10, sctlr_el1 + mrs x5, osdlr_el1 + mrs x6, cpacr_el1 + mrs x7, tcr_el1 + mrs x8, vbar_el1 + mrs x9, mdscr_el1 + mrs x10, oslsr_el1 + mrs x11, sctlr_el1 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x11, tpidr_el1 + mrs x12, tpidr_el1 alternative_else - mrs x11, tpidr_el2 + mrs x12, tpidr_el2 alternative_endif - mrs x12, sp_el0 + mrs x13, sp_el0 stp x2, x3, [x0] - stp x4, xzr, [x0, #16] - stp x5, x6, [x0, #32] - stp x7, x8, [x0, #48] - stp x9, x10, [x0, #64] - stp x11, x12, [x0, #80] + stp x4, x5, [x0, #16] + stp x6, x7, [x0, #32] + stp x8, x9, [x0, #48] + stp x10, x11, [x0, #64] + stp x12, x13, [x0, #80] ret ENDPROC(cpu_do_suspend) @@ -110,8 +111,8 @@ ENTRY(cpu_do_resume) msr cpacr_el1, x6 /* Don't change t0sz here, mask those bits when restoring */ - mrs x5, tcr_el1 - bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + mrs x7, tcr_el1 + bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH msr tcr_el1, x8 msr vbar_el1, x9 @@ -135,6 +136,7 @@ alternative_endif /* * Restore oslsr_el1 by writing oslar_el1 */ + msr osdlr_el1, x5 ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 From 2ea1a37d0138bbbc813a269c24621ff194f2145b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Feb 2019 10:30:52 +0100 Subject: [PATCH 066/152] sched/x86: Save [ER]FLAGS on context switch commit 6690e86be83ac75832e461c141055b5d601c0a6d upstream. Effectively reverts commit: 2c7577a75837 ("sched/x86_64: Don't save flags on context switch") Specifically because SMAP uses FLAGS.AC which invalidates the claim that the kernel has clean flags. In particular; while preemption from interrupt return is fine (the IRET frame on the exception stack contains FLAGS) it breaks any code that does synchonous scheduling, including preempt_enable(). This has become a significant issue ever since commit: 5b24a7a2aa20 ("Add 'unsafe' user access functions for batched accesses") provided for means of having 'normal' C code between STAC / CLAC, exposing the FLAGS.AC state. So far this hasn't led to trouble, however fix it before it comes apart. Reported-by: Julien Thierry Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@kernel.org Fixes: 5b24a7a2aa20 ("Add 'unsafe' user access functions for batched accesses") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 2 ++ arch/x86/entry/entry_64.S | 2 ++ arch/x86/include/asm/switch_to.h | 1 + arch/x86/kernel/process_32.c | 7 +++++++ arch/x86/kernel/process_64.c | 8 ++++++++ 5 files changed, 20 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index fbbf1ba57ec6..b5c2b1091b18 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -648,6 +648,7 @@ ENTRY(__switch_to_asm) pushl %ebx pushl %edi pushl %esi + pushfl /* switch stack */ movl %esp, TASK_threadsp(%eax) @@ -670,6 +671,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfl popl %esi popl %edi popl %ebx diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 617df50a11d9..585bbc5b3216 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -352,6 +352,7 @@ ENTRY(__switch_to_asm) pushq %r13 pushq %r14 pushq %r15 + pushfq /* switch stack */ movq %rsp, TASK_threadsp(%rdi) @@ -374,6 +375,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfq popq %r15 popq %r14 popq %r13 diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 7cf1a270d891..157149d4129c 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -40,6 +40,7 @@ asmlinkage void ret_from_fork(void); * order of the fields must match the code in __switch_to_asm(). */ struct inactive_task_frame { + unsigned long flags; #ifdef CONFIG_X86_64 unsigned long r15; unsigned long r14; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d3e593eb189f..020efe0f9614 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -130,6 +130,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, struct task_struct *tsk; int err; + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. + */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a0854f283efd..59f71d0f2b23 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -300,6 +300,14 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; + + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. + */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; From c1ec6beac625c55fb1e4a9706852c9cbf11fe49f Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 22 Apr 2019 13:25:58 +0200 Subject: [PATCH 067/152] crypto: crypto4xx - fix ctr-aes missing output IV commit 25baaf8e2c93197d063b372ef7b62f2767c7ac0b upstream. Commit 8efd972ef96a ("crypto: testmgr - support checking skcipher output IV") caused the crypto4xx driver to produce the following error: | ctr-aes-ppc4xx encryption test failed (wrong output IV) | on test vector 0, cfg="in-place" This patch fixes this by reworking the crypto4xx_setkey_aes() function to: - not save the iv for ECB (as per 18.2.38 CRYP0_SA_CMD_0: "This bit mut be cleared for DES ECB mode or AES ECB mode, when no IV is used.") - instruct the hardware to save the generated IV for all other modes of operations that have IV and then supply it back to the callee in pretty much the same way as we do it for cbc-aes already. - make it clear that the DIR_(IN|OUT)BOUND is the important bit that tells the hardware to encrypt or decrypt the data. (this is cosmetic - but it hopefully prevents me from getting confused again). - don't load any bogus hash when we don't use any hash operation to begin with. Cc: stable@vger.kernel.org Fixes: f2a13e7cba9e ("crypto: crypto4xx - enable AES RFC3686, ECB, CFB and OFB offloads") Signed-off-by: Christian Lamparter Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/amcc/crypto4xx_alg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c index f5c07498ea4f..0c85a5123f85 100644 --- a/drivers/crypto/amcc/crypto4xx_alg.c +++ b/drivers/crypto/amcc/crypto4xx_alg.c @@ -141,9 +141,10 @@ static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher, /* Setup SA */ sa = ctx->sa_in; - set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_CBC ? - SA_SAVE_IV : SA_NOT_SAVE_IV), - SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE, + set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_ECB ? + SA_NOT_SAVE_IV : SA_SAVE_IV), + SA_NOT_LOAD_HASH, (cm == CRYPTO_MODE_ECB ? + SA_LOAD_IV_FROM_SA : SA_LOAD_IV_FROM_STATE), SA_NO_HEADER_PROC, SA_HASH_ALG_NULL, SA_CIPHER_ALG_AES, SA_PAD_TYPE_ZERO, SA_OP_GROUP_BASIC, SA_OPCODE_DECRYPT, @@ -162,6 +163,11 @@ static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher, memcpy(ctx->sa_out, ctx->sa_in, ctx->sa_len * 4); sa = ctx->sa_out; sa->sa_command_0.bf.dir = DIR_OUTBOUND; + /* + * SA_OPCODE_ENCRYPT is the same value as SA_OPCODE_DECRYPT. + * it's the DIR_(IN|OUT)BOUND that matters + */ + sa->sa_command_0.bf.opcode = SA_OPCODE_ENCRYPT; return 0; } From 7a32ad34b889f874be154ec93aff892c975d2731 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 22 Apr 2019 13:25:59 +0200 Subject: [PATCH 068/152] crypto: crypto4xx - fix cfb and ofb "overran dst buffer" issues commit 7e92e1717e3eaf6b322c252947c696b3059f05be upstream. Currently, crypto4xx CFB and OFB AES ciphers are failing testmgr's test vectors. |cfb-aes-ppc4xx encryption overran dst buffer on test vector 3, cfg="in-place" |ofb-aes-ppc4xx encryption overran dst buffer on test vector 1, cfg="in-place" This is because of a very subtile "bug" in the hardware that gets indirectly mentioned in 18.1.3.5 Encryption/Decryption of the hardware spec: the OFB and CFB modes for AES are listed there as operation modes for >>> "Block ciphers" <<<. Which kind of makes sense, but we would like them to be considered as stream ciphers just like the CTR mode. To workaround this issue and stop the hardware from causing "overran dst buffer" on crypttexts that are not a multiple of 16 (AES_BLOCK_SIZE), we force the driver to use the scatter buffers as the go-between. As a bonus this patch also kills redundant pd_uinfo->num_gd and pd_uinfo->num_sd setters since the value has already been set before. Cc: stable@vger.kernel.org Fixes: f2a13e7cba9e ("crypto: crypto4xx - enable AES RFC3686, ECB, CFB and OFB offloads") Signed-off-by: Christian Lamparter Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/amcc/crypto4xx_core.c | 31 +++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 6eaec9ba0f68..d2ec9fd1b8bb 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -712,7 +712,23 @@ int crypto4xx_build_pd(struct crypto_async_request *req, size_t offset_to_sr_ptr; u32 gd_idx = 0; int tmp; - bool is_busy; + bool is_busy, force_sd; + + /* + * There's a very subtile/disguised "bug" in the hardware that + * gets indirectly mentioned in 18.1.3.5 Encryption/Decryption + * of the hardware spec: + * *drum roll* the AES/(T)DES OFB and CFB modes are listed as + * operation modes for >>> "Block ciphers" <<<. + * + * To workaround this issue and stop the hardware from causing + * "overran dst buffer" on crypttexts that are not a multiple + * of 16 (AES_BLOCK_SIZE), we force the driver to use the + * scatter buffers. + */ + force_sd = (req_sa->sa_command_1.bf.crypto_mode9_8 == CRYPTO_MODE_CFB + || req_sa->sa_command_1.bf.crypto_mode9_8 == CRYPTO_MODE_OFB) + && (datalen % AES_BLOCK_SIZE); /* figure how many gd are needed */ tmp = sg_nents_for_len(src, assoclen + datalen); @@ -730,7 +746,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req, } /* figure how many sd are needed */ - if (sg_is_last(dst)) { + if (sg_is_last(dst) && force_sd == false) { num_sd = 0; } else { if (datalen > PPC4XX_SD_BUFFER_SIZE) { @@ -805,9 +821,10 @@ int crypto4xx_build_pd(struct crypto_async_request *req, pd->sa_len = sa_len; pd_uinfo = &dev->pdr_uinfo[pd_entry]; - pd_uinfo->async_req = req; pd_uinfo->num_gd = num_gd; pd_uinfo->num_sd = num_sd; + pd_uinfo->dest_va = dst; + pd_uinfo->async_req = req; if (iv_len) memcpy(pd_uinfo->sr_va->save_iv, iv, iv_len); @@ -826,7 +843,6 @@ int crypto4xx_build_pd(struct crypto_async_request *req, /* get first gd we are going to use */ gd_idx = fst_gd; pd_uinfo->first_gd = fst_gd; - pd_uinfo->num_gd = num_gd; gd = crypto4xx_get_gdp(dev, &gd_dma, gd_idx); pd->src = gd_dma; /* enable gather */ @@ -863,17 +879,14 @@ int crypto4xx_build_pd(struct crypto_async_request *req, * Indicate gather array is not used */ pd_uinfo->first_gd = 0xffffffff; - pd_uinfo->num_gd = 0; } - if (sg_is_last(dst)) { + if (!num_sd) { /* * we know application give us dst a whole piece of memory * no need to use scatter ring. */ pd_uinfo->using_sd = 0; pd_uinfo->first_sd = 0xffffffff; - pd_uinfo->num_sd = 0; - pd_uinfo->dest_va = dst; sa->sa_command_0.bf.scatter = 0; pd->dest = (u32)dma_map_page(dev->core_dev->device, sg_page(dst), dst->offset, @@ -887,9 +900,7 @@ int crypto4xx_build_pd(struct crypto_async_request *req, nbytes = datalen; sa->sa_command_0.bf.scatter = 1; pd_uinfo->using_sd = 1; - pd_uinfo->dest_va = dst; pd_uinfo->first_sd = fst_sd; - pd_uinfo->num_sd = num_sd; sd = crypto4xx_get_sdp(dev, &sd_dma, sd_idx); pd->dest = sd_dma; /* setup scatter descriptor */ From 3b5ddd5ea0165fc6b11e1e200a14797adb51b4cb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 9 Apr 2019 23:46:30 -0700 Subject: [PATCH 069/152] crypto: salsa20 - don't access already-freed walk.iv commit edaf28e996af69222b2cb40455dbb5459c2b875a upstream. If the user-provided IV needs to be aligned to the algorithm's alignmask, then skcipher_walk_virt() copies the IV into a new aligned buffer walk.iv. But skcipher_walk_virt() can fail afterwards, and then if the caller unconditionally accesses walk.iv, it's a use-after-free. salsa20-generic doesn't set an alignmask, so currently it isn't affected by this despite unconditionally accessing walk.iv. However this is more subtle than desired, and it was actually broken prior to the alignmask being removed by commit b62b3db76f73 ("crypto: salsa20-generic - cleanup and convert to skcipher API"). Since salsa20-generic does not update the IV and does not need any IV alignment, update it to use req->iv instead of walk.iv. Fixes: 2407d60872dd ("[CRYPTO] salsa20: Salsa20 stream cipher") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/salsa20_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index 8c77bc78a09f..df8fc0f54374 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -161,7 +161,7 @@ static int salsa20_crypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, true); - salsa20_init(state, ctx, walk.iv); + salsa20_init(state, ctx, req->iv); while (walk.nbytes > 0) { unsigned int nbytes = walk.nbytes; From fe632ee5ade8290119b7ea82350c4695817fb9d6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:16 -0700 Subject: [PATCH 070/152] crypto: chacha20poly1305 - set cra_name correctly commit 5e27f38f1f3f45a0c938299c3a34a2d2db77165a upstream. If the rfc7539 template is instantiated with specific implementations, e.g. "rfc7539(chacha20-generic,poly1305-generic)" rather than "rfc7539(chacha20,poly1305)", then the implementation names end up included in the instance's cra_name. This is incorrect because it then prevents all users from allocating "rfc7539(chacha20,poly1305)", if the highest priority implementations of chacha20 and poly1305 were selected. Also, the self-tests aren't run on an instance allocated in this way. Fix it by setting the instance's cra_name from the underlying algorithms' actual cra_names, rather than from the requested names. This matches what other templates do. Fixes: 71ebc4d1b27d ("crypto: chacha20poly1305 - Add a ChaCha20-Poly1305 AEAD construction, RFC7539") Cc: # v4.2+ Cc: Martin Willi Signed-off-by: Eric Biggers Reviewed-by: Martin Willi Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/chacha20poly1305.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 600afa99941f..4d6f51bcdfab 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -647,8 +647,8 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "%s(%s,%s)", name, chacha_name, - poly_name) >= CRYPTO_MAX_ALG_NAME) + "%s(%s,%s)", name, chacha->base.cra_name, + poly->cra_name) >= CRYPTO_MAX_ALG_NAME) goto out_drop_chacha; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s,%s)", name, chacha->base.cra_driver_name, From 07d677ae4db4fd48f55d75e9ec3b7527b18aacbe Mon Sep 17 00:00:00 2001 From: "Singh, Brijesh" Date: Mon, 8 Apr 2019 20:42:55 +0000 Subject: [PATCH 071/152] crypto: ccp - Do not free psp_master when PLATFORM_INIT fails commit f5a2aeb8b254c764772729a6e48d4e0c914bb56a upstream. Currently, we free the psp_master if the PLATFORM_INIT fails during the SEV FW probe. If psp_master is freed then driver does not invoke the PSP FW. As per SEV FW spec, there are several commands (PLATFORM_RESET, PLATFORM_STATUS, GET_ID etc) which can be executed in the UNINIT state We should not free the psp_master when PLATFORM_INIT fails. Fixes: 200664d5237f ("crypto: ccp: Add SEV support") Cc: Tom Lendacky Cc: Herbert Xu Cc: Gary Hook Cc: stable@vger.kernel.org # 4.19.y Signed-off-by: Brijesh Singh Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccp/psp-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index 72790d88236d..1603dc8d2d75 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -935,7 +935,7 @@ void psp_pci_init(void) rc = sev_platform_init(&error); if (rc) { dev_err(sp->dev, "SEV: failed to INIT error %#x\n", error); - goto err; + return; } dev_info(sp->dev, "SEV API:%d.%d build:%d\n", psp_master->api_major, From 66f5de68cb61a727e82adf86ef6d8a4b2101fd04 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 15 Mar 2019 13:09:01 +1100 Subject: [PATCH 072/152] crypto: vmx - fix copy-paste error in CTR mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dcf7b48212c0fab7df69e84fab22d6cb7c8c0fb9 upstream. The original assembly imported from OpenSSL has two copy-paste errors in handling CTR mode. When dealing with a 2 or 3 block tail, the code branches to the CBC decryption exit path, rather than to the CTR exit path. This leads to corruption of the IV, which leads to subsequent blocks being corrupted. This can be detected with libkcapi test suite, which is available at https://github.com/smuellerDD/libkcapi Reported-by: Ondrej Mosnáček Fixes: 5c380d623ed3 ("crypto: vmx - Add support for VMS instructions by ASM") Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Tested-by: Michael Ellerman Tested-by: Ondrej Mosnacek Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/vmx/aesp8-ppc.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index d6a9f63d65ba..de78282b8f44 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -1854,7 +1854,7 @@ Lctr32_enc8x_three: stvx_u $out1,$x10,$out stvx_u $out2,$x20,$out addi $out,$out,0x30 - b Lcbc_dec8x_done + b Lctr32_enc8x_done .align 5 Lctr32_enc8x_two: @@ -1866,7 +1866,7 @@ Lctr32_enc8x_two: stvx_u $out0,$x00,$out stvx_u $out1,$x10,$out addi $out,$out,0x20 - b Lcbc_dec8x_done + b Lctr32_enc8x_done .align 5 Lctr32_enc8x_one: From aabf86f24d9f6157960a5d702738c7a737748b11 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:15 -0700 Subject: [PATCH 073/152] crypto: skcipher - don't WARN on unprocessed data after slow walk step commit dcaca01a42cc2c425154a13412b4124293a6e11e upstream. skcipher_walk_done() assumes it's a bug if, after the "slow" path is executed where the next chunk of data is processed via a bounce buffer, the algorithm says it didn't process all bytes. Thus it WARNs on this. However, this can happen legitimately when the message needs to be evenly divisible into "blocks" but isn't, and the algorithm has a 'walksize' greater than the block size. For example, ecb-aes-neonbs sets 'walksize' to 128 bytes and only supports messages evenly divisible into 16-byte blocks. If, say, 17 message bytes remain but they straddle scatterlist elements, the skcipher_walk code will take the "slow" path and pass the algorithm all 17 bytes in the bounce buffer. But the algorithm will only be able to process 16 bytes, triggering the WARN. Fix this by just removing the WARN_ON(). Returning -EINVAL, as the code already does, is the right behavior. This bug was detected by my patches that improve testmgr to fuzz algorithms against their generic implementation. Fixes: b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") Cc: # v4.10+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/skcipher.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 46bb300d418f..b664cf867f5f 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -131,8 +131,13 @@ int skcipher_walk_done(struct skcipher_walk *walk, int err) memcpy(walk->dst.virt.addr, walk->page, n); skcipher_unmap_dst(walk); } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) { - if (WARN_ON(err)) { - /* unexpected case; didn't process all bytes */ + if (err) { + /* + * Didn't process all bytes. Either the algorithm is + * broken, or this was the last step and it turned out + * the message wasn't evenly divisible into blocks but + * the algorithm requires it. + */ err = -EINVAL; goto finish; } From 7a19a4bef218c259d02b6ac7b924bd4881c6a9fd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:12 -0700 Subject: [PATCH 074/152] crypto: crct10dif-generic - fix use via crypto_shash_digest() commit 307508d1072979f4435416f87936f87eaeb82054 upstream. The ->digest() method of crct10dif-generic reads the current CRC value from the shash_desc context. But this value is uninitialized, causing crypto_shash_digest() to compute the wrong result. Fix it. Probably this wasn't noticed before because lib/crc-t10dif.c only uses crypto_shash_update(), not crypto_shash_digest(). Likewise, crypto_shash_digest() is not yet tested by the crypto self-tests because those only test the ahash API which only uses shash init/update/final. This bug was detected by my patches that improve testmgr to fuzz algorithms against their generic implementation. Fixes: 2d31e518a428 ("crypto: crct10dif - Wrap crc_t10dif function all to use crypto transform framework") Cc: # v3.11+ Cc: Tim Chen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/crct10dif_generic.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c index 8e94e29dc6fc..d08048ae5552 100644 --- a/crypto/crct10dif_generic.c +++ b/crypto/crct10dif_generic.c @@ -65,10 +65,9 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -77,15 +76,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { From e7fd8a2862e0793312a3a0e77c75d573be52f971 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:13 -0700 Subject: [PATCH 075/152] crypto: x86/crct10dif-pcl - fix use via crypto_shash_digest() commit dec3d0b1071a0f3194e66a83d26ecf4aa8c5910e upstream. The ->digest() method of crct10dif-pclmul reads the current CRC value from the shash_desc context. But this value is uninitialized, causing crypto_shash_digest() to compute the wrong result. Fix it. Probably this wasn't noticed before because lib/crc-t10dif.c only uses crypto_shash_update(), not crypto_shash_digest(). Likewise, crypto_shash_digest() is not yet tested by the crypto self-tests because those only test the ahash API which only uses shash init/update/final. Fixes: 0b95a7f85718 ("crypto: crct10dif - Glue code to cast accelerated CRCT10DIF assembly as a crypto transform") Cc: # v3.11+ Cc: Tim Chen Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/crct10dif-pclmul_glue.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index cd4df9322501..7bbfe7d35da7 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -76,15 +76,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { if (irq_fpu_usable()) { kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -93,15 +92,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { From 63efe31cf544000f24b65675731f361d263ff66b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Mar 2019 22:12:46 -0700 Subject: [PATCH 076/152] crypto: arm64/gcm-aes-ce - fix no-NEON fallback code commit 580e295178402d14bbf598a5702f8e01fc59dbaa upstream. The arm64 gcm-aes-ce algorithm is failing the extra crypto self-tests following my patches to test the !may_use_simd() code paths, which previously were untested. The problem is that in the !may_use_simd() case, an odd number of AES blocks can be processed within each step of the skcipher_walk. However, the skcipher_walk is being done with a "stride" of 2 blocks and is advanced by an even number of blocks after each step. This causes the encryption to produce the wrong ciphertext and authentication tag, and causes the decryption to incorrectly fail. Fix it by only processing an even number of blocks per step. Fixes: c2b24c36e0a3 ("crypto: arm64/aes-gcm-ce - fix scatterwalk API violation") Fixes: 71e52c278c54 ("crypto: arm64/aes-ce-gcm - operate on two input blocks at a time") Cc: # v4.19+ Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/ghash-ce-glue.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 067d8937d5af..1ed227bf6106 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -418,9 +418,11 @@ static int gcm_encrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + const int blocks = + walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; + int remaining = blocks; do { __aes_arm64_encrypt(ctx->aes_key.key_enc, @@ -430,9 +432,9 @@ static int gcm_encrypt(struct aead_request *req) dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; - } while (--blocks > 0); + } while (--remaining > 0); - ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg, + ghash_do_update(blocks, dg, walk.dst.virt.addr, &ctx->ghash_key, NULL); @@ -553,7 +555,7 @@ static int gcm_decrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; From 9a61ab6898671b205840f837a4ea840fb45da6ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 18 Apr 2019 14:43:02 -0700 Subject: [PATCH 077/152] crypto: gcm - fix incompatibility between "gcm" and "gcm_base" commit f699594d436960160f6d5ba84ed4a222f20d11cd upstream. GCM instances can be created by either the "gcm" template, which only allows choosing the block cipher, e.g. "gcm(aes)"; or by "gcm_base", which allows choosing the ctr and ghash implementations, e.g. "gcm_base(ctr(aes-generic),ghash-generic)". However, a "gcm_base" instance prevents a "gcm" instance from being registered using the same implementations. Nor will the instance be found by lookups of "gcm". This can be used as a denial of service. Moreover, "gcm_base" instances are never tested by the crypto self-tests, even if there are compatible "gcm" tests. The root cause of these problems is that instances of the two templates use different cra_names. Therefore, fix these problems by making "gcm_base" instances set the same cra_name as "gcm" instances, e.g. "gcm(aes)" instead of "gcm_base(ctr(aes-generic),ghash-generic)". This requires extracting the block cipher name from the name of the ctr algorithm. It also requires starting to verify that the algorithms are really ctr and ghash, not something else entirely. But it would be bizarre if anyone were actually using non-gcm-compatible algorithms with gcm_base, so this shouldn't break anyone in practice. Fixes: d00aa19b507b ("[CRYPTO] gcm: Allow block cipher parameter") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/gcm.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/crypto/gcm.c b/crypto/gcm.c index 0ad879e1f9b2..9b0ea3ded1a4 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -597,7 +597,6 @@ static void crypto_gcm_free(struct aead_instance *inst) static int crypto_gcm_create_common(struct crypto_template *tmpl, struct rtattr **tb, - const char *full_name, const char *ctr_name, const char *ghash_name) { @@ -638,7 +637,8 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, goto err_free_inst; err = -EINVAL; - if (ghash->digestsize != 16) + if (strcmp(ghash->base.cra_name, "ghash") != 0 || + ghash->digestsize != 16) goto err_drop_ghash; crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst)); @@ -650,24 +650,24 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, ctr = crypto_spawn_skcipher_alg(&ctx->ctr); - /* We only support 16-byte blocks. */ + /* The skcipher algorithm must be CTR mode, using 16-byte blocks. */ err = -EINVAL; - if (crypto_skcipher_alg_ivsize(ctr) != 16) - goto out_put_ctr; - - /* Not a stream cipher? */ - if (ctr->base.cra_blocksize != 1) + if (strncmp(ctr->base.cra_name, "ctr(", 4) != 0 || + crypto_skcipher_alg_ivsize(ctr) != 16 || + ctr->base.cra_blocksize != 1) goto out_put_ctr; err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "gcm(%s", ctr->base.cra_name + 4) >= CRYPTO_MAX_ALG_NAME) + goto out_put_ctr; + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)", ctr->base.cra_driver_name, ghash_alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto out_put_ctr; - memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = (ghash->base.cra_flags | ctr->base.cra_flags) & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (ghash->base.cra_priority + @@ -709,7 +709,6 @@ static int crypto_gcm_create(struct crypto_template *tmpl, struct rtattr **tb) { const char *cipher_name; char ctr_name[CRYPTO_MAX_ALG_NAME]; - char full_name[CRYPTO_MAX_ALG_NAME]; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) @@ -719,12 +718,7 @@ static int crypto_gcm_create(struct crypto_template *tmpl, struct rtattr **tb) CRYPTO_MAX_ALG_NAME) return -ENAMETOOLONG; - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm(%s)", cipher_name) >= - CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - return crypto_gcm_create_common(tmpl, tb, full_name, - ctr_name, "ghash"); + return crypto_gcm_create_common(tmpl, tb, ctr_name, "ghash"); } static struct crypto_template crypto_gcm_tmpl = { @@ -738,7 +732,6 @@ static int crypto_gcm_base_create(struct crypto_template *tmpl, { const char *ctr_name; const char *ghash_name; - char full_name[CRYPTO_MAX_ALG_NAME]; ctr_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(ctr_name)) @@ -748,12 +741,7 @@ static int crypto_gcm_base_create(struct crypto_template *tmpl, if (IS_ERR(ghash_name)) return PTR_ERR(ghash_name); - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)", - ctr_name, ghash_name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - return crypto_gcm_create_common(tmpl, tb, full_name, - ctr_name, ghash_name); + return crypto_gcm_create_common(tmpl, tb, ctr_name, ghash_name); } static struct crypto_template crypto_gcm_base_tmpl = { From b7d2adfd0512a10757d216f60ecee50a1aa15b91 Mon Sep 17 00:00:00 2001 From: Zhang Zhijie Date: Fri, 12 Apr 2019 17:16:33 +0800 Subject: [PATCH 078/152] crypto: rockchip - update IV buffer to contain the next IV commit f0cfd57b43fec65761ca61d3892b983a71515f23 upstream. The Kernel Crypto API request output the next IV data to IV buffer for CBC implementation. So the last block data of ciphertext should be copid into assigned IV buffer. Reported-by: Eric Biggers Fixes: 433cd2c617bf ("crypto: rockchip - add crypto driver for rk3288") Cc: # v4.5+ Signed-off-by: Zhang Zhijie Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- .../rockchip/rk3288_crypto_ablkcipher.c | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c index 23305f22072f..204e4ad62c38 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c @@ -250,9 +250,14 @@ static int rk_set_data_start(struct rk_crypto_info *dev) u8 *src_last_blk = page_address(sg_page(dev->sg_src)) + dev->sg_src->offset + dev->sg_src->length - ivsize; - /* store the iv that need to be updated in chain mode */ - if (ctx->mode & RK_CRYPTO_DEC) + /* Store the iv that need to be updated in chain mode. + * And update the IV buffer to contain the next IV for decryption mode. + */ + if (ctx->mode & RK_CRYPTO_DEC) { memcpy(ctx->iv, src_last_blk, ivsize); + sg_pcopy_to_buffer(dev->first, dev->src_nents, req->info, + ivsize, dev->total - ivsize); + } err = dev->load_data(dev, dev->sg_src, dev->sg_dst); if (!err) @@ -288,13 +293,19 @@ static void rk_iv_copyback(struct rk_crypto_info *dev) struct ablkcipher_request *req = ablkcipher_request_cast(dev->async_req); struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(tfm); u32 ivsize = crypto_ablkcipher_ivsize(tfm); - if (ivsize == DES_BLOCK_SIZE) - memcpy_fromio(req->info, dev->reg + RK_CRYPTO_TDES_IV_0, - ivsize); - else if (ivsize == AES_BLOCK_SIZE) - memcpy_fromio(req->info, dev->reg + RK_CRYPTO_AES_IV_0, ivsize); + /* Update the IV buffer to contain the next IV for encryption mode. */ + if (!(ctx->mode & RK_CRYPTO_DEC)) { + if (dev->aligned) { + memcpy(req->info, sg_virt(dev->sg_dst) + + dev->sg_dst->length - ivsize, ivsize); + } else { + memcpy(req->info, dev->addr_vir + + dev->count - ivsize, ivsize); + } + } } static void rk_update_iv(struct rk_crypto_info *dev) From 69b9d32d5139e1a722d3a12bf57f752c835b17b5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 9 Apr 2019 23:46:31 -0700 Subject: [PATCH 079/152] crypto: arm/aes-neonbs - don't access already-freed walk.iv commit 767f015ea0b7ab9d60432ff6cd06b664fd71f50f upstream. If the user-provided IV needs to be aligned to the algorithm's alignmask, then skcipher_walk_virt() copies the IV into a new aligned buffer walk.iv. But skcipher_walk_virt() can fail afterwards, and then if the caller unconditionally accesses walk.iv, it's a use-after-free. arm32 xts-aes-neonbs doesn't set an alignmask, so currently it isn't affected by this despite unconditionally accessing walk.iv. However this is more subtle than desired, and it was actually broken prior to the alignmask being removed by commit cc477bf64573 ("crypto: arm/aes - replace bit-sliced OpenSSL NEON code"). Thus, update xts-aes-neonbs to start checking the return value of skcipher_walk_virt(). Fixes: e4e7f10bfc40 ("ARM: add support for bit sliced AES using NEON instructions") Cc: # v3.13+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm/crypto/aes-neonbs-glue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 07e31941dc67..617c2c99ebfb 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -278,6 +278,8 @@ static int __xts_crypt(struct skcipher_request *req, int err; err = skcipher_walk_virt(&walk, req, true); + if (err) + return err; crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv); From d42d342022b18d13a0cf3a4425011e17f3ecd9cb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 9 Apr 2019 23:46:32 -0700 Subject: [PATCH 080/152] crypto: arm64/aes-neonbs - don't access already-freed walk.iv commit 4a8108b70508df0b6c4ffa4a3974dab93dcbe851 upstream. If the user-provided IV needs to be aligned to the algorithm's alignmask, then skcipher_walk_virt() copies the IV into a new aligned buffer walk.iv. But skcipher_walk_virt() can fail afterwards, and then if the caller unconditionally accesses walk.iv, it's a use-after-free. xts-aes-neonbs doesn't set an alignmask, so currently it isn't affected by this despite unconditionally accessing walk.iv. However this is more subtle than desired, and unconditionally accessing walk.iv has caused a real problem in other algorithms. Thus, update xts-aes-neonbs to start checking the return value of skcipher_walk_virt(). Fixes: 1abee99eafab ("crypto: arm64/aes - reimplement bit-sliced ARM/NEON implementation for arm64") Cc: # v4.11+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-neonbs-glue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index e7a95a566462..5cc248967387 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -304,6 +304,8 @@ static int __xts_crypt(struct skcipher_request *req, int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; kernel_neon_begin(); neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); From 003cf675eb0786f6728de391ec821599c38fcfd8 Mon Sep 17 00:00:00 2001 From: Raul E Rangel Date: Thu, 2 May 2019 13:07:14 -0600 Subject: [PATCH 081/152] mmc: core: Fix tag set memory leak commit 43d8dabb4074cf7f3b1404bfbaeba5aa6f3e5cfc upstream. The tag set is allocated in mmc_init_queue but never freed. This results in a memory leak. This change makes sure we free the tag set when the queue is also freed. Signed-off-by: Raul E Rangel Reviewed-by: Jens Axboe Acked-by: Adrian Hunter Fixes: 81196976ed94 ("mmc: block: Add blk-mq support") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/queue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 6edffeed9953..18aae28845ec 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -494,6 +494,7 @@ void mmc_cleanup_queue(struct mmc_queue *mq) blk_mq_unquiesce_queue(q); blk_cleanup_queue(q); + blk_mq_free_tag_set(&mq->tag_set); /* * A request can be completed before the next request, potentially From 741e3efd8174fbc4278bc3dd8d3d5a3caab7f2c5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 May 2019 15:01:24 +0200 Subject: [PATCH 082/152] ALSA: line6: toneport: Fix broken usage of timer for delayed execution commit 7f84ff68be05ec7a5d2acf8fdc734fe5897af48f upstream. The line6 toneport driver has code for some delayed initialization, and this hits the kernel Oops because mutex and other sleepable functions are used in the timer callback. Fix the abuse by a delayed work instead so that everything works gracefully. Reported-by: syzbot+a07d0142e74fdd595cfb@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/line6/toneport.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c index 19bee725de00..325b07b98b3c 100644 --- a/sound/usb/line6/toneport.c +++ b/sound/usb/line6/toneport.c @@ -54,8 +54,8 @@ struct usb_line6_toneport { /* Firmware version (x 100) */ u8 firmware_version; - /* Timer for delayed PCM startup */ - struct timer_list timer; + /* Work for delayed PCM startup */ + struct delayed_work pcm_work; /* Device type */ enum line6_device_type type; @@ -241,9 +241,10 @@ static int snd_toneport_source_put(struct snd_kcontrol *kcontrol, return 1; } -static void toneport_start_pcm(struct timer_list *t) +static void toneport_start_pcm(struct work_struct *work) { - struct usb_line6_toneport *toneport = from_timer(toneport, t, timer); + struct usb_line6_toneport *toneport = + container_of(work, struct usb_line6_toneport, pcm_work.work); struct usb_line6 *line6 = &toneport->line6; line6_pcm_acquire(line6->line6pcm, LINE6_STREAM_MONITOR, true); @@ -393,7 +394,8 @@ static int toneport_setup(struct usb_line6_toneport *toneport) if (toneport_has_led(toneport)) toneport_update_led(toneport); - mod_timer(&toneport->timer, jiffies + TONEPORT_PCM_DELAY * HZ); + schedule_delayed_work(&toneport->pcm_work, + msecs_to_jiffies(TONEPORT_PCM_DELAY * 1000)); return 0; } @@ -405,7 +407,7 @@ static void line6_toneport_disconnect(struct usb_line6 *line6) struct usb_line6_toneport *toneport = (struct usb_line6_toneport *)line6; - del_timer_sync(&toneport->timer); + cancel_delayed_work_sync(&toneport->pcm_work); if (toneport_has_led(toneport)) toneport_remove_leds(toneport); @@ -422,7 +424,7 @@ static int toneport_init(struct usb_line6 *line6, struct usb_line6_toneport *toneport = (struct usb_line6_toneport *) line6; toneport->type = id->driver_info; - timer_setup(&toneport->timer, toneport_start_pcm, 0); + INIT_DELAYED_WORK(&toneport->pcm_work, toneport_start_pcm); line6->disconnect = line6_toneport_disconnect; From 30dda277333eb29f0ce12fc98fce3e421b0f7dc6 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 27 Apr 2019 01:06:46 -0500 Subject: [PATCH 083/152] ALSA: usb-audio: Fix a memory leak bug commit cb5173594d50c72b7bfa14113dfc5084b4d2f726 upstream. In parse_audio_selector_unit(), the string array 'namelist' is allocated through kmalloc_array(), and each string pointer in this array, i.e., 'namelist[]', is allocated through kmalloc() in the following for loop. Then, a control instance 'kctl' is created by invoking snd_ctl_new1(). If an error occurs during the creation process, the string array 'namelist', including all string pointers in the array 'namelist[]', should be freed, before the error code ENOMEM is returned. However, the current code does not free 'namelist[]', resulting in memory leaks. To fix the above issue, free all string pointers 'namelist[]' in a loop. Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index e7d441d0e839..5a10b1b7f6b9 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2679,6 +2679,8 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, kctl = snd_ctl_new1(&mixer_selectunit_ctl, cval); if (! kctl) { usb_audio_err(state->chip, "cannot malloc kcontrol\n"); + for (i = 0; i < desc->bNrInPins; i++) + kfree(namelist[i]); kfree(namelist); kfree(cval); return -ENOMEM; From 8c827cda2864645013a7b78109bfc400464c4b56 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 6 May 2019 22:09:31 +0800 Subject: [PATCH 084/152] ALSA: hda/hdmi - Read the pin sense from register when repolling commit 8c2e6728c2bf95765b724e07d0278ae97cd1ee0d upstream. The driver will check the monitor presence when resuming from suspend, starting poll or interrupt triggers. In these 3 situations, the jack_dirty will be set to 1 first, then the hda_jack.c reads the pin_sense from register, after reading the register, the jack_dirty will be set to 0. But hdmi_repoll_work() is enabled in these 3 situations, It will read the pin_sense a couple of times subsequently, since the jack_dirty is 0 now, It does not read the register anymore, instead it uses the shadow pin_sense which is read at the first time. It is meaningless to check the shadow pin_sense a couple of times, we need to read the register to check the real plugging state, so we set the jack_dirty to 1 in the hdmi_repoll_work(). Signed-off-by: Hui Wang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index cb587dce67a9..da95c581f614 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1660,6 +1660,11 @@ static void hdmi_repoll_eld(struct work_struct *work) container_of(to_delayed_work(work), struct hdmi_spec_per_pin, work); struct hda_codec *codec = per_pin->codec; struct hdmi_spec *spec = codec->spec; + struct hda_jack_tbl *jack; + + jack = snd_hda_jack_tbl_get(codec, per_pin->pin_nid); + if (jack) + jack->jack_dirty = 1; if (per_pin->repoll_count++ > 6) per_pin->repoll_count = 0; From 4ac6316a7c0f994700dfa71abd6d0c5e2a024771 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 6 May 2019 22:09:32 +0800 Subject: [PATCH 085/152] ALSA: hda/hdmi - Consider eld_valid when reporting jack event commit 7f641e26a6df9269cb25dd7a4b0a91d6586ed441 upstream. On the machines with AMD GPU or Nvidia GPU, we often meet this issue: after s3, there are 4 HDMI/DP audio devices in the gnome-sound-setting even there is no any monitors plugged. When this problem happens, we check the /proc/asound/cardX/eld#N.M, we will find the monitor_present=1, eld_valid=0. The root cause is BIOS or GPU driver makes the PRESENCE valid even no monitor plugged, and of course the driver will not get the valid eld_data subsequently. In this situation, we should not report the jack_plugged event, to do so, let us change the function hdmi_present_sense_via_verbs(). In this function, it reads the pin_sense via snd_hda_pin_sense(), after calling this function, the jack_dirty is 0, and before exiting via_verbs(), we change the shadow pin_sense according to both monitor_present and eld_valid, then in the snd_hda_jack_report_sync(), since the jack_dirty is still 0, it will report jack event according to this modified shadow pin_sense. After this change, the driver will not report Jack_is_plugged event through hdmi_present_sense_via_verbs() if monitor_present is 1 and eld_valid is 0. Signed-off-by: Hui Wang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index da95c581f614..35931a18418f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1548,9 +1548,11 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, ret = !repoll || !eld->monitor_present || eld->eld_valid; jack = snd_hda_jack_tbl_get(codec, pin_nid); - if (jack) + if (jack) { jack->block_report = !ret; - + jack->pin_sense = (eld->monitor_present && eld->eld_valid) ? + AC_PINSENSE_PRESENCE : 0; + } mutex_unlock(&per_pin->lock); return ret; } From d33f6063b7c351378ff27503633285df35f7d360 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 26 Apr 2019 16:35:41 +0800 Subject: [PATCH 086/152] ALSA: hda/realtek - EAPD turn on later commit 607ca3bd220f4022e6f5356026b19dafc363863a upstream. Let EAPD turn on after set pin output. [ NOTE: This change is supposed to reduce the possible click noises at (runtime) PM resume. The functionality should be same (i.e. the verbs are executed correctly) no matter which order is, so this should be safe to apply for all codecs -- tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 75a0be2aa9c2..850af4919bf4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -803,11 +803,10 @@ static int alc_init(struct hda_codec *codec) if (spec->init_hook) spec->init_hook(codec); + snd_hda_gen_init(codec); alc_fix_pll(codec); alc_auto_init_amp(codec, spec->init_amp); - snd_hda_gen_init(codec); - snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); return 0; From e13bac4031eb8f4259a69120cc1893cdaabf6038 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Tue, 7 May 2019 17:11:08 -0400 Subject: [PATCH 087/152] ALSA: hdea/realtek - Headset fixup for System76 Gazelle (gaze14) commit 80a5052db75131423b67f38b21958555d7d970e4 upstream. On the System76 Gazelle (gaze14), there is a headset microphone input attached to 0x1a that does not have a jack detect. In order to get it working, the pin configuration needs to be set correctly, and the ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC fixup needs to be applied. This is identical to the patch already applied for the System76 Darter Pro (darp5). Signed-off-by: Jeremy Soller Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 850af4919bf4..c72a8ec5dc2f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6840,6 +6840,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8550, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), From 7295359bd6ac859a66ee11777c5628eccf22d545 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 1 May 2019 15:29:38 +0100 Subject: [PATCH 088/152] ASoC: max98090: Fix restore of DAPM Muxes commit ecb2795c08bc825ebd604997e5be440b060c5b18 upstream. The max98090 driver defines 3 DAPM muxes; one for the right line output (LINMOD Mux), one for the left headphone mixer source (MIXHPLSEL Mux) and one for the right headphone mixer source (MIXHPRSEL Mux). The same bit is used for the mux as well as the DAPM enable, and although the mux can be correctly configured, after playback has completed, the mux will be reset during the disable phase. This is preventing the state of these muxes from being saved and restored correctly on system reboot. Fix this by marking these muxes as SND_SOC_NOPM. Note this has been verified this on the Tegra124 Nyan Big which features the MAX98090 codec. Signed-off-by: Jon Hunter Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/max98090.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index c97f21836c66..f06ae43650a3 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1209,14 +1209,14 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = { &max98090_right_rcv_mixer_controls[0], ARRAY_SIZE(max98090_right_rcv_mixer_controls)), - SND_SOC_DAPM_MUX("LINMOD Mux", M98090_REG_LOUTR_MIXER, - M98090_LINMOD_SHIFT, 0, &max98090_linmod_mux), + SND_SOC_DAPM_MUX("LINMOD Mux", SND_SOC_NOPM, 0, 0, + &max98090_linmod_mux), - SND_SOC_DAPM_MUX("MIXHPLSEL Mux", M98090_REG_HP_CONTROL, - M98090_MIXHPLSEL_SHIFT, 0, &max98090_mixhplsel_mux), + SND_SOC_DAPM_MUX("MIXHPLSEL Mux", SND_SOC_NOPM, 0, 0, + &max98090_mixhplsel_mux), - SND_SOC_DAPM_MUX("MIXHPRSEL Mux", M98090_REG_HP_CONTROL, - M98090_MIXHPRSEL_SHIFT, 0, &max98090_mixhprsel_mux), + SND_SOC_DAPM_MUX("MIXHPRSEL Mux", SND_SOC_NOPM, 0, 0, + &max98090_mixhprsel_mux), SND_SOC_DAPM_PGA("HP Left Out", M98090_REG_OUTPUT_ENABLE, M98090_HPLEN_SHIFT, 0, NULL, 0), From df9f111db8711835dec2b6ac94a87c8018510e49 Mon Sep 17 00:00:00 2001 From: Curtis Malainey Date: Fri, 3 May 2019 12:32:14 -0700 Subject: [PATCH 089/152] ASoC: RT5677-SPI: Disable 16Bit SPI Transfers commit a46eb523220e242affb9a6bc9bb8efc05f4f7459 upstream. The current algorithm allows 3 types of transfers, 16bit, 32bit and burst. According to Realtek, 16bit transfers have a special restriction in that it is restricted to the memory region of 0x18020000 ~ 0x18021000. This region is the memory location of the I2C registers. The current algorithm does not uphold this restriction and therefore fails to complete writes. Since this has been broken for some time it likely no one is using it. Better to simply disable the 16 bit writes. This will allow users to properly load firmware over SPI without data corruption. Signed-off-by: Curtis Malainey Reviewed-by: Ben Zhang Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5677-spi.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/sound/soc/codecs/rt5677-spi.c b/sound/soc/codecs/rt5677-spi.c index bd51f3655ee3..06abcd017650 100644 --- a/sound/soc/codecs/rt5677-spi.c +++ b/sound/soc/codecs/rt5677-spi.c @@ -58,13 +58,15 @@ static DEFINE_MUTEX(spi_mutex); * RT5677_SPI_READ/WRITE_32: Transfer 4 bytes * RT5677_SPI_READ/WRITE_BURST: Transfer any multiples of 8 bytes * - * For example, reading 260 bytes at 0x60030002 uses the following commands: - * 0x60030002 RT5677_SPI_READ_16 2 bytes + * Note: + * 16 Bit writes and reads are restricted to the address range + * 0x18020000 ~ 0x18021000 + * + * For example, reading 256 bytes at 0x60030004 uses the following commands: * 0x60030004 RT5677_SPI_READ_32 4 bytes * 0x60030008 RT5677_SPI_READ_BURST 240 bytes * 0x600300F8 RT5677_SPI_READ_BURST 8 bytes * 0x60030100 RT5677_SPI_READ_32 4 bytes - * 0x60030104 RT5677_SPI_READ_16 2 bytes * * Input: * @read: true for read commands; false for write commands @@ -79,15 +81,13 @@ static u8 rt5677_spi_select_cmd(bool read, u32 align, u32 remain, u32 *len) { u8 cmd; - if (align == 2 || align == 6 || remain == 2) { - cmd = RT5677_SPI_READ_16; - *len = 2; - } else if (align == 4 || remain <= 6) { + if (align == 4 || remain <= 4) { cmd = RT5677_SPI_READ_32; *len = 4; } else { cmd = RT5677_SPI_READ_BURST; - *len = min_t(u32, remain & ~7, RT5677_SPI_BURST_LEN); + *len = (((remain - 1) >> 3) + 1) << 3; + *len = min_t(u32, *len, RT5677_SPI_BURST_LEN); } return read ? cmd : cmd + 1; } @@ -108,7 +108,7 @@ static void rt5677_spi_reverse(u8 *dst, u32 dstlen, const u8 *src, u32 srclen) } } -/* Read DSP address space using SPI. addr and len have to be 2-byte aligned. */ +/* Read DSP address space using SPI. addr and len have to be 4-byte aligned. */ int rt5677_spi_read(u32 addr, void *rxbuf, size_t len) { u32 offset; @@ -124,7 +124,7 @@ int rt5677_spi_read(u32 addr, void *rxbuf, size_t len) if (!g_spi) return -ENODEV; - if ((addr & 1) || (len & 1)) { + if ((addr & 3) || (len & 3)) { dev_err(&g_spi->dev, "Bad read align 0x%x(%zu)\n", addr, len); return -EACCES; } @@ -159,13 +159,13 @@ int rt5677_spi_read(u32 addr, void *rxbuf, size_t len) } EXPORT_SYMBOL_GPL(rt5677_spi_read); -/* Write DSP address space using SPI. addr has to be 2-byte aligned. - * If len is not 2-byte aligned, an extra byte of zero is written at the end +/* Write DSP address space using SPI. addr has to be 4-byte aligned. + * If len is not 4-byte aligned, then extra zeros are written at the end * as padding. */ int rt5677_spi_write(u32 addr, const void *txbuf, size_t len) { - u32 offset, len_with_pad = len; + u32 offset; int status = 0; struct spi_transfer t; struct spi_message m; @@ -178,22 +178,19 @@ int rt5677_spi_write(u32 addr, const void *txbuf, size_t len) if (!g_spi) return -ENODEV; - if (addr & 1) { + if (addr & 3) { dev_err(&g_spi->dev, "Bad write align 0x%x(%zu)\n", addr, len); return -EACCES; } - if (len & 1) - len_with_pad = len + 1; - memset(&t, 0, sizeof(t)); t.tx_buf = buf; t.speed_hz = RT5677_SPI_FREQ; spi_message_init_with_transfers(&m, &t, 1); - for (offset = 0; offset < len_with_pad;) { + for (offset = 0; offset < len;) { spi_cmd = rt5677_spi_select_cmd(false, (addr + offset) & 7, - len_with_pad - offset, &t.len); + len - offset, &t.len); /* Construct SPI message header */ buf[0] = spi_cmd; From 975ef5c2f6ca60305385698862845fec15022f96 Mon Sep 17 00:00:00 2001 From: "S.j. Wang" Date: Sun, 28 Apr 2019 02:24:27 +0000 Subject: [PATCH 090/152] ASoC: fsl_esai: Fix missing break in switch statement commit 903c220b1ece12f17c868e43f2243b8f81ff2d4c upstream. case ESAI_HCKT_EXTAL and case ESAI_HCKR_EXTAL should be independent of each other, so replace fall-through with break. Fixes: 43d24e76b698 ("ASoC: fsl_esai: Add ESAI CPU DAI driver") Signed-off-by: Shengjiu Wang Acked-by: Nicolin Chen Cc: Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/fsl_esai.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index 38fd32ab443c..ff96db91f818 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -251,7 +251,7 @@ static int fsl_esai_set_dai_sysclk(struct snd_soc_dai *dai, int clk_id, break; case ESAI_HCKT_EXTAL: ecr |= ESAI_ECR_ETI; - /* fall through */ + break; case ESAI_HCKR_EXTAL: ecr |= ESAI_ECR_ERI; break; From f3714257c4220d45f60bf247b6f5550b3afc476e Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Sat, 13 Apr 2019 21:18:12 +0800 Subject: [PATCH 091/152] ASoC: codec: hdac_hdmi add device_link to card device commit 01c8327667c249818d3712c3e25c7ad2aca7f389 upstream. In resume from S3, HDAC HDMI codec driver dapm event callback may be operated before HDMI codec driver turns on the display audio power domain because of the contest between display driver and hdmi codec driver. This patch adds the device_link between soc card device (consumer) and hdmi codec device (supplier) to make sure the sequence is always correct. Signed-off-by: Libin Yang Reviewed-by: Takashi Iwai Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/hdac_hdmi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index b61d518f4fef..63487240b61e 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1828,6 +1828,17 @@ static int hdmi_codec_probe(struct snd_soc_component *component) /* Imp: Store the card pointer in hda_codec */ hdmi->card = dapm->card->snd_card; + /* + * Setup a device_link between card device and HDMI codec device. + * The card device is the consumer and the HDMI codec device is + * the supplier. With this setting, we can make sure that the audio + * domain in display power will be always turned on before operating + * on the HDMI audio codec registers. + * Let's use the flag DL_FLAG_AUTOREMOVE_CONSUMER. This can make + * sure the device link is freed when the machine driver is removed. + */ + device_link_add(component->card->dev, &hdev->dev, DL_FLAG_RPM_ACTIVE | + DL_FLAG_AUTOREMOVE_CONSUMER); /* * hdac_device core already sets the state to active and calls * get_noresume. So enable runtime and set the device to suspend. From 1bfceb375034f7f3222c49e3fbe5bfe820bdcba0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Apr 2019 21:48:21 +0200 Subject: [PATCH 092/152] bpf, arm64: remove prefetch insn in xadd mapping commit 8968c67a82ab7501bc3b9439c3624a49b42fe54c upstream. Prefetch-with-intent-to-write is currently part of the XADD mapping in the AArch64 JIT and follows the kernel's implementation of atomic_add. This may interfere with other threads executing the LDXR/STXR loop, leading to potential starvation and fairness issues. Drop the optional prefetch instruction. Fixes: 85f68fe89832 ("bpf, arm64: implement jiting of BPF_XADD") Reported-by: Will Deacon Signed-off-by: Daniel Borkmann Acked-by: Jean-Philippe Brucker Acked-by: Will Deacon Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- arch/arm64/net/bpf_jit.h | 6 ------ arch/arm64/net/bpf_jit_comp.c | 1 - 2 files changed, 7 deletions(-) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 783de51a6c4e..6c881659ee8a 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -100,12 +100,6 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* Prefetch */ -#define A64_PRFM(Rn, type, target, policy) \ - aarch64_insn_gen_prefetch(Rn, AARCH64_INSN_PRFM_TYPE_##type, \ - AARCH64_INSN_PRFM_TARGET_##target, \ - AARCH64_INSN_PRFM_POLICY_##policy) - /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a6fdaea07c63..2eef156b38bb 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -736,7 +736,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_STX | BPF_XADD | BPF_DW: emit_a64_mov_i(1, tmp, off, ctx); emit(A64_ADD(1, tmp, tmp, dst), ctx); - emit(A64_PRFM(tmp, PST, L1, STRM), ctx); emit(A64_LDXR(isdw, tmp2, tmp), ctx); emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); From 642de1c00a14f875a20c8fb832bae820f02089d0 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 18 Apr 2019 16:38:48 +0300 Subject: [PATCH 093/152] crypto: ccree - remove special handling of chained sg commit c4b22bf51b815fb61a35a27fc847a88bc28ebb63 upstream. We were handling chained scattergather lists with specialized code needlessly as the regular sg APIs handle them just fine. The code handling this also had an (unused) code path with a use-before-init error, flagged by Coverity. Remove all special handling of chained sg and leave their handling to the regular sg APIs. Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_buffer_mgr.c | 98 +++++++--------------------- 1 file changed, 22 insertions(+), 76 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 3bcb6bce666e..b9138ad14e1d 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -83,24 +83,17 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, */ static unsigned int cc_get_sgl_nents(struct device *dev, struct scatterlist *sg_list, - unsigned int nbytes, u32 *lbytes, - bool *is_chained) + unsigned int nbytes, u32 *lbytes) { unsigned int nents = 0; while (nbytes && sg_list) { - if (sg_list->length) { - nents++; - /* get the number of bytes in the last entry */ - *lbytes = nbytes; - nbytes -= (sg_list->length > nbytes) ? - nbytes : sg_list->length; - sg_list = sg_next(sg_list); - } else { - sg_list = (struct scatterlist *)sg_page(sg_list); - if (is_chained) - *is_chained = true; - } + nents++; + /* get the number of bytes in the last entry */ + *lbytes = nbytes; + nbytes -= (sg_list->length > nbytes) ? + nbytes : sg_list->length; + sg_list = sg_next(sg_list); } dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes); return nents; @@ -142,7 +135,7 @@ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, { u32 nents, lbytes; - nents = cc_get_sgl_nents(dev, sg, end, &lbytes, NULL); + nents = cc_get_sgl_nents(dev, sg, end, &lbytes); sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip, (direct == CC_SG_TO_BUF)); } @@ -311,40 +304,10 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, sgl_data->num_of_buffers++; } -static int cc_dma_map_sg(struct device *dev, struct scatterlist *sg, u32 nents, - enum dma_data_direction direction) -{ - u32 i, j; - struct scatterlist *l_sg = sg; - - for (i = 0; i < nents; i++) { - if (!l_sg) - break; - if (dma_map_sg(dev, l_sg, 1, direction) != 1) { - dev_err(dev, "dma_map_page() sg buffer failed\n"); - goto err; - } - l_sg = sg_next(l_sg); - } - return nents; - -err: - /* Restore mapped parts */ - for (j = 0; j < i; j++) { - if (!sg) - break; - dma_unmap_sg(dev, sg, 1, direction); - sg = sg_next(sg); - } - return 0; -} - static int cc_map_sg(struct device *dev, struct scatterlist *sg, unsigned int nbytes, int direction, u32 *nents, u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents) { - bool is_chained = false; - if (sg_is_last(sg)) { /* One entry only case -set to DLLI */ if (dma_map_sg(dev, sg, 1, direction) != 1) { @@ -358,35 +321,21 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, *nents = 1; *mapped_nents = 1; } else { /*sg_is_last*/ - *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes, - &is_chained); + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); if (*nents > max_sg_nents) { *nents = 0; dev_err(dev, "Too many fragments. current %d max %d\n", *nents, max_sg_nents); return -ENOMEM; } - if (!is_chained) { - /* In case of mmu the number of mapped nents might - * be changed from the original sgl nents - */ - *mapped_nents = dma_map_sg(dev, sg, *nents, direction); - if (*mapped_nents == 0) { - *nents = 0; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } - } else { - /*In this case the driver maps entry by entry so it - * must have the same nents before and after map - */ - *mapped_nents = cc_dma_map_sg(dev, sg, *nents, - direction); - if (*mapped_nents != *nents) { - *nents = *mapped_nents; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } + /* In case of mmu the number of mapped nents might + * be changed from the original sgl nents + */ + *mapped_nents = dma_map_sg(dev, sg, *nents, direction); + if (*mapped_nents == 0) { + *nents = 0; + dev_err(dev, "dma_map_sg() sg buffer failed\n"); + return -ENOMEM; } } @@ -571,7 +520,6 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_drvdata *drvdata = dev_get_drvdata(dev); u32 dummy; - bool chained; u32 size_to_unmap = 0; if (areq_ctx->mac_buf_dma_addr) { @@ -636,15 +584,14 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) size_to_unmap += crypto_aead_ivsize(tfm); dma_unmap_sg(dev, req->src, - cc_get_sgl_nents(dev, req->src, size_to_unmap, - &dummy, &chained), + cc_get_sgl_nents(dev, req->src, size_to_unmap, &dummy), DMA_BIDIRECTIONAL); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); dma_unmap_sg(dev, req->dst, cc_get_sgl_nents(dev, req->dst, size_to_unmap, - &dummy, &chained), + &dummy), DMA_BIDIRECTIONAL); } if (drvdata->coherent && @@ -1022,7 +969,6 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, unsigned int size_for_map = req->assoclen + req->cryptlen; struct crypto_aead *tfm = crypto_aead_reqtfm(req); u32 sg_index = 0; - bool chained = false; bool is_gcm4543 = areq_ctx->is_gcm4543; u32 size_to_skip = req->assoclen; @@ -1043,7 +989,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? authsize : 0; src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map, - &src_last_bytes, &chained); + &src_last_bytes); sg_index = areq_ctx->src_sgl->length; //check where the data starts while (sg_index <= size_to_skip) { @@ -1085,7 +1031,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, } dst_mapped_nents = cc_get_sgl_nents(dev, req->dst, size_for_map, - &dst_last_bytes, &chained); + &dst_last_bytes); sg_index = areq_ctx->dst_sgl->length; offset = size_to_skip; @@ -1486,7 +1432,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, dev_dbg(dev, " less than one block: curr_buff=%pK *curr_buff_cnt=0x%X copy_to=%pK\n", curr_buff, *curr_buff_cnt, &curr_buff[*curr_buff_cnt]); areq_ctx->in_nents = - cc_get_sgl_nents(dev, src, nbytes, &dummy, NULL); + cc_get_sgl_nents(dev, src, nbytes, &dummy); sg_copy_to_buffer(src, areq_ctx->in_nents, &curr_buff[*curr_buff_cnt], nbytes); *curr_buff_cnt += nbytes; From 7560c0adad344a4834e2a8e809b6d671d199f0f0 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 18 Apr 2019 16:38:50 +0300 Subject: [PATCH 094/152] crypto: ccree - fix mem leak on error path commit d574b707c873d6ef1a2a155f8cfcfecd821e9a2e upstream. Fix a memory leak on the error path of IV generation code. Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_ivgen.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/ccree/cc_ivgen.c b/drivers/crypto/ccree/cc_ivgen.c index 769458323394..1abec3896a78 100644 --- a/drivers/crypto/ccree/cc_ivgen.c +++ b/drivers/crypto/ccree/cc_ivgen.c @@ -154,9 +154,6 @@ void cc_ivgen_fini(struct cc_drvdata *drvdata) } ivgen_ctx->pool = NULL_SRAM_ADDR; - - /* release "this" context */ - kfree(ivgen_ctx); } /*! @@ -174,10 +171,12 @@ int cc_ivgen_init(struct cc_drvdata *drvdata) int rc; /* Allocate "this" context */ - ivgen_ctx = kzalloc(sizeof(*ivgen_ctx), GFP_KERNEL); + ivgen_ctx = devm_kzalloc(device, sizeof(*ivgen_ctx), GFP_KERNEL); if (!ivgen_ctx) return -ENOMEM; + drvdata->ivgen_handle = ivgen_ctx; + /* Allocate pool's header for initial enc. key/IV */ ivgen_ctx->pool_meta = dma_alloc_coherent(device, CC_IVPOOL_META_SIZE, &ivgen_ctx->pool_meta_dma, @@ -196,8 +195,6 @@ int cc_ivgen_init(struct cc_drvdata *drvdata) goto out; } - drvdata->ivgen_handle = ivgen_ctx; - return cc_init_iv_sram(drvdata); out: From 766121a0a798d62a57b265ae3253508ba8d79a10 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 18 Apr 2019 16:39:04 +0300 Subject: [PATCH 095/152] crypto: ccree - don't map MAC key on stack commit 874e163759f27e0a9988c5d1f4605e3f25564fd2 upstream. The MAC hash key might be passed to us on stack. Copy it to a slab buffer before mapping to gurantee proper DMA mapping. Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_hash.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index b9313306c36f..7a1c3eeb10e7 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -64,6 +64,7 @@ struct cc_hash_alg { struct hash_key_req_ctx { u32 keylen; dma_addr_t key_dma_addr; + u8 *key; }; /* hash per-session context */ @@ -724,13 +725,20 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, ctx->key_params.keylen = keylen; ctx->key_params.key_dma_addr = 0; ctx->is_hmac = true; + ctx->key_params.key = NULL; if (keylen) { + ctx->key_params.key = kmemdup(key, keylen, GFP_KERNEL); + if (!ctx->key_params.key) + return -ENOMEM; + ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); + dma_map_single(dev, (void *)ctx->key_params.key, keylen, + DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", - key, keylen); + ctx->key_params.key, keylen); + kzfree(ctx->key_params.key); return -ENOMEM; } dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n", @@ -881,6 +889,9 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n", &ctx->key_params.key_dma_addr, ctx->key_params.keylen); } + + kzfree(ctx->key_params.key); + return rc; } @@ -907,11 +918,16 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, ctx->key_params.keylen = keylen; + ctx->key_params.key = kmemdup(key, keylen, GFP_KERNEL); + if (!ctx->key_params.key) + return -ENOMEM; + ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); + dma_map_single(dev, ctx->key_params.key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); + kzfree(ctx->key_params.key); return -ENOMEM; } dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n", @@ -963,6 +979,8 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n", &ctx->key_params.key_dma_addr, ctx->key_params.keylen); + kzfree(ctx->key_params.key); + return rc; } From ca687cdb615918e30b9cb3f21197f8df9bfbef08 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 18 Apr 2019 16:39:02 +0300 Subject: [PATCH 096/152] crypto: ccree - use correct internal state sizes for export commit f3df82b468f00cca241d96ee3697c9a5e7fb6bd0 upstream. We were computing the size of the import buffer based on the digest size but the 318 and 224 byte variants use 512 and 256 bytes internal state sizes respectfully, thus causing the import buffer to overrun. Fix it by using the right sizes. Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_hash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 7a1c3eeb10e7..2cadd7a21844 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -1616,7 +1616,7 @@ static struct cc_hash_template driver_hash[] = { .setkey = cc_hash_setkey, .halg = { .digestsize = SHA224_DIGEST_SIZE, - .statesize = CC_STATE_SIZE(SHA224_DIGEST_SIZE), + .statesize = CC_STATE_SIZE(SHA256_DIGEST_SIZE), }, }, .hash_mode = DRV_HASH_SHA224, @@ -1641,7 +1641,7 @@ static struct cc_hash_template driver_hash[] = { .setkey = cc_hash_setkey, .halg = { .digestsize = SHA384_DIGEST_SIZE, - .statesize = CC_STATE_SIZE(SHA384_DIGEST_SIZE), + .statesize = CC_STATE_SIZE(SHA512_DIGEST_SIZE), }, }, .hash_mode = DRV_HASH_SHA384, From 120ab825c6fd3d6589a9dc056273b5f265e766f5 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 18 Apr 2019 16:39:05 +0300 Subject: [PATCH 097/152] crypto: ccree - don't map AEAD key and IV on stack commit e8662a6a5f8f7f2cadc0edb934aef622d96ac3ee upstream. The AEAD authenc key and IVs might be passed to us on stack. Copy it to a slab buffer before mapping to gurantee proper DMA mapping. Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_aead.c | 11 ++++++++++- drivers/crypto/ccree/cc_buffer_mgr.c | 15 ++++++++++++--- drivers/crypto/ccree/cc_driver.h | 1 + 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 5852d29ae2da..0669033f5be5 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -415,7 +415,7 @@ static int validate_keys_sizes(struct cc_aead_ctx *ctx) /* This function prepers the user key so it can pass to the hmac processing * (copy to intenral buffer or hash in case of key longer than block */ -static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, +static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, unsigned int keylen) { dma_addr_t key_dma_addr = 0; @@ -428,6 +428,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, unsigned int hashmode; unsigned int idx = 0; int rc = 0; + u8 *key = NULL; struct cc_hw_desc desc[MAX_AEAD_SETKEY_SEQ]; dma_addr_t padded_authkey_dma_addr = ctx->auth_state.hmac.padded_authkey_dma_addr; @@ -446,11 +447,17 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, } if (keylen != 0) { + + key = kmemdup(authkey, keylen, GFP_KERNEL); + if (!key) + return -ENOMEM; + key_dma_addr = dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); + kzfree(key); return -ENOMEM; } if (keylen > blocksize) { @@ -533,6 +540,8 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key, if (key_dma_addr) dma_unmap_single(dev, key_dma_addr, keylen, DMA_TO_DEVICE); + kzfree(key); + return rc; } diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index b9138ad14e1d..90b4870078fb 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -560,6 +560,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) if (areq_ctx->gen_ctx.iv_dma_addr) { dma_unmap_single(dev, areq_ctx->gen_ctx.iv_dma_addr, hw_iv_size, DMA_BIDIRECTIONAL); + kzfree(areq_ctx->gen_ctx.iv); } /* Release pool */ @@ -664,19 +665,27 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, struct aead_req_ctx *areq_ctx = aead_request_ctx(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct device *dev = drvdata_to_dev(drvdata); + gfp_t flags = cc_gfp_flags(&req->base); int rc = 0; if (!req->iv) { areq_ctx->gen_ctx.iv_dma_addr = 0; + areq_ctx->gen_ctx.iv = NULL; goto chain_iv_exit; } - areq_ctx->gen_ctx.iv_dma_addr = dma_map_single(dev, req->iv, - hw_iv_size, - DMA_BIDIRECTIONAL); + areq_ctx->gen_ctx.iv = kmemdup(req->iv, hw_iv_size, flags); + if (!areq_ctx->gen_ctx.iv) + return -ENOMEM; + + areq_ctx->gen_ctx.iv_dma_addr = + dma_map_single(dev, areq_ctx->gen_ctx.iv, hw_iv_size, + DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, areq_ctx->gen_ctx.iv_dma_addr)) { dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", hw_iv_size, req->iv); + kzfree(areq_ctx->gen_ctx.iv); + areq_ctx->gen_ctx.iv = NULL; rc = -ENOMEM; goto chain_iv_exit; } diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index d608a4faf662..be7f9bd5c559 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -162,6 +162,7 @@ struct cc_alg_template { struct async_gen_req_ctx { dma_addr_t iv_dma_addr; + u8 *iv; enum drv_crypto_direction op_type; }; From 1a4fc3d29632fe86ee461dee9ca8ab26aaa95806 Mon Sep 17 00:00:00 2001 From: Ofir Drang Date: Thu, 18 Apr 2019 16:39:06 +0300 Subject: [PATCH 098/152] crypto: ccree - pm resume first enable the source clk commit 7766dd774d80463cec7b81d90c8672af91de2da1 upstream. On power management resume function first enable the device clk source to allow access to the device registers. Signed-off-by: Ofir Drang Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_pm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index d990f472e89f..d433804dead3 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -42,14 +42,15 @@ int cc_pm_resume(struct device *dev) struct cc_drvdata *drvdata = dev_get_drvdata(dev); dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n"); - cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); - + /* Enables the device source clk */ rc = cc_clk_on(drvdata); if (rc) { dev_err(dev, "failed getting clock back on. We're toast.\n"); return rc; } + cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); + rc = init_cc_regs(drvdata, false); if (rc) { dev_err(dev, "init_cc_regs (%x)\n", rc); From 65f5c14a60118e97bbd8fc1f78232c6ceb0efd63 Mon Sep 17 00:00:00 2001 From: Ofir Drang Date: Thu, 18 Apr 2019 16:39:08 +0300 Subject: [PATCH 099/152] crypto: ccree - HOST_POWER_DOWN_EN should be the last CC access during suspend commit 3499efbeed39d114873267683b9e776bcb34b058 upstream. During power management suspend the driver need to prepare the device for the power down operation and as a last indication write to the HOST_POWER_DOWN_EN register which signals to the hardware that The ccree is ready for power down. Signed-off-by: Ofir Drang Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index d433804dead3..27e363ffb467 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -25,13 +25,13 @@ int cc_pm_suspend(struct device *dev) int rc; dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); - cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); rc = cc_suspend_req_queue(drvdata); if (rc) { dev_err(dev, "cc_suspend_req_queue (%x)\n", rc); return rc; } fini_cc_regs(drvdata); + cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); cc_clk_off(drvdata); return 0; } From 4fb3d87ee7b7d82498fa05d46b8cd6a4a9bf8fe1 Mon Sep 17 00:00:00 2001 From: Ofir Drang Date: Thu, 18 Apr 2019 16:39:09 +0300 Subject: [PATCH 100/152] crypto: ccree - add function to handle cryptocell tee fips error commit 897ab2316910a66bb048f1c9cefa25e6a592dcd7 upstream. Adds function that checks if cryptocell tee fips error occurred and in such case triggers system error through kernel panic. Change fips function to use this new routine. Signed-off-by: Ofir Drang Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_fips.c | 23 +++++++++++++++-------- drivers/crypto/ccree/cc_fips.h | 2 ++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/ccree/cc_fips.c b/drivers/crypto/ccree/cc_fips.c index b4d0a6d983e0..09f708f6418e 100644 --- a/drivers/crypto/ccree/cc_fips.c +++ b/drivers/crypto/ccree/cc_fips.c @@ -72,20 +72,28 @@ static inline void tee_fips_error(struct device *dev) dev_err(dev, "TEE reported error!\n"); } +/* + * This function check if cryptocell tee fips error occurred + * and in such case triggers system error + */ +void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata) +{ + struct device *dev = drvdata_to_dev(p_drvdata); + + if (!cc_get_tee_fips_status(p_drvdata)) + tee_fips_error(dev); +} + /* Deferred service handler, run as interrupt-fired tasklet */ static void fips_dsr(unsigned long devarg) { struct cc_drvdata *drvdata = (struct cc_drvdata *)devarg; - struct device *dev = drvdata_to_dev(drvdata); - u32 irq, state, val; + u32 irq, val; irq = (drvdata->irq & (CC_GPR0_IRQ_MASK)); if (irq) { - state = cc_ioread(drvdata, CC_REG(GPR_HOST)); - - if (state != (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK)) - tee_fips_error(dev); + cc_tee_handle_fips_error(drvdata); } /* after verifing that there is nothing to do, @@ -113,8 +121,7 @@ int cc_fips_init(struct cc_drvdata *p_drvdata) dev_dbg(dev, "Initializing fips tasklet\n"); tasklet_init(&fips_h->tasklet, fips_dsr, (unsigned long)p_drvdata); - if (!cc_get_tee_fips_status(p_drvdata)) - tee_fips_error(dev); + cc_tee_handle_fips_error(p_drvdata); return 0; } diff --git a/drivers/crypto/ccree/cc_fips.h b/drivers/crypto/ccree/cc_fips.h index 645e096a7a82..67d5fbfa09b5 100644 --- a/drivers/crypto/ccree/cc_fips.h +++ b/drivers/crypto/ccree/cc_fips.h @@ -18,6 +18,7 @@ int cc_fips_init(struct cc_drvdata *p_drvdata); void cc_fips_fini(struct cc_drvdata *drvdata); void fips_handler(struct cc_drvdata *drvdata); void cc_set_ree_fips_status(struct cc_drvdata *drvdata, bool ok); +void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata); #else /* CONFIG_CRYPTO_FIPS */ @@ -30,6 +31,7 @@ static inline void cc_fips_fini(struct cc_drvdata *drvdata) {} static inline void cc_set_ree_fips_status(struct cc_drvdata *drvdata, bool ok) {} static inline void fips_handler(struct cc_drvdata *drvdata) {} +static inline void cc_tee_handle_fips_error(struct cc_drvdata *p_drvdata) {} #endif /* CONFIG_CRYPTO_FIPS */ From 681f3695d514e5154997c9c3d04f4ff7128a4382 Mon Sep 17 00:00:00 2001 From: Ofir Drang Date: Thu, 18 Apr 2019 16:39:10 +0300 Subject: [PATCH 101/152] crypto: ccree - handle tee fips error during power management resume commit 7138377ce10455b7183c6dde4b2c51b33f464c45 upstream. in order to support cryptocell tee fips error that may occurs while cryptocell ree is suspended, an cc_tee_handle_fips_error call added to the cc_pm_resume function. Signed-off-by: Ofir Drang Signed-off-by: Gilad Ben-Yossef Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/ccree/cc_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 27e363ffb467..79fc0a37ba6e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -11,6 +11,7 @@ #include "cc_ivgen.h" #include "cc_hash.h" #include "cc_pm.h" +#include "cc_fips.h" #define POWER_DOWN_ENABLE 0x01 #define POWER_DOWN_DISABLE 0x00 @@ -50,12 +51,13 @@ int cc_pm_resume(struct device *dev) } cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE); - rc = init_cc_regs(drvdata, false); if (rc) { dev_err(dev, "init_cc_regs (%x)\n", rc); return rc; } + /* check if tee fips error occurred during power down */ + cc_tee_handle_fips_error(drvdata); rc = cc_resume_req_queue(drvdata); if (rc) { From f580a54bbd522f2518fd642f7d4d73ad728e5d58 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 14 May 2019 15:41:38 -0700 Subject: [PATCH 102/152] mm/mincore.c: make mincore() more conservative commit 134fca9063ad4851de767d1768180e5dede9a881 upstream. The semantics of what mincore() considers to be resident is not completely clear, but Linux has always (since 2.3.52, which is when mincore() was initially done) treated it as "page is available in page cache". That's potentially a problem, as that [in]directly exposes meta-information about pagecache / memory mapping state even about memory not strictly belonging to the process executing the syscall, opening possibilities for sidechannel attacks. Change the semantics of mincore() so that it only reveals pagecache information for non-anonymous mappings that belog to files that the calling process could (if it tried to) successfully open for writing; otherwise we'd be including shared non-exclusive mappings, which - is the sidechannel - is not the usecase for mincore(), as that's primarily used for data, not (shared) text [jkosina@suse.cz: v2] Link: http://lkml.kernel.org/r/20190312141708.6652-2-vbabka@suse.cz [mhocko@suse.com: restructure can_do_mincore() conditions] Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1903062342020.19912@cbobk.fhfr.pm Signed-off-by: Jiri Kosina Signed-off-by: Vlastimil Babka Acked-by: Josh Snyder Acked-by: Michal Hocko Originally-by: Linus Torvalds Originally-by: Dominique Martinet Cc: Andy Lutomirski Cc: Dave Chinner Cc: Kevin Easton Cc: Matthew Wilcox Cc: Cyril Hrubis Cc: Tejun Heo Cc: Kirill A. Shutemov Cc: Daniel Gruss Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mincore.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/mm/mincore.c b/mm/mincore.c index fc37afe226e6..2732c8c0764c 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -169,6 +169,22 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return 0; } +static inline bool can_do_mincore(struct vm_area_struct *vma) +{ + if (vma_is_anonymous(vma)) + return true; + if (!vma->vm_file) + return false; + /* + * Reveal pagecache information only for non-anonymous mappings that + * correspond to the files the calling process could (if tried) open + * for writing; otherwise we'd be including shared non-exclusive + * mappings, which opens a side channel. + */ + return inode_owner_or_capable(file_inode(vma->vm_file)) || + inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; +} + /* * Do a chunk of "sys_mincore()". We've already checked * all the arguments, we hold the mmap semaphore: we should @@ -189,8 +205,13 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v vma = find_vma(current->mm, addr); if (!vma || addr < vma->vm_start) return -ENOMEM; - mincore_walk.mm = vma->vm_mm; end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); + if (!can_do_mincore(vma)) { + unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE); + memset(vec, 1, pages); + return pages; + } + mincore_walk.mm = vma->vm_mm; err = walk_page_range(addr, end, &mincore_walk); if (err < 0) return err; From 58db3813680ef3fd74dea8154124831043dd3860 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 May 2019 17:15:33 -0700 Subject: [PATCH 103/152] mm/huge_memory: fix vmf_insert_pfn_{pmd, pud}() crash, handle unaligned addresses commit fce86ff5802bac3a7b19db171aa1949ef9caac31 upstream. Starting with c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd()") vmf_insert_pfn_pmd() internally calls pmdp_set_access_flags(). That helper enforces a pmd aligned @address argument via VM_BUG_ON() assertion. Update the implementation to take a 'struct vm_fault' argument directly and apply the address alignment fixup internally to fix crash signatures like: kernel BUG at arch/x86/mm/pgtable.c:515! invalid opcode: 0000 [#1] SMP NOPTI CPU: 51 PID: 43713 Comm: java Tainted: G OE 4.19.35 #1 [..] RIP: 0010:pmdp_set_access_flags+0x48/0x50 [..] Call Trace: vmf_insert_pfn_pmd+0x198/0x350 dax_iomap_fault+0xe82/0x1190 ext4_dax_huge_fault+0x103/0x1f0 ? __switch_to_asm+0x40/0x70 __handle_mm_fault+0x3f6/0x1370 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 handle_mm_fault+0xda/0x200 __do_page_fault+0x249/0x4f0 do_page_fault+0x32/0x110 ? page_fault+0x8/0x30 page_fault+0x1e/0x30 Link: http://lkml.kernel.org/r/155741946350.372037.11148198430068238140.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd()") Signed-off-by: Dan Williams Reported-by: Piotr Balcer Tested-by: Yan Ma Tested-by: Pankaj Gupta Reviewed-by: Matthew Wilcox Reviewed-by: Jan Kara Reviewed-by: Aneesh Kumar K.V Cc: Chandan Rajendra Cc: Souptick Joarder Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/dax/device.c | 6 ++---- fs/dax.c | 6 ++---- include/linux/huge_mm.h | 6 ++---- mm/huge_memory.c | 16 ++++++++++------ 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 948806e57cee..a89ebd94c670 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -325,8 +325,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn, - vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD @@ -376,8 +375,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); - return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn, - vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); } #else static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, diff --git a/fs/dax.c b/fs/dax.c index 09fa70683c41..004c8ac1117c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1660,8 +1660,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, } trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry); - result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn, - write); + result = vmf_insert_pfn_pmd(vmf, pfn, write); break; case IOMAP_UNWRITTEN: case IOMAP_HOLE: @@ -1775,8 +1774,7 @@ static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf, break; #ifdef CONFIG_FS_DAX_PMD case PE_SIZE_PMD: - ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, - pfn, true); + ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE); break; #endif default: diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index fdcb45999b26..77227224ca88 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -47,10 +47,8 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, pfn_t pfn, bool write); -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, - pud_t *pud, pfn_t pfn, bool write); +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7d08e89361ee..6fad1864ba03 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -772,11 +772,13 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, pte_free(mm, pgtable); } -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, pfn_t pfn, bool write) +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) { + unsigned long addr = vmf->address & PMD_MASK; + struct vm_area_struct *vma = vmf->vma; pgprot_t pgprot = vma->vm_page_prot; pgtable_t pgtable = NULL; + /* * If we had pmd_special, we could avoid all these restrictions, * but we need to be consistent with PTEs and architectures that @@ -799,7 +801,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, track_pfn_insert(vma, &pgprot, pfn); - insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable); + insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable); return VM_FAULT_NOPAGE; } EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); @@ -848,10 +850,12 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, spin_unlock(ptl); } -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, - pud_t *pud, pfn_t pfn, bool write) +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) { + unsigned long addr = vmf->address & PUD_MASK; + struct vm_area_struct *vma = vmf->vma; pgprot_t pgprot = vma->vm_page_prot; + /* * If we had pud_special, we could avoid all these restrictions, * but we need to be consistent with PTEs and architectures that @@ -868,7 +872,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, track_pfn_insert(vma, &pgprot, pfn); - insert_pfn_pud(vma, addr, pud, pfn, pgprot, write); + insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write); return VM_FAULT_NOPAGE; } EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); From 0b16b09a723e42b82f60655467ad864809ff7dd2 Mon Sep 17 00:00:00 2001 From: Kai Shen Date: Mon, 13 May 2019 17:15:37 -0700 Subject: [PATCH 104/152] mm/hugetlb.c: don't put_page in lock of hugetlb_lock commit 2bf753e64b4a702e27ce26ff520c59563c62f96b upstream. spinlock recursion happened when do LTP test: #!/bin/bash ./runltp -p -f hugetlb & ./runltp -p -f hugetlb & ./runltp -p -f hugetlb & ./runltp -p -f hugetlb & ./runltp -p -f hugetlb & The dtor returned by get_compound_page_dtor in __put_compound_page may be the function of free_huge_page which will lock the hugetlb_lock, so don't put_page in lock of hugetlb_lock. BUG: spinlock recursion on CPU#0, hugemmap05/1079 lock: hugetlb_lock+0x0/0x18, .magic: dead4ead, .owner: hugemmap05/1079, .owner_cpu: 0 Call trace: dump_backtrace+0x0/0x198 show_stack+0x24/0x30 dump_stack+0xa4/0xcc spin_dump+0x84/0xa8 do_raw_spin_lock+0xd0/0x108 _raw_spin_lock+0x20/0x30 free_huge_page+0x9c/0x260 __put_compound_page+0x44/0x50 __put_page+0x2c/0x60 alloc_surplus_huge_page.constprop.19+0xf0/0x140 hugetlb_acct_memory+0x104/0x378 hugetlb_reserve_pages+0xe0/0x250 hugetlbfs_file_mmap+0xc0/0x140 mmap_region+0x3e8/0x5b0 do_mmap+0x280/0x460 vm_mmap_pgoff+0xf4/0x128 ksys_mmap_pgoff+0xb4/0x258 __arm64_sys_mmap+0x34/0x48 el0_svc_common+0x78/0x130 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc Link: http://lkml.kernel.org/r/b8ade452-2d6b-0372-32c2-703644032b47@huawei.com Fixes: 9980d744a0 ("mm, hugetlb: get rid of surplus page accounting tricks") Signed-off-by: Kai Shen Signed-off-by: Feilong Lin Reported-by: Wang Wang Reviewed-by: Oscar Salvador Reviewed-by: Mike Kravetz Reviewed-by: Andrew Morton Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5fb779cda972..98a99f00efc8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1572,8 +1572,9 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, */ if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { SetPageHugeTemporary(page); + spin_unlock(&hugetlb_lock); put_page(page); - page = NULL; + return NULL; } else { h->surplus_huge_pages++; h->surplus_huge_pages_node[page_to_nid(page)]++; From a3ccc156f365a9615890b9fbf99d8a663f2c4b56 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 13 May 2019 17:19:41 -0700 Subject: [PATCH 105/152] hugetlb: use same fault hash key for shared and private mappings commit 1b426bac66e6cc83c9f2d92b96e4e72acf43419a upstream. hugetlb uses a fault mutex hash table to prevent page faults of the same pages concurrently. The key for shared and private mappings is different. Shared keys off address_space and file index. Private keys off mm and virtual address. Consider a private mappings of a populated hugetlbfs file. A fault will map the page from the file and if needed do a COW to map a writable page. Hugetlbfs hole punch uses the fault mutex to prevent mappings of file pages. It uses the address_space file index key. However, private mappings will use a different key and could race with this code to map the file page. This causes problems (BUG) for the page cache remove code as it expects the page to be unmapped. A sample stack is: page dumped because: VM_BUG_ON_PAGE(page_mapped(page)) kernel BUG at mm/filemap.c:169! ... RIP: 0010:unaccount_page_cache_page+0x1b8/0x200 ... Call Trace: __delete_from_page_cache+0x39/0x220 delete_from_page_cache+0x45/0x70 remove_inode_hugepages+0x13c/0x380 ? __add_to_page_cache_locked+0x162/0x380 hugetlbfs_fallocate+0x403/0x540 ? _cond_resched+0x15/0x30 ? __inode_security_revalidate+0x5d/0x70 ? selinux_file_permission+0x100/0x130 vfs_fallocate+0x13f/0x270 ksys_fallocate+0x3c/0x80 __x64_sys_fallocate+0x1a/0x20 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 There seems to be another potential COW issue/race with this approach of different private and shared keys as noted in commit 8382d914ebf7 ("mm, hugetlb: improve page-fault scalability"). Since every hugetlb mapping (even anon and private) is actually a file mapping, just use the address_space index key for all mappings. This results in potentially more hash collisions. However, this should not be the common case. Link: http://lkml.kernel.org/r/20190328234704.27083-3-mike.kravetz@oracle.com Link: http://lkml.kernel.org/r/20190412165235.t4sscoujczfhuiyt@linux-r8p5 Fixes: b5cec28d36f5 ("hugetlbfs: truncate_hugepages() takes a range of pages") Signed-off-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Reviewed-by: Davidlohr Bueso Cc: Joonsoo Kim Cc: "Kirill A . Shutemov" Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 7 ++----- include/linux/hugetlb.h | 4 +--- mm/hugetlb.c | 22 ++++++---------------- mm/userfaultfd.c | 3 +-- 4 files changed, 10 insertions(+), 26 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a3a3d256fb0e..7a24f91af29e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -426,9 +426,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, u32 hash; index = page->index; - hash = hugetlb_fault_mutex_hash(h, current->mm, - &pseudo_vma, - mapping, index, 0); + hash = hugetlb_fault_mutex_hash(h, mapping, index, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); /* @@ -625,8 +623,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, addr = index * hpage_size; /* mutex taken here, fault path and hole punch */ - hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, - index, addr); + hash = hugetlb_fault_mutex_hash(h, mapping, index, addr); mutex_lock(&hugetlb_fault_mutex_table[hash]); /* See if already present in mapping to avoid alloc/free */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 087fd5f48c91..d34112fb3d52 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -123,9 +123,7 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; -u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, - struct vm_area_struct *vma, - struct address_space *mapping, +u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, pgoff_t idx, unsigned long address); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 98a99f00efc8..0bbb033d7d8c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3778,8 +3778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * handling userfault. Reacquire after handling * fault to make calling code simpler. */ - hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, - idx, haddr); + hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr); mutex_unlock(&hugetlb_fault_mutex_table[hash]); ret = handle_userfault(&vmf, VM_UFFD_MISSING); mutex_lock(&hugetlb_fault_mutex_table[hash]); @@ -3887,21 +3886,14 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, } #ifdef CONFIG_SMP -u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, - struct vm_area_struct *vma, - struct address_space *mapping, +u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, pgoff_t idx, unsigned long address) { unsigned long key[2]; u32 hash; - if (vma->vm_flags & VM_SHARED) { - key[0] = (unsigned long) mapping; - key[1] = idx; - } else { - key[0] = (unsigned long) mm; - key[1] = address >> huge_page_shift(h); - } + key[0] = (unsigned long) mapping; + key[1] = idx; hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0); @@ -3912,9 +3904,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, * For uniprocesor systems we always use a single mutex, so just * return 0 and avoid the hashing overhead. */ -u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, - struct vm_area_struct *vma, - struct address_space *mapping, +u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, pgoff_t idx, unsigned long address) { return 0; @@ -3959,7 +3949,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * get spurious allocation failures if two CPUs race to instantiate * the same page in the page cache. */ - hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr); + hash = hugetlb_fault_mutex_hash(h, mapping, idx, haddr); mutex_lock(&hugetlb_fault_mutex_table[hash]); entry = huge_ptep_get(ptep); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 458acda96f20..7529d3fcc899 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -271,8 +271,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, */ idx = linear_page_index(dst_vma, dst_addr); mapping = dst_vma->vm_file->f_mapping; - hash = hugetlb_fault_mutex_hash(h, dst_mm, dst_vma, mapping, - idx, dst_addr); + hash = hugetlb_fault_mutex_hash(h, mapping, idx, dst_addr); mutex_lock(&hugetlb_fault_mutex_table[hash]); err = -ENOMEM; From 3574bc98e2fe3f66090682dd91ebd9b96d0236f3 Mon Sep 17 00:00:00 2001 From: Shuning Zhang Date: Mon, 13 May 2019 17:15:56 -0700 Subject: [PATCH 106/152] ocfs2: fix ocfs2 read inode data panic in ocfs2_iget commit e091eab028f9253eac5c04f9141bbc9d170acab3 upstream. In some cases, ocfs2_iget() reads the data of inode, which has been deleted for some reason. That will make the system panic. So We should judge whether this inode has been deleted, and tell the caller that the inode is a bad inode. For example, the ocfs2 is used as the backed of nfs, and the client is nfsv3. This issue can be reproduced by the following steps. on the nfs server side, ..../patha/pathb Step 1: The process A was scheduled before calling the function fh_verify. Step 2: The process B is removing the 'pathb', and just completed the call to function dput. Then the dentry of 'pathb' has been deleted from the dcache, and all ancestors have been deleted also. The relationship of dentry and inode was deleted through the function hlist_del_init. The following is the call stack. dentry_iput->hlist_del_init(&dentry->d_u.d_alias) At this time, the inode is still in the dcache. Step 3: The process A call the function ocfs2_get_dentry, which get the inode from dcache. Then the refcount of inode is 1. The following is the call stack. nfsd3_proc_getacl->fh_verify->exportfs_decode_fh->fh_to_dentry(ocfs2_get_dentry) Step 4: Dirty pages are flushed by bdi threads. So the inode of 'patha' is evicted, and this directory was deleted. But the inode of 'pathb' can't be evicted, because the refcount of the inode was 1. Step 5: The process A keep running, and call the function reconnect_path(in exportfs_decode_fh), which call function ocfs2_get_parent of ocfs2. Get the block number of parent directory(patha) by the name of ... Then read the data from disk by the block number. But this inode has been deleted, so the system panic. Process A Process B 1. in nfsd3_proc_getacl | 2. | dput 3. fh_to_dentry(ocfs2_get_dentry) | 4. bdi flush dirty cache | 5. ocfs2_iget | [283465.542049] OCFS2: ERROR (device sdp): ocfs2_validate_inode_block: Invalid dinode #580640: OCFS2_VALID_FL not set [283465.545490] Kernel panic - not syncing: OCFS2: (device sdp): panic forced after error [283465.546889] CPU: 5 PID: 12416 Comm: nfsd Tainted: G W 4.1.12-124.18.6.el6uek.bug28762940v3.x86_64 #2 [283465.548382] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/21/2015 [283465.549657] 0000000000000000 ffff8800a56fb7b8 ffffffff816e839c ffffffffa0514758 [283465.550392] 000000000008dc20 ffff8800a56fb838 ffffffff816e62d3 0000000000000008 [283465.551056] ffff880000000010 ffff8800a56fb848 ffff8800a56fb7e8 ffff88005df9f000 [283465.551710] Call Trace: [283465.552516] [] dump_stack+0x63/0x81 [283465.553291] [] panic+0xcb/0x21b [283465.554037] [] ocfs2_handle_error+0xf0/0xf0 [ocfs2] [283465.554882] [] __ocfs2_error+0x67/0x70 [ocfs2] [283465.555768] [] ocfs2_validate_inode_block+0x229/0x230 [ocfs2] [283465.556683] [] ocfs2_read_blocks+0x46c/0x7b0 [ocfs2] [283465.557408] [] ? ocfs2_inode_cache_io_unlock+0x20/0x20 [ocfs2] [283465.557973] [] ocfs2_read_inode_block_full+0x3b/0x60 [ocfs2] [283465.558525] [] ocfs2_iget+0x4aa/0x880 [ocfs2] [283465.559082] [] ocfs2_get_parent+0x9e/0x220 [ocfs2] [283465.559622] [] reconnect_path+0xb5/0x300 [283465.560156] [] exportfs_decode_fh+0xf6/0x2b0 [283465.560708] [] ? nfsd_proc_getattr+0xa0/0xa0 [nfsd] [283465.561262] [] ? prepare_creds+0x26/0x110 [283465.561932] [] fh_verify+0x350/0x660 [nfsd] [283465.562862] [] ? nfsd_cache_lookup+0x44/0x630 [nfsd] [283465.563697] [] nfsd3_proc_getattr+0x69/0xf0 [nfsd] [283465.564510] [] nfsd_dispatch+0xe0/0x290 [nfsd] [283465.565358] [] ? svc_tcp_adjust_wspace+0x12/0x30 [sunrpc] [283465.566272] [] svc_process_common+0x412/0x6a0 [sunrpc] [283465.567155] [] svc_process+0x123/0x210 [sunrpc] [283465.568020] [] nfsd+0xff/0x170 [nfsd] [283465.568962] [] ? nfsd_destroy+0x80/0x80 [nfsd] [283465.570112] [] kthread+0xcb/0xf0 [283465.571099] [] ? kthread_create_on_node+0x180/0x180 [283465.572114] [] ret_from_fork+0x58/0x90 [283465.573156] [] ? kthread_create_on_node+0x180/0x180 Link: http://lkml.kernel.org/r/1554185919-3010-1-git-send-email-sunny.s.zhang@oracle.com Signed-off-by: Shuning Zhang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: piaojun Cc: "Gang He" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/export.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 4bf8d5854b27..af2888d23de3 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) u64 blkno; struct dentry *parent; struct inode *dir = d_inode(child); + int set; trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); + status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1); + if (status < 0) { + mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status); + parent = ERR_PTR(status); + goto bail; + } + status = ocfs2_inode_lock(dir, NULL, 0); if (status < 0) { if (status != -ENOENT) mlog_errno(status); parent = ERR_PTR(status); - goto bail; + goto unlock_nfs_sync; } status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno); @@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) goto bail_unlock; } + status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set); + if (status < 0) { + if (status == -EINVAL) { + status = -ESTALE; + } else + mlog(ML_ERROR, "test inode bit failed %d\n", status); + parent = ERR_PTR(status); + goto bail_unlock; + } + + trace_ocfs2_get_dentry_test_bit(status, set); + if (!set) { + status = -ESTALE; + parent = ERR_PTR(status); + goto bail_unlock; + } + parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); bail_unlock: ocfs2_inode_unlock(dir, 0); +unlock_nfs_sync: + ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1); + bail: trace_ocfs2_get_parent_end(parent); From 8bae43985571a2c6072eb815ef60d2c08aeefb0c Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 14 May 2019 15:40:46 -0700 Subject: [PATCH 107/152] userfaultfd: use RCU to free the task struct when fork fails commit c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 upstream. The task structure is freed while get_mem_cgroup_from_mm() holds rcu_read_lock() and dereferences mm->owner. get_mem_cgroup_from_mm() failing fork() ---- --- task = mm->owner mm->owner = NULL; free(task) if (task) *task; /* use after free */ The fix consists in freeing the task with RCU also in the fork failure case, exactly like it always happens for the regular exit(2) path. That is enough to make the rcu_read_lock hold in get_mem_cgroup_from_mm() (left side above) effective to avoid a use after free when dereferencing the task structure. An alternate possible fix would be to defer the delivery of the userfaultfd contexts to the monitor until after fork() is guaranteed to succeed. Such a change would require more changes because it would create a strict ordering dependency where the uffd methods would need to be called beyond the last potentially failing branch in order to be safe. This solution as opposed only adds the dependency to common code to set mm->owner to NULL and to free the task struct that was pointed by mm->owner with RCU, if fork ends up failing. The userfaultfd methods can still be called anywhere during the fork runtime and the monitor will keep discarding orphaned "mm" coming from failed forks in userland. This race condition couldn't trigger if CONFIG_MEMCG was set =n at build time. [aarcange@redhat.com: improve changelog, reduce #ifdefs per Michal] Link: http://lkml.kernel.org/r/20190429035752.4508-1-aarcange@redhat.com Link: http://lkml.kernel.org/r/20190325225636.11635-2-aarcange@redhat.com Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event") Signed-off-by: Andrea Arcangeli Tested-by: zhong jiang Reported-by: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com Cc: Oleg Nesterov Cc: Jann Horn Cc: Hugh Dickins Cc: Mike Rapoport Cc: Mike Kravetz Cc: Peter Xu Cc: Jason Gunthorpe Cc: "Kirill A . Shutemov" Cc: Michal Hocko Cc: zhong jiang Cc: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 64ef113e387e..69874db3fba8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -907,6 +907,15 @@ static void mm_init_aio(struct mm_struct *mm) #endif } +static __always_inline void mm_clear_owner(struct mm_struct *mm, + struct task_struct *p) +{ +#ifdef CONFIG_MEMCG + if (mm->owner == p) + WRITE_ONCE(mm->owner, NULL); +#endif +} + static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { #ifdef CONFIG_MEMCG @@ -1286,6 +1295,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) free_pt: /* don't put binfmt in mmput, we haven't got module yet */ mm->binfmt = NULL; + mm_init_owner(mm, NULL); mmput(mm); fail_nomem: @@ -1617,6 +1627,21 @@ static inline void rcu_copy_process(struct task_struct *p) #endif /* #ifdef CONFIG_TASKS_RCU */ } +static void __delayed_free_task(struct rcu_head *rhp) +{ + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + + free_task(tsk); +} + +static __always_inline void delayed_free_task(struct task_struct *tsk) +{ + if (IS_ENABLED(CONFIG_MEMCG)) + call_rcu(&tsk->rcu, __delayed_free_task); + else + free_task(tsk); +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -2072,8 +2097,10 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: - if (p->mm) + if (p->mm) { + mm_clear_owner(p->mm, p); mmput(p->mm); + } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); @@ -2104,7 +2131,7 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_free: p->state = TASK_DEAD; put_task_stack(p); - free_task(p); + delayed_free_task(p); fork_out: spin_lock_irq(¤t->sighand->siglock); hlist_del_init(&delayed.node); From 770e46b38ebe8c3c93dc00b2ee869a1aa1718928 Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Mon, 13 May 2019 12:17:08 -0700 Subject: [PATCH 108/152] ACPI: PM: Set enable_for_wake for wakeup GPEs during suspend-to-idle commit 2f844b61db8297a1f7a06adf2eb5c43381f2c183 upstream. I noticed that recently multiple systems (chromebooks) couldn't wake from S0ix using LID or Keyboard after updating to a newer kernel. I bisected and it turned up commit f941d3e41da7 ("ACPI: EC / PM: Disable non-wakeup GPEs for suspend-to-idle"). I checked that the issue got fixed if that commit was reverted. I debugged and found that although PNP0C0D:00 (representing the LID) is wake capable and should wakeup the system per the code in acpi_wakeup_gpe_init() and in drivers/acpi/button.c: localhost /sys # cat /proc/acpi/wakeup Device S-state Status Sysfs node LID0 S4 *enabled platform:PNP0C0D:00 CREC S5 *disabled platform:GOOG0004:00 *disabled platform:cros-ec-dev.1.auto *disabled platform:cros-ec-accel.0 *disabled platform:cros-ec-accel.1 *disabled platform:cros-ec-gyro.0 *disabled platform:cros-ec-ring.0 *disabled platform:cros-usbpd-charger.2.auto *disabled platform:cros-usbpd-logger.3.auto D015 S3 *enabled i2c:i2c-ELAN0000:00 PENH S3 *enabled platform:PRP0001:00 XHCI S3 *enabled pci:0000:00:14.0 GLAN S4 *disabled WIFI S3 *disabled pci:0000:00:14.3 localhost /sys # On debugging, I found that its corresponding GPE is not being enabled. The particular GPE's "gpe_register_info->enable_for_wake" does not have any bits set when acpi_enable_all_wakeup_gpes() comes around to use it. I looked at code and could not find any other code path that should set the bits in "enable_for_wake" bitmask for the wake enabled devices for s2idle. [I do see that it happens for S3 in acpi_sleep_prepare()]. Thus I used the same call to enable the GPEs for wake enabled devices, and verified that this fixes the regression I was seeing on multiple of my devices. [ rjw: The problem is that commit f941d3e41da7 ("ACPI: EC / PM: Disable non-wakeup GPEs for suspend-to-idle") forgot to add the acpi_enable_wakeup_devices() call for s2idle along with acpi_enable_all_wakeup_gpes(). ] Fixes: f941d3e41da7 ("ACPI: EC / PM: Disable non-wakeup GPEs for suspend-to-idle") Link: https://bugzilla.kernel.org/show_bug.cgi?id=203579 Signed-off-by: Rajat Jain [ rjw: Subject & changelog ] Cc: 5.0+ # 5.0+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sleep.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 74c489047f57..847db3edcb5b 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -977,6 +977,8 @@ static int acpi_s2idle_prepare(void) if (acpi_sci_irq_valid()) enable_irq_wake(acpi_sci_irq); + acpi_enable_wakeup_devices(ACPI_STATE_S0); + /* Change the configuration of GPEs to avoid spurious wakeup. */ acpi_enable_all_wakeup_gpes(); acpi_os_wait_events_complete(); @@ -1026,6 +1028,8 @@ static void acpi_s2idle_restore(void) { acpi_enable_all_runtime_gpes(); + acpi_disable_wakeup_devices(ACPI_STATE_S0); + if (acpi_sci_irq_valid()) disable_irq_wake(acpi_sci_irq); From 5185672f2acf5360878143905c02f62a0d4828fe Mon Sep 17 00:00:00 2001 From: Steve Twiss Date: Fri, 26 Apr 2019 14:33:35 +0100 Subject: [PATCH 109/152] mfd: da9063: Fix OTP control register names to match datasheets for DA9063/63L commit 6b4814a9451add06d457e198be418bf6a3e6a990 upstream. Mismatch between what is found in the Datasheets for DA9063 and DA9063L provided by Dialog Semiconductor, and the register names provided in the MFD registers file. The changes are for the OTP (one-time-programming) control registers. The two naming errors are OPT instead of OTP, and COUNT instead of CONT (i.e. control). Cc: Stable Signed-off-by: Steve Twiss Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/da9063/registers.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index 5d42859cb441..844fc2973392 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -215,9 +215,9 @@ /* DA9063 Configuration registers */ /* OTP */ -#define DA9063_REG_OPT_COUNT 0x101 -#define DA9063_REG_OPT_ADDR 0x102 -#define DA9063_REG_OPT_DATA 0x103 +#define DA9063_REG_OTP_CONT 0x101 +#define DA9063_REG_OTP_ADDR 0x102 +#define DA9063_REG_OTP_DATA 0x103 /* Customer Trim and Configuration */ #define DA9063_REG_T_OFFSET 0x104 From dc6d69bde82987524c4fce84b02019f9bd5c34ec Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 5 May 2019 18:43:22 +0300 Subject: [PATCH 110/152] mfd: max77620: Fix swapped FPS_PERIOD_MAX_US values commit ea611d1cc180fbb56982c83cd5142a2b34881f5c upstream. The FPS_PERIOD_MAX_US definitions are swapped for MAX20024 and MAX77620, fix it. Cc: stable Signed-off-by: Dmitry Osipenko Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/max77620.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mfd/max77620.h b/include/linux/mfd/max77620.h index ad2a9a852aea..b4fd5a7c2aaa 100644 --- a/include/linux/mfd/max77620.h +++ b/include/linux/mfd/max77620.h @@ -136,8 +136,8 @@ #define MAX77620_FPS_PERIOD_MIN_US 40 #define MAX20024_FPS_PERIOD_MIN_US 20 -#define MAX77620_FPS_PERIOD_MAX_US 2560 -#define MAX20024_FPS_PERIOD_MAX_US 5120 +#define MAX20024_FPS_PERIOD_MAX_US 2560 +#define MAX77620_FPS_PERIOD_MAX_US 5120 #define MAX77620_REG_FPS_GPIO1 0x54 #define MAX77620_REG_FPS_GPIO2 0x55 From 6a01793e07639b5a4e321297b92e8909b1bd8467 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Tue, 19 Mar 2019 17:18:07 +0000 Subject: [PATCH 111/152] mtd: spi-nor: intel-spi: Avoid crossing 4K address boundary on read/write commit 2b75ebeea6f4937d4d05ec4982c471cef9a29b7f upstream. It was observed that reads crossing 4K address boundary are failing. This limitation is mentioned in Intel documents: Intel(R) 9 Series Chipset Family Platform Controller Hub (PCH) Datasheet: "5.26.3 Flash Access Program Register Access: * Program Register Accesses are not allowed to cross a 4 KB boundary..." Enhanced Serial Peripheral Interface (eSPI) Interface Base Specification (for Client and Server Platforms): "5.1.4 Address For other memory transactions, the address may start or end at any byte boundary. However, the address and payload length combination must not cross the naturally aligned address boundary of the corresponding Maximum Payload Size. It must not cross a 4 KB address boundary." Avoid this by splitting an operation crossing the boundary into two operations. Fixes: 8afda8b26d01 ("spi-nor: Add support for Intel SPI serial flash controller") Cc: stable@vger.kernel.org Reported-by: Romain Porte Tested-by: Pascal Fabreges Signed-off-by: Alexander Sverdlin Reviewed-by: Tudor Ambarus Acked-by: Mika Westerberg Signed-off-by: Miquel Raynal Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/intel-spi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c index af0a22019516..d60cbf23d9aa 100644 --- a/drivers/mtd/spi-nor/intel-spi.c +++ b/drivers/mtd/spi-nor/intel-spi.c @@ -632,6 +632,10 @@ static ssize_t intel_spi_read(struct spi_nor *nor, loff_t from, size_t len, while (len > 0) { block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + /* Read cannot cross 4K boundary */ + block_size = min_t(loff_t, from + block_size, + round_up(from + 1, SZ_4K)) - from; + writel(from, ispi->base + FADDR); val = readl(ispi->base + HSFSTS_CTL); @@ -685,6 +689,10 @@ static ssize_t intel_spi_write(struct spi_nor *nor, loff_t to, size_t len, while (len > 0) { block_size = min_t(size_t, len, INTEL_SPI_FIFO_SZ); + /* Write cannot cross 4K boundary */ + block_size = min_t(loff_t, to + block_size, + round_up(to + 1, SZ_4K)) - to; + writel(to, ispi->base + FADDR); val = readl(ispi->base + HSFSTS_CTL); From d90824ecb88752664be195a4c76e2b51ea9112ab Mon Sep 17 00:00:00 2001 From: Yifeng Li Date: Tue, 5 Mar 2019 07:02:49 +0800 Subject: [PATCH 112/152] tty: vt.c: Fix TIOCL_BLANKSCREEN console blanking if blankinterval == 0 commit 75ddbc1fb11efac87b611d48e9802f6fe2bb2163 upstream. Previously, in the userspace, it was possible to use the "setterm" command from util-linux to blank the VT console by default, using the following command. According to the man page, > The force option keeps the screen blank even if a key is pressed. It was implemented by calling TIOCL_BLANKSCREEN. case BLANKSCREEN: ioctlarg = TIOCL_BLANKSCREEN; if (ioctl(STDIN_FILENO, TIOCLINUX, &ioctlarg)) warn(_("cannot force blank")); break; However, after Linux 4.12, this command ceased to work anymore, which is unexpected. By inspecting the kernel source, it shows that the issue was triggered by the side-effect from commit a4199f5eb809 ("tty: Disable default console blanking interval"). The console blanking is implemented by function do_blank_screen() in vt.c: "blank_state" will be initialized to "blank_normal_wait" in con_init() if AND ONLY IF ("blankinterval" > 0). If "blankinterval" is 0, "blank_state" will be "blank_off" (== 0), and a call to do_blank_screen() will always abort, even if a forced blanking is required from the user by calling TIOCL_BLANKSCREEN, the console won't be blanked. This behavior is unexpected from a user's point-of-view, since it's not mentioned in any documentation. The setterm man page suggests it will always work, and the kernel comments in uapi/linux/tiocl.h says > /* keep screen blank even if a key is pressed */ > #define TIOCL_BLANKSCREEN 14 To fix it, we simply remove the "blank_state != blank_off" check, as pointed out by Nicolas Pitre, this check doesn't logically make sense and it's safe to remove. Suggested-by: Nicolas Pitre Fixes: a4199f5eb809 ("tty: Disable default console blanking interval") Signed-off-by: Yifeng Li Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 3e5ec1cee059..f93b948acfa5 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4155,8 +4155,6 @@ void do_blank_screen(int entering_gfx) return; } - if (blank_state != blank_normal_wait) - return; blank_state = blank_off; /* don't blank graphics */ From 0fd2df64f14297eb469987059929e3ae793f124a Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 10 Mar 2019 21:24:15 +0000 Subject: [PATCH 113/152] tty/vt: fix write/write race in ioctl(KDSKBSENT) handler commit 46ca3f735f345c9d87383dd3a09fa5d43870770e upstream. The bug manifests as an attempt to access deallocated memory: BUG: unable to handle kernel paging request at ffff9c8735448000 #PF error: [PROT] [WRITE] PGD 288a05067 P4D 288a05067 PUD 288a07067 PMD 7f60c2063 PTE 80000007f5448161 Oops: 0003 [#1] PREEMPT SMP CPU: 6 PID: 388 Comm: loadkeys Tainted: G C 5.0.0-rc6-00153-g5ded5871030e #91 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./H77M-D3H, BIOS F12 11/14/2013 RIP: 0010:__memmove+0x81/0x1a0 Code: 4c 89 4f 10 4c 89 47 18 48 8d 7f 20 73 d4 48 83 c2 20 e9 a2 00 00 00 66 90 48 89 d1 4c 8b 5c 16 f8 4c 8d 54 17 f8 48 c1 e9 03 48 a5 4d 89 1a e9 0c 01 00 00 0f 1f 40 00 48 89 d1 4c 8b 1e 49 RSP: 0018:ffffa1b9002d7d08 EFLAGS: 00010203 RAX: ffff9c873541af43 RBX: ffff9c873541af43 RCX: 00000c6f105cd6bf RDX: 0000637882e986b6 RSI: ffff9c8735447ffb RDI: ffff9c8735447ffb RBP: ffff9c8739cd3800 R08: ffff9c873b802f00 R09: 00000000fffff73b R10: ffffffffb82b35f1 R11: 00505b1b004d5b1b R12: 0000000000000000 R13: ffff9c873541af3d R14: 000000000000000b R15: 000000000000000c FS: 00007f450c390580(0000) GS:ffff9c873f180000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff9c8735448000 CR3: 00000007e213c002 CR4: 00000000000606e0 Call Trace: vt_do_kdgkb_ioctl+0x34d/0x440 vt_ioctl+0xba3/0x1190 ? __bpf_prog_run32+0x39/0x60 ? mem_cgroup_commit_charge+0x7b/0x4e0 tty_ioctl+0x23f/0x920 ? preempt_count_sub+0x98/0xe0 ? __seccomp_filter+0x67/0x600 do_vfs_ioctl+0xa2/0x6a0 ? syscall_trace_enter+0x192/0x2d0 ksys_ioctl+0x3a/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x54/0xe0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The bug manifests on systemd systems with multiple vtcon devices: # cat /sys/devices/virtual/vtconsole/vtcon0/name (S) dummy device # cat /sys/devices/virtual/vtconsole/vtcon1/name (M) frame buffer device There systemd runs 'loadkeys' tool in tapallel for each vtcon instance. This causes two parallel ioctl(KDSKBSENT) calls to race into adding the same entry into 'func_table' array at: drivers/tty/vt/keyboard.c:vt_do_kdgkb_ioctl() The function has no locking around writes to 'func_table'. The simplest reproducer is to have initrams with the following init on a 8-CPU machine x86_64: #!/bin/sh loadkeys -q windowkeys ru4 & loadkeys -q windowkeys ru4 & loadkeys -q windowkeys ru4 & loadkeys -q windowkeys ru4 & loadkeys -q windowkeys ru4 & loadkeys -q windowkeys ru4 & loadkeys -q windowkeys ru4 & loadkeys -q windowkeys ru4 & wait The change adds lock on write path only. Reads are still racy. CC: Greg Kroah-Hartman CC: Jiri Slaby Link: https://lkml.org/lkml/2019/2/17/256 Signed-off-by: Sergei Trofimovich Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 88312c6c92cc..0617e87ab343 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -123,6 +123,7 @@ static const int NR_TYPES = ARRAY_SIZE(max_vals); static struct input_handler kbd_handler; static DEFINE_SPINLOCK(kbd_event_lock); static DEFINE_SPINLOCK(led_lock); +static DEFINE_SPINLOCK(func_buf_lock); /* guard 'func_buf' and friends */ static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; /* keyboard key bitmap */ static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */ static bool dead_key_next; @@ -1990,11 +1991,12 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) char *p; u_char *q; u_char __user *up; - int sz; + int sz, fnw_sz; int delta; char *first_free, *fj, *fnw; int i, j, k; int ret; + unsigned long flags; if (!capable(CAP_SYS_TTY_CONFIG)) perm = 0; @@ -2037,7 +2039,14 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) goto reterr; } + fnw = NULL; + fnw_sz = 0; + /* race aginst other writers */ + again: + spin_lock_irqsave(&func_buf_lock, flags); q = func_table[i]; + + /* fj pointer to next entry after 'q' */ first_free = funcbufptr + (funcbufsize - funcbufleft); for (j = i+1; j < MAX_NR_FUNC && !func_table[j]; j++) ; @@ -2045,10 +2054,12 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) fj = func_table[j]; else fj = first_free; - + /* buffer usage increase by new entry */ delta = (q ? -strlen(q) : 1) + strlen(kbs->kb_string); + if (delta <= funcbufleft) { /* it fits in current buf */ if (j < MAX_NR_FUNC) { + /* make enough space for new entry at 'fj' */ memmove(fj + delta, fj, first_free - fj); for (k = j; k < MAX_NR_FUNC; k++) if (func_table[k]) @@ -2061,20 +2072,28 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) sz = 256; while (sz < funcbufsize - funcbufleft + delta) sz <<= 1; - fnw = kmalloc(sz, GFP_KERNEL); - if(!fnw) { - ret = -ENOMEM; - goto reterr; + if (fnw_sz != sz) { + spin_unlock_irqrestore(&func_buf_lock, flags); + kfree(fnw); + fnw = kmalloc(sz, GFP_KERNEL); + fnw_sz = sz; + if (!fnw) { + ret = -ENOMEM; + goto reterr; + } + goto again; } if (!q) func_table[i] = fj; + /* copy data before insertion point to new location */ if (fj > funcbufptr) memmove(fnw, funcbufptr, fj - funcbufptr); for (k = 0; k < j; k++) if (func_table[k]) func_table[k] = fnw + (func_table[k] - funcbufptr); + /* copy data after insertion point to new location */ if (first_free > fj) { memmove(fnw + (fj - funcbufptr) + delta, fj, first_free - fj); for (k = j; k < MAX_NR_FUNC; k++) @@ -2087,7 +2106,9 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) funcbufleft = funcbufleft - delta + sz - funcbufsize; funcbufsize = sz; } + /* finally insert item itself */ strcpy(func_table[i], kbs->kb_string); + spin_unlock_irqrestore(&func_buf_lock, flags); break; } ret = 0; From 001fe0dab4eacd9e714b7c9de5f13af2bb3c3e1a Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Sat, 6 Apr 2019 18:57:40 -0400 Subject: [PATCH 114/152] jbd2: check superblock mapped prior to committing commit 742b06b5628f2cd23cb51a034cb54dc33c6162c5 upstream. We hit a BUG at fs/buffer.c:3057 if we detached the nbd device before unmounting ext4 filesystem. The typical chain of events leading to the BUG: jbd2_write_superblock submit_bh submit_bh_wbc BUG_ON(!buffer_mapped(bh)); The block device is removed and all the pages are invalidated. JBD2 was trying to write journal superblock to the block device which is no longer present. Fix this by checking the journal superblock's buffer head prior to submitting. Reported-by: Eric Ren Signed-off-by: Jiufei Xue Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 88f2a49338a1..9e618777c699 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1366,6 +1366,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) journal_superblock_t *sb = journal->j_superblock; int ret; + /* Buffer got discarded which means block device got invalidated */ + if (!buffer_mapped(bh)) + return -EIO; + trace_jbd2_write_superblock(journal, write_flags); if (!(journal->j_flags & JBD2_BARRIER)) write_flags &= ~(REQ_FUA | REQ_PREFLUSH); From 71478ef67d7c39ba735c07ce3e39eae21ad8681a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 6 Apr 2019 18:33:06 -0400 Subject: [PATCH 115/152] ext4: make sanity check in mballoc more strict commit 31562b954b60f02acb91b7349dc6432d3f8c3c5f upstream. The sanity check in mb_find_extent() only checked that returned extent does not extend past blocksize * 8, however it should not extend past EXT4_CLUSTERS_PER_GROUP(sb). This can happen when clusters_per_group < blocksize * 8 and the tail of the bitmap is not properly filled by 1s which happened e.g. when ancient kernels have grown the filesystem. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e29fce2fbf25..cc229f3357f7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1539,7 +1539,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block, ex->fe_len += 1 << order; } - if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) { + if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) { /* Should never happen! (but apparently sometimes does?!?) */ WARN_ON(1); ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent " From f0f805f8b9e72be1d77bc1cc6f85eab0c3fd637a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 10 Apr 2019 00:37:36 -0400 Subject: [PATCH 116/152] ext4: ignore e_value_offs for xattrs with value-in-ea-inode commit e5d01196c0428a206f307e9ee5f6842964098ff0 upstream. In other places in fs/ext4/xattr.c, if e_value_inum is non-zero, the code ignores the value in e_value_offs. The e_value_offs *should* be zero, but we shouldn't depend upon it, since it might not be true in a corrupted/fuzzed file system. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202897 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202877 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 006c277dc22e..f73fc90e5daa 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1700,7 +1700,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* No failures allowed past this point. */ - if (!s->not_found && here->e_value_size && here->e_value_offs) { + if (!s->not_found && here->e_value_size && !here->e_value_inum) { /* Remove the old value. */ void *first_val = s->base + min_offs; size_t offs = le16_to_cpu(here->e_value_offs); From b12a8d80a46e8ab02eb9124a36562a93bbd7224b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 25 Apr 2019 11:44:15 -0400 Subject: [PATCH 117/152] ext4: avoid drop reference to iloc.bh twice commit 8c380ab4b7b59c0c602743810be1b712514eaebc upstream. The reference to iloc.bh has been dropped in ext4_mark_iloc_dirty. However, the reference is dropped again if error occurs during ext4_handle_dirty_metadata, which may result in use-after-free bugs. Fixes: fb265c9cb49e("ext4: add ext4_sb_bread() to disambiguate ENOMEM cases") Signed-off-by: Pan Bian Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index e7ae26e36c9c..4d5c0fc9d23a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -874,6 +874,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); + iloc.bh = NULL; goto errout; } brelse(dind); From 2a18c9c76718f77a08177f1fc20007fab2fdafdd Mon Sep 17 00:00:00 2001 From: Barret Rhoden Date: Thu, 25 Apr 2019 11:55:50 -0400 Subject: [PATCH 118/152] ext4: fix use-after-free race with debug_want_extra_isize commit 7bc04c5c2cc467c5b40f2b03ba08da174a0d5fa7 upstream. When remounting with debug_want_extra_isize, we were not performing the same checks that we do during a normal mount. That allowed us to set a value for s_want_extra_isize that reached outside the s_inode_size. Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate functions") Reported-by: syzbot+f584efa0ac7213c226b7@syzkaller.appspotmail.com Reviewed-by: Jan Kara Signed-off-by: Barret Rhoden Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 58 +++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index abba7ece78e9..a2437d352e09 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3514,6 +3514,37 @@ int ext4_calculate_overhead(struct super_block *sb) return 0; } +static void ext4_clamp_want_extra_isize(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + + /* determine the minimum size of new large inodes, if present */ + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && + sbi->s_want_extra_isize == 0) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (ext4_has_feature_extra_isize(sb)) { + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_want_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_want_extra_isize); + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_min_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_min_extra_isize); + } + } + /* Check if enough inode space is available */ + if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + ext4_msg(sb, KERN_INFO, + "required extra inode space not available"); + } +} + static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; @@ -4388,30 +4419,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } else if (ret) goto failed_mount4a; - /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && - sbi->s_want_extra_isize == 0) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - if (ext4_has_feature_extra_isize(sb)) { - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_want_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_want_extra_isize); - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_min_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_min_extra_isize); - } - } - /* Check if enough inode space is available */ - if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - ext4_msg(sb, KERN_INFO, "required extra inode space not" - "available"); - } + ext4_clamp_want_extra_isize(sb); ext4_set_resv_clusters(sb); @@ -5197,6 +5205,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + ext4_clamp_want_extra_isize(sb); + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { ext4_msg(sb, KERN_ERR, "changing journal_checksum " From f795247578aaf398d2af4de902264d194c12222f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 25 Apr 2019 13:06:18 -0400 Subject: [PATCH 119/152] ext4: actually request zeroing of inode table after grow commit 310a997fd74de778b9a4848a64be9cda9f18764a upstream. It is never possible, that number of block groups decreases, since only online grow is supported. But after a growing occured, we have to zero inode tables for just created new block groups. Fixes: 19c5246d2516 ("ext4: add new online resize interface") Signed-off-by: Kirill Tkhai Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5f24fdc140ad..53d57cdf3c4d 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -977,7 +977,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err == 0) err = err2; mnt_drop_write_file(filp); - if (!err && (o_group > EXT4_SB(sb)->s_groups_count) && + if (!err && (o_group < EXT4_SB(sb)->s_groups_count) && ext4_has_group_desc_csum(sb) && test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, o_group); From 45123ae534e042e4e21d7e5882f2f22c92f35f46 Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Tue, 30 Apr 2019 23:08:15 -0400 Subject: [PATCH 120/152] ext4: fix ext4_show_options for file systems w/o journal commit 50b29d8f033a7c88c5bc011abc2068b1691ab755 upstream. Instead of removing EXT4_MOUNT_JOURNAL_CHECKSUM from s_def_mount_opt as I assume was intended, all other options were blown away leading to _ext4_show_options() output being incorrect. Fixes: 1e381f60dad9 ("ext4: do not allow journal_opts for fs w/o journal") Signed-off-by: Debabrata Banerjee Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a2437d352e09..cac6ea956659 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4270,7 +4270,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "data=, fs mounted w/o journal"); goto failed_mount_wq; } - sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; + sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); sbi->s_journal = NULL; From d8925a1fee71acb0fe1496a5fa80bda4c978d257 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 12 Mar 2019 17:10:40 +0800 Subject: [PATCH 121/152] btrfs: Check the first key and level for cached extent buffer commit 448de471cd4cab0cedd15770082567a69a784a11 upstream. [BUG] When reading a file from a fuzzed image, kernel can panic like: BTRFS warning (device loop0): csum failed root 5 ino 270 off 0 csum 0x98f94189 expected csum 0x00000000 mirror 1 assertion failed: !memcmp_extent_buffer(b, &disk_key, offsetof(struct btrfs_leaf, items[0].key), sizeof(disk_key)), file: fs/btrfs/ctree.c, line: 2544 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3500! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:btrfs_search_slot.cold.24+0x61/0x63 [btrfs] Call Trace: btrfs_lookup_csum+0x52/0x150 [btrfs] __btrfs_lookup_bio_sums+0x209/0x640 [btrfs] btrfs_submit_bio_hook+0x103/0x170 [btrfs] submit_one_bio+0x59/0x80 [btrfs] extent_read_full_page+0x58/0x80 [btrfs] generic_file_read_iter+0x2f6/0x9d0 __vfs_read+0x14d/0x1a0 vfs_read+0x8d/0x140 ksys_read+0x52/0xc0 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe [CAUSE] The fuzzed image has a corrupted leaf whose first key doesn't match its parent: checksum tree key (CSUM_TREE ROOT_ITEM 0) node 29741056 level 1 items 14 free 107 generation 19 owner CSUM_TREE fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c ... key (EXTENT_CSUM EXTENT_CSUM 79691776) block 29761536 gen 19 leaf 29761536 items 1 free space 1726 generation 19 owner CSUM_TREE leaf 29761536 flags 0x1(WRITTEN) backref revision 1 fs uuid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 chunk uuid 9af1c3c7-2af5-488b-8553-530bd515f14c item 0 key (EXTENT_CSUM EXTENT_CSUM 8798638964736) itemoff 1751 itemsize 2244 range start 8798638964736 end 8798641262592 length 2297856 When reading the above tree block, we have extent_buffer->refs = 2 in the context: - initial one from __alloc_extent_buffer() alloc_extent_buffer() |- __alloc_extent_buffer() |- atomic_set(&eb->refs, 1) - one being added to fs_info->buffer_radix alloc_extent_buffer() |- check_buffer_tree_ref() |- atomic_inc(&eb->refs) So if even we call free_extent_buffer() in read_tree_block or other similar situation, we only decrease the refs by 1, it doesn't reach 0 and won't be freed right now. The staled eb and its corrupted content will still be kept cached. Furthermore, we have several extra cases where we either don't do first key check or the check is not proper for all callers: - scrub We just don't have first key in this context. - shared tree block One tree block can be shared by several snapshot/subvolume trees. In that case, the first key check for one subvolume doesn't apply to another. So for the above reasons, a corrupted extent buffer can sneak into the buffer cache. [FIX] Call verify_level_key in read_block_for_search to do another verification. For that purpose the function is exported. Due to above reasons, although we can free corrupted extent buffer from cache, we still need the check in read_block_for_search(), for scrub and shared tree blocks. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202757 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202759 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202761 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202767 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202769 Reported-by: Yoon Jungyeon CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 10 ++++++++++ fs/btrfs/disk-io.c | 10 +++++----- fs/btrfs/disk-io.h | 3 +++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 48ac8b7c43a5..79ac1ebabaf7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2436,6 +2436,16 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (tmp) { /* first we do an atomic uptodate check */ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { + /* + * Do extra check for first_key, eb can be stale due to + * being cached, read from scrub, or have multiple + * parents (shared tree blocks). + */ + if (btrfs_verify_level_key(fs_info, tmp, + parent_level - 1, &first_key, gen)) { + free_extent_buffer(tmp); + return -EUCLEAN; + } *eb_ret = tmp; return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b4f61a3d560a..d67dbbd9e6ed 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -408,9 +408,9 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, return ret; } -static int verify_level_key(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int level, - struct btrfs_key *first_key, u64 parent_transid) +int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid) { int found_level; struct btrfs_key found_key; @@ -487,8 +487,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, if (verify_parent_transid(io_tree, eb, parent_transid, 0)) ret = -EIO; - else if (verify_level_key(fs_info, eb, level, - first_key, parent_transid)) + else if (btrfs_verify_level_key(fs_info, eb, level, + first_key, parent_transid)) ret = -EUCLEAN; else break; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 4cccba22640f..7a4a60f26dbf 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,6 +39,9 @@ static inline u64 btrfs_sb_offset(int mirror) struct btrfs_device; struct btrfs_fs_devices; +int btrfs_verify_level_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid); struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, u64 parent_transid, int level, struct btrfs_key *first_key); From 87dcf0c61985e4e6b44d9a95de54373438eb31cd Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 14 Mar 2019 09:52:35 +0200 Subject: [PATCH 122/152] btrfs: Correctly free extent buffer in case btree_read_extent_buffer_pages fails commit 537f38f019fa0b762dbb4c0fc95d7fcce9db8e2d upstream. If a an eb fails to be read for whatever reason - it's corrupted on disk and parent transid/key validations fail or IO for eb pages fail then this buffer must be removed from the buffer cache. Currently the code calls free_extent_buffer if an error occurs. Unfortunately this doesn't achieve the desired behavior since btrfs_find_create_tree_block returns with eb->refs == 2. On the other hand free_extent_buffer will only decrement the refs once leaving it added to the buffer cache radix tree. This enables later code to look up the buffer from the cache and utilize it potentially leading to a crash. The correct way to free the buffer is call free_extent_buffer_stale. This function will correctly call atomic_dec explicitly for the buffer and subsequently call release_extent_buffer which will decrement the final reference thus correctly remove the invalid buffer from buffer cache. This change affects only newly allocated buffers since they have eb->refs == 2. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202755 Reported-by: Jungyeon CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d67dbbd9e6ed..b2dc613ebed2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -995,13 +995,18 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) { struct extent_buffer *buf = NULL; struct inode *btree_inode = fs_info->btree_inode; + int ret; buf = btrfs_find_create_tree_block(fs_info, bytenr); if (IS_ERR(buf)) return; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, WAIT_NONE, 0); - free_extent_buffer(buf); + + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, + WAIT_NONE, 0); + if (ret < 0) + free_extent_buffer_stale(buf); + else + free_extent_buffer(buf); } int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, @@ -1021,12 +1026,12 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, mirror_num); if (ret) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return ret; } if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return -EIO; } else if (extent_buffer_uptodate(buf)) { *eb = buf; @@ -1080,7 +1085,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid, level, first_key); if (ret) { - free_extent_buffer(buf); + free_extent_buffer_stale(buf); return ERR_PTR(ret); } return buf; From 8b13bb911f0c0c77d41e5ddc41ad3c127c356b8a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 25 Mar 2019 14:31:21 +0200 Subject: [PATCH 123/152] btrfs: Honour FITRIM range constraints during free space trim commit c2d1b3aae33605a61cbab445d8ae1c708ccd2698 upstream. Up until now trimming the freespace was done irrespective of what the arguments of the FITRIM ioctl were. For example fstrim's -o/-l arguments will be entirely ignored. Fix it by correctly handling those paramter. This requires breaking if the found freespace extent is after the end of the passed range as well as completing trim after trimming fstrim_range::len bytes. Fixes: 499f377f49f0 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c0db7785cede..809c2c307c64 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10789,9 +10789,9 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * held back allocations. */ static int btrfs_trim_free_extents(struct btrfs_device *device, - u64 minlen, u64 *trimmed) + struct fstrim_range *range, u64 *trimmed) { - u64 start = 0, len = 0; + u64 start = range->start, len = 0; int ret; *trimmed = 0; @@ -10834,8 +10834,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, if (!trans) up_read(&fs_info->commit_root_sem); - ret = find_free_dev_extent_start(trans, device, minlen, start, - &start, &len); + ret = find_free_dev_extent_start(trans, device, range->minlen, + start, &start, &len); if (trans) { up_read(&fs_info->commit_root_sem); btrfs_put_transaction(trans); @@ -10848,6 +10848,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, break; } + /* If we are out of the passed range break */ + if (start > range->start + range->len - 1) { + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + + start = max(range->start, start); + len = min(range->len, len); + ret = btrfs_issue_discard(device->bdev, start, len, &bytes); mutex_unlock(&fs_info->chunk_mutex); @@ -10857,6 +10867,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start += len; *trimmed += bytes; + /* We've trimmed enough */ + if (*trimmed >= range->len) + break; + if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -10940,8 +10954,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { - ret = btrfs_trim_free_extents(device, range->minlen, - &group_trimmed); + ret = btrfs_trim_free_extents(device, range, &group_trimmed); if (ret) { dev_failed++; dev_ret = ret; From 74ca0a7671ccbaf5ed93e3d601c645d3f9335ad7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Apr 2019 09:29:36 +0100 Subject: [PATCH 124/152] Btrfs: send, flush dellaloc in order to avoid data loss commit 9f89d5de8631c7930898a601b6612e271aa2261c upstream. When we set a subvolume to read-only mode we do not flush dellaloc for any of its inodes (except if the filesystem is mounted with -o flushoncommit), since it does not affect correctness for any subsequent operations - except for a future send operation. The send operation will not be able to see the delalloc data since the respective file extent items, inode item updates, backreferences, etc, have not hit yet the subvolume and extent trees. Effectively this means data loss, since the send stream will not contain any data from existing delalloc. Another problem from this is that if the writeback starts and finishes while the send operation is in progress, we have the subvolume tree being being modified concurrently which can result in send failing unexpectedly with EIO or hitting runtime errors, assertion failures or hitting BUG_ONs, etc. Simple reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv $ xfs_io -f -c "pwrite -S 0xea 0 108K" /mnt/sv/foo $ btrfs property set /mnt/sv ro true $ btrfs send -f /tmp/send.stream /mnt/sv $ od -t x1 -A d /mnt/sv/foo 0000000 ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea ea * 0110592 $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ btrfs receive -f /tmp/send.stream /mnt $ echo $? 0 $ od -t x1 -A d /mnt/sv/foo 0000000 # ---> empty file Since this a problem that affects send only, fix it in send by flushing dellaloc for all the roots used by the send operation before send starts to process the commit roots. This is a problem that affects send since it was introduced (commit 31db9f7c23fbf7 ("Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive")) but backporting it to older kernels has some dependencies: - For kernels between 3.19 and 4.20, it depends on commit 3cd24c698004d2 ("btrfs: use tagged writepage to mitigate livelock of snapshot") because the function btrfs_start_delalloc_snapshot() does not exist before that commit. So one has to either pick that commit or replace the calls to btrfs_start_delalloc_snapshot() in this patch with calls to btrfs_start_delalloc_inodes(). - For kernels older than 3.19 it also requires commit e5fa8f865b3324 ("Btrfs: ensure send always works on roots without orphans") because it depends on the function ensure_commit_roots_uptodate() which that commits introduced. - No dependencies for 5.0+ kernels. A test case for fstests follows soon. CC: stable@vger.kernel.org # 3.19+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 84cb6e5ef36c..635e419f2a2d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6583,6 +6583,38 @@ static int ensure_commit_roots_uptodate(struct send_ctx *sctx) return btrfs_commit_transaction(trans); } +/* + * Make sure any existing dellaloc is flushed for any root used by a send + * operation so that we do not miss any data and we do not race with writeback + * finishing and changing a tree while send is using the tree. This could + * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and + * a send operation then uses the subvolume. + * After flushing delalloc ensure_commit_roots_uptodate() must be called. + */ +static int flush_delalloc_roots(struct send_ctx *sctx) +{ + struct btrfs_root *root = sctx->parent_root; + int ret; + int i; + + if (root) { + ret = btrfs_start_delalloc_snapshot(root); + if (ret) + return ret; + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); + } + + for (i = 0; i < sctx->clone_roots_cnt; i++) { + root = sctx->clone_roots[i].root; + ret = btrfs_start_delalloc_snapshot(root); + if (ret) + return ret; + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); + } + + return 0; +} + static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) { spin_lock(&root->root_item_lock); @@ -6807,6 +6839,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) NULL); sort_clone_roots = 1; + ret = flush_delalloc_roots(sctx); + if (ret) + goto out; + ret = ensure_commit_roots_uptodate(sctx); if (ret) goto out; From 0388d45afc5097c8141303d356b3bc1aaad80509 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Apr 2019 14:50:51 +0100 Subject: [PATCH 125/152] Btrfs: do not start a transaction during fiemap commit 03628cdbc64db6262e50d0357960a4e9562676a1 upstream. During fiemap, for regular extents (non inline) we need to check if they are shared and if they are, set the shared bit. Checking if an extent is shared requires checking the delayed references of the currently running transaction, since some reference might have not yet hit the extent tree and be only in the in-memory delayed references. However we were using a transaction join for this, which creates a new transaction when there is no transaction currently running. That means that two more potential failures can happen: creating the transaction and committing it. Further, if no write activity is currently happening in the system, and fiemap calls keep being done, we end up creating and committing transactions that do nothing. In some extreme cases this can result in the commit of the transaction created by fiemap to fail with ENOSPC when updating the root item of a subvolume tree because a join does not reserve any space, leading to a trace like the following: heisenberg kernel: ------------[ cut here ]------------ heisenberg kernel: BTRFS: Transaction aborted (error -28) heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] (...) heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] (...) heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 heisenberg kernel: Call Trace: heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] heisenberg kernel: ? _cond_resched+0x15/0x30 heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] heisenberg kernel: ? start_transaction+0x9d/0x3f0 [btrfs] heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] heisenberg kernel: kthread+0x112/0x130 heisenberg kernel: ? kthread_bind+0x30/0x30 heisenberg kernel: ret_from_fork+0x35/0x40 heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- Since fiemap (and btrfs_check_shared()) is a read-only operation, do not do a transaction join to avoid the overhead of creating a new transaction (if there is currently no running transaction) and introducing a potential point of failure when the new transaction gets committed, instead use a transaction attach to grab a handle for the currently running transaction if any. Reported-by: Christoph Anton Mitterer Link: https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ Fixes: afce772e87c36c ("btrfs: fix check_shared for fiemap ioctl") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/backref.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ae750b1574a2..73e715d1a24e 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1452,8 +1452,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, * callers (such as fiemap) which want to know whether the extent is * shared but do not need a ref count. * - * This attempts to allocate a transaction in order to account for - * delayed refs, but continues on even when the alloc fails. + * This attempts to attach to the running transaction in order to account for + * delayed refs, but continues on even when no running transaction exists. * * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. */ @@ -1476,13 +1476,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) tmp = ulist_alloc(GFP_NOFS); roots = ulist_alloc(GFP_NOFS); if (!tmp || !roots) { - ulist_free(tmp); - ulist_free(roots); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - trans = btrfs_join_transaction(root); + trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { + ret = PTR_ERR(trans); + goto out; + } trans = NULL; down_read(&fs_info->commit_root_sem); } else { @@ -1515,6 +1518,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) } else { up_read(&fs_info->commit_root_sem); } +out: ulist_free(tmp); ulist_free(roots); return ret; From 8a8f671b3dadf878967d917682563e9f0028da53 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Apr 2019 11:30:30 +0100 Subject: [PATCH 126/152] Btrfs: do not start a transaction at iterate_extent_inodes() commit bfc61c36260ca990937539cd648ede3cd749bc10 upstream. When finding out which inodes have references on a particular extent, done by backref.c:iterate_extent_inodes(), from the BTRFS_IOC_LOGICAL_INO (both v1 and v2) ioctl and from scrub we use the transaction join API to grab a reference on the currently running transaction, since in order to give accurate results we need to inspect the delayed references of the currently running transaction. However, if there is currently no running transaction, the join operation will create a new transaction. This is inefficient as the transaction will eventually be committed, doing unnecessary IO and introducing a potential point of failure that will lead to a transaction abort due to -ENOSPC, as recently reported [1]. That's because the join, creates the transaction but does not reserve any space, so when attempting to update the root item of the root passed to btrfs_join_transaction(), during the transaction commit, we can end up failling with -ENOSPC. Users of a join operation are supposed to actually do some filesystem changes and reserve space by some means, which is not the case of iterate_extent_inodes(), it is a read-only operation for all contextes from which it is called. The reported [1] -ENOSPC failure stack trace is the following: heisenberg kernel: ------------[ cut here ]------------ heisenberg kernel: BTRFS: Transaction aborted (error -28) heisenberg kernel: WARNING: CPU: 0 PID: 7137 at fs/btrfs/root-tree.c:136 btrfs_update_root+0x22b/0x320 [btrfs] (...) heisenberg kernel: CPU: 0 PID: 7137 Comm: btrfs-transacti Not tainted 4.19.0-4-amd64 #1 Debian 4.19.28-2 heisenberg kernel: Hardware name: FUJITSU LIFEBOOK U757/FJNB2A5, BIOS Version 1.21 03/19/2018 heisenberg kernel: RIP: 0010:btrfs_update_root+0x22b/0x320 [btrfs] (...) heisenberg kernel: RSP: 0018:ffffb5448828bd40 EFLAGS: 00010286 heisenberg kernel: RAX: 0000000000000000 RBX: ffff8ed56bccef50 RCX: 0000000000000006 heisenberg kernel: RDX: 0000000000000007 RSI: 0000000000000092 RDI: ffff8ed6bda166a0 heisenberg kernel: RBP: 00000000ffffffe4 R08: 00000000000003df R09: 0000000000000007 heisenberg kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff8ed63396a078 heisenberg kernel: R13: ffff8ed092d7c800 R14: ffff8ed64f5db028 R15: ffff8ed6bd03d068 heisenberg kernel: FS: 0000000000000000(0000) GS:ffff8ed6bda00000(0000) knlGS:0000000000000000 heisenberg kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 heisenberg kernel: CR2: 00007f46f75f8000 CR3: 0000000310a0a002 CR4: 00000000003606f0 heisenberg kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 heisenberg kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 heisenberg kernel: Call Trace: heisenberg kernel: commit_fs_roots+0x166/0x1d0 [btrfs] heisenberg kernel: ? _cond_resched+0x15/0x30 heisenberg kernel: ? btrfs_run_delayed_refs+0xac/0x180 [btrfs] heisenberg kernel: btrfs_commit_transaction+0x2bd/0x870 [btrfs] heisenberg kernel: ? start_transaction+0x9d/0x3f0 [btrfs] heisenberg kernel: transaction_kthread+0x147/0x180 [btrfs] heisenberg kernel: ? btrfs_cleanup_transaction+0x530/0x530 [btrfs] heisenberg kernel: kthread+0x112/0x130 heisenberg kernel: ? kthread_bind+0x30/0x30 heisenberg kernel: ret_from_fork+0x35/0x40 heisenberg kernel: ---[ end trace 05de912e30e012d9 ]--- So fix that by using the attach API, which does not create a transaction when there is currently no running transaction. [1] https://lore.kernel.org/linux-btrfs/b2a668d7124f1d3e410367f587926f622b3f03a4.camel@scientia.net/ Reported-by: Zygo Blaxell CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/backref.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 73e715d1a24e..2a4f52c7be22 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1908,14 +1908,20 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, extent_item_objectid); if (!search_commit_root) { - trans = btrfs_join_transaction(fs_info->extent_root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); - } else { - down_read(&fs_info->commit_root_sem); + trans = btrfs_attach_transaction(fs_info->extent_root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT && + PTR_ERR(trans) != -EROFS) + return PTR_ERR(trans); + trans = NULL; + } } + if (trans) + btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); + else + down_read(&fs_info->commit_root_sem); + ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, tree_mod_seq_elem.seq, &refs, &extent_item_pos, ignore_offset); @@ -1947,7 +1953,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, free_leaf_list(refs); out: - if (!search_commit_root) { + if (trans) { btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); btrfs_end_transaction(trans); } else { From ecfc882f644190e09eaba7cda7dca8a327a1b020 Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Thu, 25 Apr 2019 00:48:31 +0800 Subject: [PATCH 127/152] bcache: fix a race between cache register and cacheset unregister commit a4b732a248d12cbdb46999daf0bf288c011335eb upstream. There is a race between cache device register and cache set unregister. For an already registered cache device, register_bcache will call bch_is_open to iterate through all cachesets and check every cache there. The race occurs if cache_set_free executes at the same time and clears the caches right before ca is dereferenced in bch_is_open_cache. To close the race, let's make sure the clean up work is protected by the bch_register_lock as well. This issue can be reproduced as follows, while true; do echo /dev/XXX> /sys/fs/bcache/register ; done& while true; do echo 1> /sys/block/XXX/bcache/set/unregister ; done & and results in the following oops, [ +0.000053] BUG: unable to handle kernel NULL pointer dereference at 0000000000000998 [ +0.000457] #PF error: [normal kernel read fault] [ +0.000464] PGD 800000003ca9d067 P4D 800000003ca9d067 PUD 3ca9c067 PMD 0 [ +0.000388] Oops: 0000 [#1] SMP PTI [ +0.000269] CPU: 1 PID: 3266 Comm: bash Not tainted 5.0.0+ #6 [ +0.000346] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014 [ +0.000472] RIP: 0010:register_bcache+0x1829/0x1990 [bcache] [ +0.000344] Code: b0 48 83 e8 50 48 81 fa e0 e1 10 c0 0f 84 a9 00 00 00 48 89 c6 48 89 ca 0f b7 ba 54 04 00 00 4c 8b 82 60 0c 00 00 85 ff 74 2f <49> 3b a8 98 09 00 00 74 4e 44 8d 47 ff 31 ff 49 c1 e0 03 eb 0d [ +0.000839] RSP: 0018:ffff92ee804cbd88 EFLAGS: 00010202 [ +0.000328] RAX: ffffffffc010e190 RBX: ffff918b5c6b5000 RCX: ffff918b7d8e0000 [ +0.000399] RDX: ffff918b7d8e0000 RSI: ffffffffc010e190 RDI: 0000000000000001 [ +0.000398] RBP: ffff918b7d318340 R08: 0000000000000000 R09: ffffffffb9bd2d7a [ +0.000385] R10: ffff918b7eb253c0 R11: ffffb95980f51200 R12: ffffffffc010e1a0 [ +0.000411] R13: fffffffffffffff2 R14: 000000000000000b R15: ffff918b7e232620 [ +0.000384] FS: 00007f955bec2740(0000) GS:ffff918b7eb00000(0000) knlGS:0000000000000000 [ +0.000420] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000801] CR2: 0000000000000998 CR3: 000000003cad6000 CR4: 00000000001406e0 [ +0.000837] Call Trace: [ +0.000682] ? _cond_resched+0x10/0x20 [ +0.000691] ? __kmalloc+0x131/0x1b0 [ +0.000710] kernfs_fop_write+0xfa/0x170 [ +0.000733] __vfs_write+0x2e/0x190 [ +0.000688] ? inode_security+0x10/0x30 [ +0.000698] ? selinux_file_permission+0xd2/0x120 [ +0.000752] ? security_file_permission+0x2b/0x100 [ +0.000753] vfs_write+0xa8/0x1a0 [ +0.000676] ksys_write+0x4d/0xb0 [ +0.000699] do_syscall_64+0x3a/0xf0 [ +0.000692] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Liang Chen Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 03bb5cee2b83..2c0d35c882ed 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1511,6 +1511,7 @@ static void cache_set_free(struct closure *cl) bch_btree_cache_free(c); bch_journal_free(c); + mutex_lock(&bch_register_lock); for_each_cache(ca, c, i) if (ca) { ca->set = NULL; @@ -1529,7 +1530,6 @@ static void cache_set_free(struct closure *cl) mempool_exit(&c->search); kfree(c->devices); - mutex_lock(&bch_register_lock); list_del(&c->list); mutex_unlock(&bch_register_lock); From 88681649ed8c88cfe565a55b47728edee91d6d53 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 25 Apr 2019 00:48:33 +0800 Subject: [PATCH 128/152] bcache: never set KEY_PTRS of journal key to 0 in journal_reclaim() commit 1bee2addc0c8470c8aaa65ef0599eeae96dd88bc upstream. In journal_reclaim() ja->cur_idx of each cache will be update to reclaim available journal buckets. Variable 'int n' is used to count how many cache is successfully reclaimed, then n is set to c->journal.key by SET_KEY_PTRS(). Later in journal_write_unlocked(), a for_each_cache() loop will write the jset data onto each cache. The problem is, if all jouranl buckets on each cache is full, the following code in journal_reclaim(), 529 for_each_cache(ca, c, iter) { 530 struct journal_device *ja = &ca->journal; 531 unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; 532 533 /* No space available on this device */ 534 if (next == ja->discard_idx) 535 continue; 536 537 ja->cur_idx = next; 538 k->ptr[n++] = MAKE_PTR(0, 539 bucket_to_sector(c, ca->sb.d[ja->cur_idx]), 540 ca->sb.nr_this_dev); 541 } 542 543 bkey_init(k); 544 SET_KEY_PTRS(k, n); If there is no available bucket to reclaim, the if() condition at line 534 will always true, and n remains 0. Then at line 544, SET_KEY_PTRS() will set KEY_PTRS field of c->journal.key to 0. Setting KEY_PTRS field of c->journal.key to 0 is wrong. Because in journal_write_unlocked() the journal data is written in following loop, 649 for (i = 0; i < KEY_PTRS(k); i++) { 650-671 submit journal data to cache device 672 } If KEY_PTRS field is set to 0 in jouranl_reclaim(), the journal data won't be written to cache device here. If system crahed or rebooted before bkeys of the lost journal entries written into btree nodes, data corruption will be reported during bcache reload after rebooting the system. Indeed there is only one cache in a cache set, there is no need to set KEY_PTRS field in journal_reclaim() at all. But in order to keep the for_each_cache() logic consistent for now, this patch fixes the above problem by not setting 0 KEY_PTRS of journal key, if there is no bucket available to reclaim. Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/journal.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 522c7426f3a0..772258ee1f51 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -540,11 +540,11 @@ static void journal_reclaim(struct cache_set *c) ca->sb.nr_this_dev); } - bkey_init(k); - SET_KEY_PTRS(k, n); - - if (n) + if (n) { + bkey_init(k); + SET_KEY_PTRS(k, n); c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; + } out: if (!journal_full(&c->journal)) __closure_wake_up(&c->journal.wait); @@ -671,6 +671,9 @@ static void journal_write_unlocked(struct closure *cl) ca->journal.seq[ca->journal.cur_idx] = w->data->seq; } + /* If KEY_PTRS(k) == 0, this jset gets lost in air */ + BUG_ON(i == 0); + atomic_dec_bug(&fifo_back(&c->journal.pin)); bch_journal_next(&c->journal); journal_reclaim(c); From f6de0a3b1e66d544c1e4b28b5e864c574baa6dcc Mon Sep 17 00:00:00 2001 From: Kamlakant Patel Date: Wed, 24 Apr 2019 11:50:43 +0000 Subject: [PATCH 129/152] ipmi:ssif: compare block number correctly for multi-part return messages commit 55be8658c7e2feb11a5b5b33ee031791dbd23a69 upstream. According to ipmi spec, block number is a number that is incremented, starting with 0, for each new block of message data returned using the middle transaction. Here, the 'blocknum' is data[0] which always starts from zero(0) and 'ssif_info->multi_pos' starts from 1. So, we need to add +1 to blocknum while comparing with multi_pos. Fixes: 7d6380cd40f79 ("ipmi:ssif: Fix handling of multi-part return messages"). Reported-by: Kiran Kolukuluru Signed-off-by: Kamlakant Patel Message-Id: <1556106615-18722-1-git-send-email-kamlakantp@marvell.com> [Also added a debug log if the block numbers don't match.] Signed-off-by: Corey Minyard Cc: stable@vger.kernel.org # 4.4 Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_ssif.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 76c2010ba672..af44db2dfb68 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -688,12 +688,16 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, /* End of read */ len = ssif_info->multi_len; data = ssif_info->data; - } else if (blocknum != ssif_info->multi_pos) { + } else if (blocknum + 1 != ssif_info->multi_pos) { /* * Out of sequence block, just abort. Block * numbers start at zero for the second block, * but multi_pos starts at one, so the +1. */ + if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) + dev_dbg(&ssif_info->client->dev, + "Received message out of sequence, expected %u, got %u\n", + ssif_info->multi_pos - 1, blocknum); result = -EIO; } else { ssif_inc_stat(ssif_info, received_message_parts); From a80da82d0840b08188252ae262ef77bdf0b08cff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 18 Apr 2019 14:44:27 -0700 Subject: [PATCH 130/152] crypto: ccm - fix incompatibility between "ccm" and "ccm_base" commit 6a1faa4a43f5fabf9cbeaa742d916e7b5e73120f upstream. CCM instances can be created by either the "ccm" template, which only allows choosing the block cipher, e.g. "ccm(aes)"; or by "ccm_base", which allows choosing the ctr and cbcmac implementations, e.g. "ccm_base(ctr(aes-generic),cbcmac(aes-generic))". However, a "ccm_base" instance prevents a "ccm" instance from being registered using the same implementations. Nor will the instance be found by lookups of "ccm". This can be used as a denial of service. Moreover, "ccm_base" instances are never tested by the crypto self-tests, even if there are compatible "ccm" tests. The root cause of these problems is that instances of the two templates use different cra_names. Therefore, fix these problems by making "ccm_base" instances set the same cra_name as "ccm" instances, e.g. "ccm(aes)" instead of "ccm_base(ctr(aes-generic),cbcmac(aes-generic))". This requires extracting the block cipher name from the name of the ctr and cbcmac algorithms. It also requires starting to verify that the algorithms are really ctr and cbcmac using the same block cipher, not something else entirely. But it would be bizarre if anyone were actually using non-ccm-compatible algorithms with ccm_base, so this shouldn't break anyone in practice. Fixes: 4a49b499dfa0 ("[CRYPTO] ccm: Added CCM mode") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ccm.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 0a083342ec8c..8104c564dd31 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -455,7 +455,6 @@ static void crypto_ccm_free(struct aead_instance *inst) static int crypto_ccm_create_common(struct crypto_template *tmpl, struct rtattr **tb, - const char *full_name, const char *ctr_name, const char *mac_name) { @@ -483,7 +482,8 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, mac = __crypto_hash_alg_common(mac_alg); err = -EINVAL; - if (mac->digestsize != 16) + if (strncmp(mac->base.cra_name, "cbcmac(", 7) != 0 || + mac->digestsize != 16) goto out_put_mac; inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL); @@ -506,23 +506,27 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, ctr = crypto_spawn_skcipher_alg(&ictx->ctr); - /* Not a stream cipher? */ + /* The skcipher algorithm must be CTR mode, using 16-byte blocks. */ err = -EINVAL; - if (ctr->base.cra_blocksize != 1) + if (strncmp(ctr->base.cra_name, "ctr(", 4) != 0 || + crypto_skcipher_alg_ivsize(ctr) != 16 || + ctr->base.cra_blocksize != 1) goto err_drop_ctr; - /* We want the real thing! */ - if (crypto_skcipher_alg_ivsize(ctr) != 16) + /* ctr and cbcmac must use the same underlying block cipher. */ + if (strcmp(ctr->base.cra_name + 4, mac->base.cra_name + 7) != 0) goto err_drop_ctr; err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "ccm(%s", ctr->base.cra_name + 4) >= CRYPTO_MAX_ALG_NAME) + goto err_drop_ctr; + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)", ctr->base.cra_driver_name, mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_drop_ctr; - memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME); - inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = (mac->base.cra_priority + ctr->base.cra_priority) / 2; @@ -564,7 +568,6 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb) const char *cipher_name; char ctr_name[CRYPTO_MAX_ALG_NAME]; char mac_name[CRYPTO_MAX_ALG_NAME]; - char full_name[CRYPTO_MAX_ALG_NAME]; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) @@ -578,12 +581,7 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb) cipher_name) >= CRYPTO_MAX_ALG_NAME) return -ENAMETOOLONG; - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >= - CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name, - mac_name); + return crypto_ccm_create_common(tmpl, tb, ctr_name, mac_name); } static struct crypto_template crypto_ccm_tmpl = { @@ -596,23 +594,17 @@ static int crypto_ccm_base_create(struct crypto_template *tmpl, struct rtattr **tb) { const char *ctr_name; - const char *cipher_name; - char full_name[CRYPTO_MAX_ALG_NAME]; + const char *mac_name; ctr_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(ctr_name)) return PTR_ERR(ctr_name); - cipher_name = crypto_attr_alg_name(tb[2]); - if (IS_ERR(cipher_name)) - return PTR_ERR(cipher_name); + mac_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(mac_name)) + return PTR_ERR(mac_name); - if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)", - ctr_name, cipher_name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name, - cipher_name); + return crypto_ccm_create_common(tmpl, tb, ctr_name, mac_name); } static struct crypto_template crypto_ccm_base_tmpl = { From 986d3453bee4e932b76261679e65881021eb2a8b Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Fri, 17 May 2019 14:31:44 -0700 Subject: [PATCH 131/152] fs/writeback.c: use rcu_barrier() to wait for inflight wb switches going into workqueue when umount commit ec084de929e419e51bcdafaafe567d9e7d0273b7 upstream. synchronize_rcu() didn't wait for call_rcu() callbacks, so inode wb switch may not go to the workqueue after synchronize_rcu(). Thus previous scheduled switches was not finished even flushing the workqueue, which will cause a NULL pointer dereferenced followed below. VFS: Busy inodes after unmount of vdd. Self-destruct in 5 seconds. Have a nice day... BUG: unable to handle kernel NULL pointer dereference at 0000000000000278 evict+0xb3/0x180 iput+0x1b0/0x230 inode_switch_wbs_work_fn+0x3c0/0x6a0 worker_thread+0x4e/0x490 ? process_one_work+0x410/0x410 kthread+0xe6/0x100 ret_from_fork+0x39/0x50 Replace the synchronize_rcu() call with a rcu_barrier() to wait for all pending callbacks to finish. And inc isw_nr_in_flight after call_rcu() in inode_switch_wbs() to make more sense. Link: http://lkml.kernel.org/r/20190429024108.54150-1-jiufei.xue@linux.alibaba.com Signed-off-by: Jiufei Xue Acked-by: Tejun Heo Suggested-by: Tejun Heo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 82ce6d4f7e31..9544e2f8b79f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -530,8 +530,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) isw->inode = inode; - atomic_inc(&isw_nr_in_flight); - /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the i_page @@ -539,6 +537,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); + + atomic_inc(&isw_nr_in_flight); + goto out_unlock; out_free: @@ -908,7 +909,11 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, void cgroup_writeback_umount(void) { if (atomic_read(&isw_nr_in_flight)) { - synchronize_rcu(); + /* + * Use rcu_barrier() to wait for all pending callbacks to + * ensure that all in-flight wb switches are in the workqueue. + */ + rcu_barrier(); flush_workqueue(isw_wq); } } From c907ce3fd5528cf0483bcac7db55a3d82d35e000 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 25 Apr 2019 06:35:06 -0700 Subject: [PATCH 132/152] tty: Don't force RISCV SBI console as preferred console commit f91253a3d005796404ae0e578b3394459b5f9b71 upstream. The Linux kernel will auto-disables all boot consoles whenever it gets a preferred real console. Currently on RISC-V systems, if we have a real console which is not RISCV SBI console then boot consoles (such as earlycon=sbi) are not auto-disabled when a real console (ttyS0 or ttySIF0) is available. This results in duplicate prints at boot-time after kernel starts using real console (i.e. ttyS0 or ttySIF0) if "earlycon=" kernel parameter was passed by bootloader. The reason for above issue is that RISCV SBI console always adds itself as preferred console which is causing other real consoles to be not used as preferred console. Ideally "console=" kernel parameter passed by bootloaders should be the one selecting a preferred real console. This patch fixes above issue by not forcing RISCV SBI console as preferred console. Fixes: afa6b1ccfad5 ("tty: New RISC-V SBI console driver") Cc: stable@vger.kernel.org Signed-off-by: Anup Patel Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_riscv_sbi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_riscv_sbi.c b/drivers/tty/hvc/hvc_riscv_sbi.c index 75155bde2b88..31f53fa77e4a 100644 --- a/drivers/tty/hvc/hvc_riscv_sbi.c +++ b/drivers/tty/hvc/hvc_riscv_sbi.c @@ -53,7 +53,6 @@ device_initcall(hvc_sbi_init); static int __init hvc_sbi_console_init(void) { hvc_instantiate(0, 0, &hvc_sbi_ops); - add_preferred_console("hvc", 0, NULL); return 0; } From 25d010f4e0ece1ddf0d8d57942c0b0f1568fe498 Mon Sep 17 00:00:00 2001 From: Sriram Rajagopalan Date: Fri, 10 May 2019 19:28:06 -0400 Subject: [PATCH 133/152] ext4: zero out the unused memory region in the extent tree block commit 592acbf16821288ecdc4192c47e3774a4c48bb64 upstream. This commit zeroes out the unused memory region in the buffer_head corresponding to the extent metablock after writing the extent header and the corresponding extent node entries. This is done to prevent random uninitialized data from getting into the filesystem when the extent block is synced. This fixes CVE-2019-11833. Signed-off-by: Sriram Rajagopalan Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 72a361d5ef74..45aea792d22a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1035,6 +1035,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, __le32 border; ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ int err = 0; + size_t ext_size = 0; /* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */ @@ -1126,6 +1127,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries); + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1205,6 +1210,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries)); + memset(bh->b_data + ext_size, 0, + inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1270,6 +1280,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock, goal = 0; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err = 0; + size_t ext_size = 0; /* Try to prepend new index to old one */ if (ext_depth(inode)) @@ -1295,9 +1306,11 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, goto out; } + ext_size = sizeof(EXT4_I(inode)->i_data); /* move top-level index/leaf into new block */ - memmove(bh->b_data, EXT4_I(inode)->i_data, - sizeof(EXT4_I(inode)->i_data)); + memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size); + /* zero out unused area in the extent block */ + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); /* set size of new block */ neh = ext_block_hdr(bh); From 0db24122bd7f06e9a7ae24dbda6411b477f5b9dd Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 10 May 2019 21:45:33 -0400 Subject: [PATCH 134/152] ext4: fix data corruption caused by overlapping unaligned and aligned IO commit 57a0da28ced8707cb9f79f071a016b9d005caf5a upstream. Unaligned AIO must be serialized because the zeroing of partial blocks of unaligned AIO can result in data corruption in case it's overlapping another in flight IO. Currently we wait for all unwritten extents before we submit unaligned AIO which protects data in case of unaligned AIO is following overlapping IO. However if a unaligned AIO is followed by overlapping aligned AIO we can still end up corrupting data. To fix this, we must make sure that the unaligned AIO is the only IO in flight by waiting for unwritten extents conversion not just before the IO submission, but right after it as well. This problem can be reproduced by xfstest generic/538 Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 98ec11f69cd4..2c5baa5e8291 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -264,6 +264,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } ret = __generic_file_write_iter(iocb, from); + /* + * Unaligned direct AIO must be the only IO in flight. Otherwise + * overlapping aligned IO after unaligned might result in data + * corruption. + */ + if (ret == -EIOCBQUEUED && unaligned_aio) + ext4_unwritten_wait(inode); inode_unlock(inode); if (ret > 0) From c19db366c0a809027289c470051fba31fbddbe76 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 10 May 2019 22:00:33 -0400 Subject: [PATCH 135/152] ext4: fix use-after-free in dx_release() commit 08fc98a4d6424af66eb3ac4e2cedd2fc927ed436 upstream. The buffer_head (frames[0].bh) and it's corresping page can be potentially free'd once brelse() is done inside the for loop but before the for loop exits in dx_release(). It can be free'd in another context, when the page cache is flushed via drop_caches_sysctl_handler(). This results into below data abort when accessing info->indirect_levels in dx_release(). Unable to handle kernel paging request at virtual address ffffffc17ac3e01e Call trace: dx_release+0x70/0x90 ext4_htree_fill_tree+0x2d4/0x300 ext4_readdir+0x244/0x6f8 iterate_dir+0xbc/0x160 SyS_getdents64+0x94/0x174 Signed-off-by: Sahitya Tummala Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4f8de2b9e87e..4c5aa5df6573 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -871,12 +871,15 @@ static void dx_release(struct dx_frame *frames) { struct dx_root_info *info; int i; + unsigned int indirect_levels; if (frames[0].bh == NULL) return; info = &((struct dx_root *)frames[0].bh->b_data)->info; - for (i = 0; i <= info->indirect_levels; i++) { + /* save local copy, "info" may be freed after brelse() */ + indirect_levels = info->indirect_levels; + for (i = 0; i <= indirect_levels; i++) { if (frames[i].bh == NULL) break; brelse(frames[i].bh); From 316063bf7d11c6a6feb8aba51987cf23181544aa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 17 May 2019 17:37:18 -0400 Subject: [PATCH 136/152] ext4: avoid panic during forced reboot due to aborted journal commit 2c1d0e3631e5732dba98ef49ac0bec1388776793 upstream. Handling of aborted journal is a special code path different from standard ext4_error() one and it can call panic() as well. Commit 1dc1097ff60e ("ext4: avoid panic during forced reboot") forgot to update this path so fix that omission. Fixes: 1dc1097ff60e ("ext4: avoid panic during forced reboot") Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org # 5.1 Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cac6ea956659..07d73fb22b60 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -698,7 +698,7 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) { + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { if (EXT4_SB(sb)->s_journal && !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) return; From ae31551237041e10bba456c2ed0b82939872ff40 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Fri, 10 May 2019 10:15:07 -0400 Subject: [PATCH 137/152] ALSA: hda/realtek - Corrected fixup for System76 Gazelle (gaze14) commit 891afcf2462d2cc4ef7caf94215358ca61fa32cb upstream. A mistake was made in the identification of the four variants of the System76 Gazelle (gaze14). This patch corrects the PCI ID of the 17-inch, GTX 1660 Ti variant from 0x8560 to 0x8551. This patch also adds the correct fixups for the 15-inch and 17-inch GTX 1650 variants with PCI IDs 0x8560 and 0x8561. Tests were done on all four variants ensuring full audio capability. Fixes: 80a5052db751 ("ALSA: hdea/realtek - Headset fixup for System76 Gazelle (gaze14)") Signed-off-by: Jeremy Soller Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c72a8ec5dc2f..3ffb6d99c271 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6841,7 +6841,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8550, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), From e0e1dc65bb13255157dbdc795ecbf899589f5016 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 10 May 2019 16:28:57 +0800 Subject: [PATCH 138/152] ALSA: hda/realtek - Fixup headphone noise via runtime suspend commit dad3197da7a3817f27bb24f7fd3c135ffa707202 upstream. Dell platform with ALC298. system enter to runtime suspend. Headphone had noise. Let Headset Mic not shutup will solve this issue. [ Fixed minor coding style issues by tiwai ] Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 59 +++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3ffb6d99c271..73157e8a8548 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -477,12 +477,45 @@ static void alc_auto_setup_eapd(struct hda_codec *codec, bool on) set_eapd(codec, *p, on); } +static int find_ext_mic_pin(struct hda_codec *codec); + +static void alc_headset_mic_no_shutup(struct hda_codec *codec) +{ + const struct hda_pincfg *pin; + int mic_pin = find_ext_mic_pin(codec); + int i; + + /* don't shut up pins when unloading the driver; otherwise it breaks + * the default pin setup at the next load of the driver + */ + if (codec->bus->shutdown) + return; + + snd_array_for_each(&codec->init_pins, i, pin) { + /* use read here for syncing after issuing each verb */ + if (pin->nid != mic_pin) + snd_hda_codec_read(codec, pin->nid, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0); + } + + codec->pins_shutup = 1; +} + static void alc_shutup_pins(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - if (!spec->no_shutup_pins) - snd_hda_shutup_pins(codec); + switch (codec->core.vendor_id) { + case 0x10ec0286: + case 0x10ec0288: + case 0x10ec0298: + alc_headset_mic_no_shutup(codec); + break; + default: + if (!spec->no_shutup_pins) + snd_hda_shutup_pins(codec); + break; + } } /* generic shutup callback; @@ -2923,27 +2956,6 @@ static int alc269_parse_auto_config(struct hda_codec *codec) return alc_parse_auto_config(codec, alc269_ignore, ssids); } -static int find_ext_mic_pin(struct hda_codec *codec); - -static void alc286_shutup(struct hda_codec *codec) -{ - const struct hda_pincfg *pin; - int i; - int mic_pin = find_ext_mic_pin(codec); - /* don't shut up pins when unloading the driver; otherwise it breaks - * the default pin setup at the next load of the driver - */ - if (codec->bus->shutdown) - return; - snd_array_for_each(&codec->init_pins, i, pin) { - /* use read here for syncing after issuing each verb */ - if (pin->nid != mic_pin) - snd_hda_codec_read(codec, pin->nid, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0); - } - codec->pins_shutup = 1; -} - static void alc269vb_toggle_power_output(struct hda_codec *codec, int power_up) { alc_update_coef_idx(codec, 0x04, 1 << 11, power_up ? (1 << 11) : 0); @@ -7611,7 +7623,6 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0286: case 0x10ec0288: spec->codec_variant = ALC269_TYPE_ALC286; - spec->shutup = alc286_shutup; break; case 0x10ec0298: spec->codec_variant = ALC269_TYPE_ALC298; From 95482af27161083976c7f5bdf347992deb3767ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wadowski?= Date: Tue, 14 May 2019 16:58:00 +0200 Subject: [PATCH 139/152] ALSA: hda/realtek - Fix for Lenovo B50-70 inverted internal microphone bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 56df90b631fc027fe28b70d41352d820797239bb upstream. Add patch for realtek codec in Lenovo B50-70 that fixes inverted internal microphone channel. Device IdeaPad Y410P has the same PCI SSID as Lenovo B50-70, but first one is about fix the noise and it didn't seem help in a later kernel version. So I replaced IdeaPad Y410P device description with B50-70 and apply inverted microphone fix. Bugzilla: https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1524215 Signed-off-by: Michał Wadowski Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 73157e8a8548..adce5b60d5b4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6898,7 +6898,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), - SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x501e, "Thinkpad L440", ALC292_FIXUP_TPT440_DOCK), From 5b8567682489dbbb0742c9fc3f6711bb6a01f540 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 10 May 2019 21:15:47 -0400 Subject: [PATCH 140/152] jbd2: fix potential double free commit 0d52154bb0a700abb459a2cbce0a30fc2549b67e upstream. When failing from creating cache jbd2_inode_cache, we will destroy the previously created cache jbd2_handle_cache twice. This patch fixes this by moving each cache initialization/destruction to its own separate, individual function. Signed-off-by: Chengguang Xu Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/journal.c | 53 +++++++++++++++++++++++++++---------------- fs/jbd2/revoke.c | 32 ++++++++++++++++---------- fs/jbd2/transaction.c | 8 ++++--- include/linux/jbd2.h | 8 ++++--- 4 files changed, 63 insertions(+), 38 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9e618777c699..e9cf88f0bc29 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2389,22 +2389,19 @@ static struct kmem_cache *jbd2_journal_head_cache; static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif -static int jbd2_journal_init_journal_head_cache(void) +static int __init jbd2_journal_init_journal_head_cache(void) { - int retval; - - J_ASSERT(jbd2_journal_head_cache == NULL); + J_ASSERT(!jbd2_journal_head_cache); jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", sizeof(struct journal_head), 0, /* offset */ SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, NULL); /* ctor */ - retval = 0; if (!jbd2_journal_head_cache) { - retval = -ENOMEM; printk(KERN_EMERG "JBD2: no memory for journal_head cache\n"); + return -ENOMEM; } - return retval; + return 0; } static void jbd2_journal_destroy_journal_head_cache(void) @@ -2650,28 +2647,38 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void) struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; -static int __init jbd2_journal_init_handle_cache(void) +static int __init jbd2_journal_init_inode_cache(void) { - jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); - if (jbd2_handle_cache == NULL) { - printk(KERN_EMERG "JBD2: failed to create handle cache\n"); - return -ENOMEM; - } + J_ASSERT(!jbd2_inode_cache); jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0); - if (jbd2_inode_cache == NULL) { - printk(KERN_EMERG "JBD2: failed to create inode cache\n"); - kmem_cache_destroy(jbd2_handle_cache); + if (!jbd2_inode_cache) { + pr_emerg("JBD2: failed to create inode cache\n"); return -ENOMEM; } return 0; } +static int __init jbd2_journal_init_handle_cache(void) +{ + J_ASSERT(!jbd2_handle_cache); + jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); + if (!jbd2_handle_cache) { + printk(KERN_EMERG "JBD2: failed to create handle cache\n"); + return -ENOMEM; + } + return 0; +} + +static void jbd2_journal_destroy_inode_cache(void) +{ + kmem_cache_destroy(jbd2_inode_cache); + jbd2_inode_cache = NULL; +} + static void jbd2_journal_destroy_handle_cache(void) { kmem_cache_destroy(jbd2_handle_cache); jbd2_handle_cache = NULL; - kmem_cache_destroy(jbd2_inode_cache); - jbd2_inode_cache = NULL; } /* @@ -2682,11 +2689,15 @@ static int __init journal_init_caches(void) { int ret; - ret = jbd2_journal_init_revoke_caches(); + ret = jbd2_journal_init_revoke_record_cache(); + if (ret == 0) + ret = jbd2_journal_init_revoke_table_cache(); if (ret == 0) ret = jbd2_journal_init_journal_head_cache(); if (ret == 0) ret = jbd2_journal_init_handle_cache(); + if (ret == 0) + ret = jbd2_journal_init_inode_cache(); if (ret == 0) ret = jbd2_journal_init_transaction_cache(); return ret; @@ -2694,9 +2705,11 @@ static int __init journal_init_caches(void) static void jbd2_journal_destroy_caches(void) { - jbd2_journal_destroy_revoke_caches(); + jbd2_journal_destroy_revoke_record_cache(); + jbd2_journal_destroy_revoke_table_cache(); jbd2_journal_destroy_journal_head_cache(); jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_inode_cache(); jbd2_journal_destroy_transaction_cache(); jbd2_journal_destroy_slabs(); } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a1143e57a718..69b9bc329964 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -178,33 +178,41 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } -void jbd2_journal_destroy_revoke_caches(void) +void jbd2_journal_destroy_revoke_record_cache(void) { kmem_cache_destroy(jbd2_revoke_record_cache); jbd2_revoke_record_cache = NULL; +} + +void jbd2_journal_destroy_revoke_table_cache(void) +{ kmem_cache_destroy(jbd2_revoke_table_cache); jbd2_revoke_table_cache = NULL; } -int __init jbd2_journal_init_revoke_caches(void) +int __init jbd2_journal_init_revoke_record_cache(void) { J_ASSERT(!jbd2_revoke_record_cache); - J_ASSERT(!jbd2_revoke_table_cache); - jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); - if (!jbd2_revoke_record_cache) - goto record_cache_failure; + if (!jbd2_revoke_record_cache) { + pr_emerg("JBD2: failed to create revoke_record cache\n"); + return -ENOMEM; + } + return 0; +} + +int __init jbd2_journal_init_revoke_table_cache(void) +{ + J_ASSERT(!jbd2_revoke_table_cache); jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, SLAB_TEMPORARY); - if (!jbd2_revoke_table_cache) - goto table_cache_failure; - return 0; -table_cache_failure: - jbd2_journal_destroy_revoke_caches(); -record_cache_failure: + if (!jbd2_revoke_table_cache) { + pr_emerg("JBD2: failed to create revoke_table cache\n"); return -ENOMEM; + } + return 0; } static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 914e725c82c4..e20a6703531f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -42,9 +42,11 @@ int __init jbd2_journal_init_transaction_cache(void) 0, SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, NULL); - if (transaction_cache) - return 0; - return -ENOMEM; + if (!transaction_cache) { + pr_emerg("JBD2: failed to create transaction cache\n"); + return -ENOMEM; + } + return 0; } void jbd2_journal_destroy_transaction_cache(void) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b708e5169d1d..583b82b5a1e9 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1317,7 +1317,7 @@ extern void __wait_on_journal (journal_t *); /* Transaction cache support */ extern void jbd2_journal_destroy_transaction_cache(void); -extern int jbd2_journal_init_transaction_cache(void); +extern int __init jbd2_journal_init_transaction_cache(void); extern void jbd2_journal_free_transaction(transaction_t *); /* @@ -1445,8 +1445,10 @@ static inline void jbd2_free_inode(struct jbd2_inode *jinode) /* Primary revoke support */ #define JOURNAL_REVOKE_DEFAULT_HASH 256 extern int jbd2_journal_init_revoke(journal_t *, int); -extern void jbd2_journal_destroy_revoke_caches(void); -extern int jbd2_journal_init_revoke_caches(void); +extern void jbd2_journal_destroy_revoke_record_cache(void); +extern void jbd2_journal_destroy_revoke_table_cache(void); +extern int __init jbd2_journal_init_revoke_record_cache(void); +extern int __init jbd2_journal_init_revoke_table_cache(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); From 3b5ea2df6cf6b901cea1e6ff050bf5575717a3fd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:19:15 -0700 Subject: [PATCH 141/152] KVM: x86: Skip EFER vs. guest CPUID checks for host-initiated writes commit 11988499e62b310f3bf6f6d0a807a06d3f9ccc96 upstream. KVM allows userspace to violate consistency checks related to the guest's CPUID model to some degree. Generally speaking, userspace has carte blanche when it comes to guest state so long as jamming invalid state won't negatively affect the host. Currently this is seems to be a non-issue as most of the interesting EFER checks are missing, e.g. NX and LME, but those will be added shortly. Proactively exempt userspace from the CPUID checks so as not to break userspace. Note, the efer_reserved_bits check still applies to userspace writes as that mask reflects the host's capabilities, e.g. KVM shouldn't allow a guest to run with NX=1 if it has been disabled in the host. Fixes: d80174745ba39 ("KVM: SVM: Only allow setting of EFER_SVME when CPUID SVM is set") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f3337adaf9b3..d0eb37c069b8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1162,31 +1162,42 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return 0; } +static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) + return false; + + if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) + return false; + + return true; + +} bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) return false; - if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) - return false; - - if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) - return false; - - return true; + return __kvm_valid_efer(vcpu, efer); } EXPORT_SYMBOL_GPL(kvm_valid_efer); -static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u64 old_efer = vcpu->arch.efer; + u64 efer = msr_info->data; - if (!kvm_valid_efer(vcpu, efer)) - return 1; + if (efer & efer_reserved_bits) + return false; - if (is_paging(vcpu) - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) - return 1; + if (!msr_info->host_initiated) { + if (!__kvm_valid_efer(vcpu, efer)) + return 1; + + if (is_paging(vcpu) && + (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) + return 1; + } efer &= ~EFER_LMA; efer |= vcpu->arch.efer & EFER_LMA; @@ -2356,7 +2367,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.arch_capabilities = data; break; case MSR_EFER: - return set_efer(vcpu, data); + return set_efer(vcpu, msr_info); case MSR_K7_HWCR: data &= ~(u64)0x40; /* ignore flush filter disable */ data &= ~(u64)0x100; /* ignore ignne emulation enable */ From 38f114887ca48048a0da736607166a62d5813342 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Apr 2019 13:32:44 -0700 Subject: [PATCH 142/152] KVM: lapic: Busy wait for timer to expire when using hv_timer commit ee66e453db13d4837a0dcf9d43efa7a88603161b upstream. ...now that VMX's preemption timer, i.e. the hv_timer, also adjusts its programmed time based on lapic_timer_advance_ns. Without the delay, a guest can see a timer interrupt arrive before the requested time when KVM is using the hv_timer to emulate the guest's interrupt. Fixes: c5ce8235cffa0 ("KVM: VMX: Optimize tscdeadline timer latency") Cc: Cc: Wanpeng Li Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d2f5aa220355..cba414db14cb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1449,7 +1449,7 @@ static void apic_timer_expired(struct kvm_lapic *apic) if (swait_active(q)) swake_up_one(q); - if (apic_lvtt_tscdeadline(apic)) + if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) ktimer->expired_tscdeadline = ktimer->tscdeadline; } From 98bdd33883db43b1612ca0dfc3563313497935c5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 12 May 2019 11:13:48 +0900 Subject: [PATCH 143/152] kbuild: turn auto.conf.cmd into a mandatory include file commit d2f8ae0e4c5c754f1b2a7b8388d19a1a977e698a upstream. syncconfig is responsible for keeping auto.conf up-to-date, so if it fails for any reason, the build must be terminated immediately. However, since commit 9390dff66a52 ("kbuild: invoke syncconfig if include/config/auto.conf.cmd is missing"), Kbuild continues running even after syncconfig fails. You can confirm this by intentionally making syncconfig error out: # diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c # index 08ba146..307b9de 100644 # --- a/scripts/kconfig/confdata.c # +++ b/scripts/kconfig/confdata.c # @@ -1023,6 +1023,9 @@ int conf_write_autoconf(int overwrite) # FILE *out, *tristate, *out_h; # int i; # # + if (overwrite) # + return 1; # + # if (!overwrite && is_present(autoconf_name)) # return 0; Then, syncconfig fails, but Make would not stop: $ make -s mrproper allyesconfig defconfig $ make scripts/kconfig/conf --syncconfig Kconfig *** Error during sync of the configuration. make[2]: *** [scripts/kconfig/Makefile;69: syncconfig] Error 1 make[1]: *** [Makefile;557: syncconfig] Error 2 make: *** [include/config/auto.conf.cmd] Deleting file 'include/config/tristate.conf' make: Failed to remake makefile 'include/config/auto.conf'. SYSTBL arch/x86/include/generated/asm/syscalls_32.h SYSHDR arch/x86/include/generated/asm/unistd_32_ia32.h SYSHDR arch/x86/include/generated/asm/unistd_64_x32.h SYSTBL arch/x86/include/generated/asm/syscalls_64.h [ continue running ... ] The reason is in the behavior of a pattern rule with multi-targets. %/auto.conf %/auto.conf.cmd %/tristate.conf: $(KCONFIG_CONFIG) $(Q)$(MAKE) -f $(srctree)/Makefile syncconfig GNU Make knows this rule is responsible for making all the three files simultaneously. As far as examined, auto.conf.cmd is the target in question when this rule is invoked. It is probably because auto.conf.cmd is included below the inclusion of auto.conf. The inclusion of auto.conf is mandatory, while that of auto.conf.cmd is optional. GNU Make does not care about the failure in the process of updating optional include files. I filed this issue (https://savannah.gnu.org/bugs/?56301) in case this behavior could be improved somehow in future releases of GNU Make. Anyway, it is quite easy to fix our Makefile. Given that auto.conf is already a mandatory include file, there is no reason to stick auto.conf.cmd optional. Make it mandatory as well. Cc: linux-stable # 5.0+ Fixes: 9390dff66a52 ("kbuild: invoke syncconfig if include/config/auto.conf.cmd is missing") Signed-off-by: Masahiro Yamada [commented out diff above to keep patch happy - gregkh] Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dd11f5a83d2f..d87c833ba9af 100644 --- a/Makefile +++ b/Makefile @@ -623,7 +623,7 @@ ifeq ($(may-sync-config),1) # Read in dependencies to all Kconfig* files, make sure to run syncconfig if # changes are detected. This should be included after arch/$(SRCARCH)/Makefile # because some architectures define CROSS_COMPILE there. --include include/config/auto.conf.cmd +include include/config/auto.conf.cmd # To avoid any implicit rule to kick in, define an empty command $(KCONFIG_CONFIG): ; From 756eda9bc8b7bcd58b0f6a1333dc46fc9c22397a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 23 Apr 2019 15:04:15 +0200 Subject: [PATCH 144/152] xen/pvh: set xen_domain_type to HVM in xen_pvh_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c9f804d64bb93c8dbf957df1d7e9de11380e522d upstream. Or else xen_domain() returns false despite xen_pvh being set. Signed-off-by: Roger Pau Monné Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten_pvh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index f7f77023288a..dab07827d25e 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -97,6 +97,7 @@ void __init xen_prepare_pvh(void) } xen_pvh = 1; + xen_domain_type = XEN_HVM_DOMAIN; xen_start_flags = pvh_start_info.flags; msr = cpuid_ebx(xen_cpuid_base() + 2); From 866f011181ffb9f4da5044dda316bbac26c78819 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Apr 2019 21:51:21 -0700 Subject: [PATCH 145/152] libnvdimm/namespace: Fix label tracking error commit c4703ce11c23423d4b46e3d59aef7979814fd608 upstream. Users have reported intermittent occurrences of DIMM initialization failures due to duplicate allocations of address capacity detected in the labels, or errors of the form below, both have the same root cause. nd namespace1.4: failed to track label: 0 WARNING: CPU: 17 PID: 1381 at drivers/nvdimm/label.c:863 RIP: 0010:__pmem_label_update+0x56c/0x590 [libnvdimm] Call Trace: ? nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm] nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm] uuid_store+0x17e/0x190 [libnvdimm] kernfs_fop_write+0xf0/0x1a0 vfs_write+0xb7/0x1b0 ksys_write+0x57/0xd0 do_syscall_64+0x60/0x210 Unfortunately those reports were typically with a busy parallel namespace creation / destruction loop making it difficult to see the components of the bug. However, Jane provided a simple reproducer using the work-in-progress sub-section implementation. When ndctl is reconfiguring a namespace it may take an existing defunct / disabled namespace and reconfigure it with a new uuid and other parameters. Critically namespace_update_uuid() takes existing address resources and renames them for the new namespace to use / reconfigure as it sees fit. The bug is that this rename only happens in the resource tracking tree. Existing labels with the old uuid are not reaped leading to a scenario where multiple active labels reference the same span of address range. Teach namespace_update_uuid() to flag any references to the old uuid for reaping at the next label update attempt. Cc: Fixes: bf9bccc14c05 ("libnvdimm: pmem label sets and namespace instantiation") Link: https://github.com/pmem/ndctl/issues/91 Reported-by: Jane Chu Reported-by: Jeff Moyer Reported-by: Erwin Tsaur Cc: Johannes Thumshirn Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/label.c | 29 ++++++++++++++++------------- drivers/nvdimm/namespace_devs.c | 15 +++++++++++++++ drivers/nvdimm/nd.h | 4 ++++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 1eeb7be6aa34..452ad379ed70 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -623,6 +623,17 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class, return &guid_null; } +static void reap_victim(struct nd_mapping *nd_mapping, + struct nd_label_ent *victim) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + u32 slot = to_slot(ndd, victim->label); + + dev_dbg(ndd->dev, "free: %d\n", slot); + nd_label_free_slot(ndd, slot); + victim->label = NULL; +} + static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, int pos, unsigned long flags) @@ -630,9 +641,9 @@ static int __pmem_label_update(struct nd_region *nd_region, struct nd_namespace_common *ndns = &nspm->nsio.common; struct nd_interleave_set *nd_set = nd_region->nd_set; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); - struct nd_label_ent *label_ent, *victim = NULL; struct nd_namespace_label *nd_label; struct nd_namespace_index *nsindex; + struct nd_label_ent *label_ent; struct nd_label_id label_id; struct resource *res; unsigned long *free; @@ -701,18 +712,10 @@ static int __pmem_label_update(struct nd_region *nd_region, list_for_each_entry(label_ent, &nd_mapping->labels, list) { if (!label_ent->label) continue; - if (memcmp(nspm->uuid, label_ent->label->uuid, - NSLABEL_UUID_LEN) != 0) - continue; - victim = label_ent; - list_move_tail(&victim->list, &nd_mapping->labels); - break; - } - if (victim) { - dev_dbg(ndd->dev, "free: %d\n", slot); - slot = to_slot(ndd, victim->label); - nd_label_free_slot(ndd, slot); - victim->label = NULL; + if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags) + || memcmp(nspm->uuid, label_ent->label->uuid, + NSLABEL_UUID_LEN) == 0) + reap_victim(nd_mapping, label_ent); } /* update index */ diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 73a444c41cde..5dc3b407d7bd 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1248,12 +1248,27 @@ static int namespace_update_uuid(struct nd_region *nd_region, for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_ent *label_ent; struct resource *res; for_each_dpa_resource(ndd, res) if (strcmp(res->name, old_label_id.id) == 0) sprintf((void *) res->name, "%s", new_label_id.id); + + mutex_lock(&nd_mapping->lock); + list_for_each_entry(label_ent, &nd_mapping->labels, list) { + struct nd_namespace_label *nd_label = label_ent->label; + struct nd_label_id label_id; + + if (!nd_label) + continue; + nd_label_gen_id(&label_id, nd_label->uuid, + __le32_to_cpu(nd_label->flags)); + if (strcmp(old_label_id.id, label_id.id) == 0) + set_bit(ND_LABEL_REAP, &label_ent->flags); + } + mutex_unlock(&nd_mapping->lock); } kfree(*old_uuid); out: diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 98317e7ce5b5..01e194a5824e 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -113,8 +113,12 @@ struct nd_percpu_lane { spinlock_t lock; }; +enum nd_label_flags { + ND_LABEL_REAP, +}; struct nd_label_ent { struct list_head list; + unsigned long flags; struct nd_namespace_label *label; }; From 627bb2d93b4d48f89e2087b3149c0a8444e5a1d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Feb 2019 10:42:39 -0800 Subject: [PATCH 146/152] iov_iter: optimize page_copy_sane() commit 6daef95b8c914866a46247232a048447fff97279 upstream. Avoid cache line miss dereferencing struct page if we can. page_copy_sane() mostly deals with order-0 pages. Extra cache line miss is visible on TCP recvmsg() calls dealing with GRO packets (typically 45 page frags are attached to one skb). Bringing the 45 struct pages into cpu cache while copying the data is not free, since the freeing of the skb (and associated page frags put_page()) can happen after cache lines have been evicted. Signed-off-by: Eric Dumazet Cc: Al Viro Signed-off-by: Al Viro Cc: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- lib/iov_iter.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8be175df3075..acd7b97c16f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -817,8 +817,21 @@ EXPORT_SYMBOL(_copy_from_iter_full_nocache); static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) { - struct page *head = compound_head(page); - size_t v = n + offset + page_address(page) - page_address(head); + struct page *head; + size_t v = n + offset; + + /* + * The general case needs to access the page order in order + * to compute the page size. + * However, we mostly deal with order-0 pages and thus can + * avoid a possible cache line miss for requests that fit all + * page orders. + */ + if (n <= v && v <= PAGE_SIZE) + return true; + + head = compound_head(page); + v += (page - head) << PAGE_SHIFT; if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head)))) return true; From f4bf101be366d1075767f21a10b1bf587ce2a52f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 18 Oct 2018 11:17:42 -0700 Subject: [PATCH 147/152] pstore: Centralize init/exit routines commit cb095afd44768bf495894b9ad063bd078e4bb201 upstream. In preparation for having additional actions during init/exit, this moves the init/exit into platform.c, centralizing the logic to make call outs to the fs init/exit. Signed-off-by: Kees Cook Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/pstore/inode.c | 11 ++--------- fs/pstore/internal.h | 5 +++-- fs/pstore/platform.c | 23 +++++++++++++++++++++++ 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 5fcb845b9fec..8cf2218b46a7 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -482,12 +482,10 @@ static struct file_system_type pstore_fs_type = { .kill_sb = pstore_kill_sb, }; -static int __init init_pstore_fs(void) +int __init pstore_init_fs(void) { int err; - pstore_choose_compression(); - /* Create a convenient mount point for people to access pstore */ err = sysfs_create_mount_point(fs_kobj, "pstore"); if (err) @@ -500,14 +498,9 @@ static int __init init_pstore_fs(void) out: return err; } -module_init(init_pstore_fs) -static void __exit exit_pstore_fs(void) +void __exit pstore_exit_fs(void) { unregister_filesystem(&pstore_fs_type); sysfs_remove_mount_point(fs_kobj, "pstore"); } -module_exit(exit_pstore_fs) - -MODULE_AUTHOR("Tony Luck "); -MODULE_LICENSE("GPL"); diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index fb767e28aeb2..7062ea4bc57c 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -37,7 +37,8 @@ extern bool pstore_is_mounted(void); extern void pstore_record_init(struct pstore_record *record, struct pstore_info *psi); -/* Called during module_init() */ -extern void __init pstore_choose_compression(void); +/* Called during pstore init/exit. */ +int __init pstore_init_fs(void); +void __exit pstore_exit_fs(void); #endif diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 15e99d5a681d..d61e26812af6 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -780,8 +780,31 @@ void __init pstore_choose_compression(void) } } +static int __init pstore_init(void) +{ + int ret; + + pstore_choose_compression(); + + ret = pstore_init_fs(); + if (ret) + return ret; + + return 0; +} +module_init(pstore_init) + +static void __exit pstore_exit(void) +{ + pstore_exit_fs(); +} +module_exit(pstore_exit) + module_param(compress, charp, 0444); MODULE_PARM_DESC(compress, "Pstore compression to use"); module_param(backend, charp, 0444); MODULE_PARM_DESC(backend, "Pstore backend to use"); + +MODULE_AUTHOR("Tony Luck "); +MODULE_LICENSE("GPL"); From fea8b84765a12c5c2aef32602381d9ffcc6d3507 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 17 Oct 2018 03:13:55 -0700 Subject: [PATCH 148/152] pstore: Allocate compression during late_initcall() commit 416031653eb55f844e3547fb8f8576399a800da0 upstream. ramoops's call of pstore_register() was recently moved to run during late_initcall() because the crypto backend may not have been ready during postcore_initcall(). This meant early-boot crash dumps were not getting caught by pstore any more. Instead, lets allow calls to pstore_register() earlier, and once crypto is ready we can initialize the compression. Reported-by: Sai Prakash Ranjan Signed-off-by: Joel Fernandes (Google) Tested-by: Sai Prakash Ranjan Fixes: cb3bee0369bc ("pstore: Use crypto compress API") [kees: trivial rebase] Signed-off-by: Kees Cook Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/pstore/platform.c | 10 +++++++++- fs/pstore/ram.c | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index d61e26812af6..578f178a695f 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -786,13 +786,21 @@ static int __init pstore_init(void) pstore_choose_compression(); + /* + * Check if any pstore backends registered earlier but did not + * initialize compression because crypto was not ready. If so, + * initialize compression now. + */ + if (psinfo && !tfm) + allocate_buf_for_compression(); + ret = pstore_init_fs(); if (ret) return ret; return 0; } -module_init(pstore_init) +late_initcall(pstore_init); static void __exit pstore_exit(void) { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index eb67bb7f04de..44ed6b193d2e 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -956,7 +956,7 @@ static int __init ramoops_init(void) return ret; } -late_initcall(ramoops_init); +postcore_initcall(ramoops_init); static void __exit ramoops_exit(void) { From 953e826e8d0f24904241e7cd3633a54af511b42a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 17 Oct 2018 14:00:12 -0700 Subject: [PATCH 149/152] pstore: Refactor compression initialization commit 95047b0519c17a28e09df5f38750f5354e3db4c4 upstream. This refactors compression initialization slightly to better handle getting potentially called twice (via early pstore_register() calls and later pstore_init()) and improves the comments and reporting to be more verbose. Signed-off-by: Kees Cook Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- fs/pstore/platform.c | 52 +++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 578f178a695f..b821054ca3ed 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -274,36 +274,56 @@ static int pstore_decompress(void *in, void *out, static void allocate_buf_for_compression(void) { + struct crypto_comp *ctx; + int size; + char *buf; + + /* Skip if not built-in or compression backend not selected yet. */ if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend) return; + /* Skip if no pstore backend yet or compression init already done. */ + if (!psinfo || tfm) + return; + if (!crypto_has_comp(zbackend->name, 0, 0)) { - pr_err("No %s compression\n", zbackend->name); + pr_err("Unknown compression: %s\n", zbackend->name); return; } - big_oops_buf_sz = zbackend->zbufsize(psinfo->bufsize); - if (big_oops_buf_sz <= 0) - return; - - big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); - if (!big_oops_buf) { - pr_err("allocate compression buffer error!\n"); + size = zbackend->zbufsize(psinfo->bufsize); + if (size <= 0) { + pr_err("Invalid compression size for %s: %d\n", + zbackend->name, size); return; } - tfm = crypto_alloc_comp(zbackend->name, 0, 0); - if (IS_ERR_OR_NULL(tfm)) { - kfree(big_oops_buf); - big_oops_buf = NULL; - pr_err("crypto_alloc_comp() failed!\n"); + buf = kmalloc(size, GFP_KERNEL); + if (!buf) { + pr_err("Failed %d byte compression buffer allocation for: %s\n", + size, zbackend->name); return; } + + ctx = crypto_alloc_comp(zbackend->name, 0, 0); + if (IS_ERR_OR_NULL(ctx)) { + kfree(buf); + pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name, + PTR_ERR(ctx)); + return; + } + + /* A non-NULL big_oops_buf indicates compression is available. */ + tfm = ctx; + big_oops_buf_sz = size; + big_oops_buf = buf; + + pr_info("Using compression: %s\n", zbackend->name); } static void free_buf_for_compression(void) { - if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && !IS_ERR_OR_NULL(tfm)) + if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) crypto_free_comp(tfm); kfree(big_oops_buf); big_oops_buf = NULL; @@ -774,7 +794,6 @@ void __init pstore_choose_compression(void) for (step = zbackends; step->name; step++) { if (!strcmp(compress, step->name)) { zbackend = step; - pr_info("using %s compression\n", zbackend->name); return; } } @@ -791,8 +810,7 @@ static int __init pstore_init(void) * initialize compression because crypto was not ready. If so, * initialize compression now. */ - if (psinfo && !tfm) - allocate_buf_for_compression(); + allocate_buf_for_compression(); ret = pstore_init_fs(); if (ret) From 6172ae55a187378b091282c7a5b4895635e63371 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Thu, 21 Feb 2019 11:29:10 -0500 Subject: [PATCH 150/152] ext4: fix compile error when using BUFFER_TRACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ddccb6dbe780d68133191477571cb7c69e17bb8c upstream. Fix compile error below when using BUFFER_TRACE. fs/ext4/inode.c: In function ‘ext4_expand_extra_isize’: fs/ext4/inode.c:5979:19: error: request for member ‘bh’ in something not a structure or union BUFFER_TRACE(iloc.bh, "get_write_access"); Fixes: c03b45b853f58 ("ext4, project: expand inode extra size if possible") Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2c43c5b92229..18884f39610f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5971,7 +5971,7 @@ int ext4_expand_extra_isize(struct inode *inode, ext4_write_lock_xattr(inode, &no_expand); - BUFFER_TRACE(iloc.bh, "get_write_access"); + BUFFER_TRACE(iloc->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, iloc->bh); if (error) { brelse(iloc->bh); From e8816d3bc5956a3d4d0f00651138a8a3b36a07bf Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Thu, 14 Feb 2019 17:52:18 -0500 Subject: [PATCH 151/152] ext4: don't update s_rev_level if not required commit c9e716eb9b3455a83ed7c5f5a81256a3da779a95 upstream. Don't update the superblock s_rev_level during mount if it isn't actually necessary, only if superblock features are being set by the kernel. This was originally added for ext3 since it always set the INCOMPAT_RECOVER and HAS_JOURNAL features during mount, but this is not needed since no journal mode was added to ext4. That will allow Geert to mount his 20-year-old ext2 rev 0.0 m68k filesystem, as a testament of the backward compatibility of ext4. Fixes: 0390131ba84f ("ext4: Allow ext4 to run without a journal") Signed-off-by: Andreas Dilger Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 6 +++++- fs/ext4/inode.c | 1 - fs/ext4/super.c | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2ddf7833350d..1ee51d3a978a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1670,6 +1670,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 +extern void ext4_update_dynamic_rev(struct super_block *sb); + #define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline bool ext4_has_feature_##name(struct super_block *sb) \ { \ @@ -1678,6 +1680,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \ } \ static inline void ext4_set_feature_##name(struct super_block *sb) \ { \ + ext4_update_dynamic_rev(sb); \ EXT4_SB(sb)->s_es->s_feature_compat |= \ cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \ } \ @@ -1695,6 +1698,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \ } \ static inline void ext4_set_feature_##name(struct super_block *sb) \ { \ + ext4_update_dynamic_rev(sb); \ EXT4_SB(sb)->s_es->s_feature_ro_compat |= \ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \ } \ @@ -1712,6 +1716,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \ } \ static inline void ext4_set_feature_##name(struct super_block *sb) \ { \ + ext4_update_dynamic_rev(sb); \ EXT4_SB(sb)->s_es->s_feature_incompat |= \ cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \ } \ @@ -2679,7 +2684,6 @@ do { \ #endif -extern void ext4_update_dynamic_rev(struct super_block *sb); extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, __u32 compat); extern int ext4_update_rocompat_feature(handle_t *handle, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 18884f39610f..67e8aa35197e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5320,7 +5320,6 @@ static int ext4_do_update_inode(handle_t *handle, err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) goto out_brelse; - ext4_update_dynamic_rev(sb); ext4_set_feature_large_file(sb); ext4_handle_sync(handle); err = ext4_handle_dirty_super(handle, sb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 07d73fb22b60..a270391228af 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2259,7 +2259,6 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); ext4_update_tstamp(es, s_mtime); - ext4_update_dynamic_rev(sb); if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); From c3a0725977484ea2d7f17746d7e168d2b19f99a2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 May 2019 07:37:46 +0200 Subject: [PATCH 152/152] Linux 4.19.45 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d87c833ba9af..b21dd3866b63 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 44 +SUBLEVEL = 45 EXTRAVERSION = NAME = "People's Front"