From d83d42d071b6c58e71e54b8778ca5b279be98f7d Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Oct 2021 14:57:41 -0600 Subject: module: fix validate_section_offset() overflow bug on 64-bit validate_section_offset() uses unsigned long local variable to add/store shdr->sh_offset and shdr->sh_size on all platforms. unsigned long is too short when sh_offset is Elf64_Off which would be the case on 64bit ELF headers. Without this fix applied we were shorting the design of modules to have section headers placed within the 32-bit boundary (4 GiB) instead of 64-bits when on 64-bit architectures (which allows for up to 16,777,216 TiB). In practice this just meant we were limiting modules sections to below 4 GiB even on 64-bit systems. This then should not really affect any real-world use case as modules these days obviously should likely never exceed 1 GiB in size overall. A specially crafted invalid module might succeed to skip validation in validate_section_offset() due to this mistake, but in such case no impact is observed through code inspection given the correct data types are used for the copy of the module when needed on move_module() when the section type is not SHT_NOBITS (which indicates no the section occupies no space on the file). Fix the overflow problem using the right size local variable when CONFIG_64BIT is defined. Signed-off-by: Shuah Khan [mcgrof: expand commit log with possible impact if not applied] Signed-off-by: Luis Chamberlain --- kernel/module.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 5c26a76e800b..c7ad73807446 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2942,7 +2942,11 @@ static int module_sig_check(struct load_info *info, int flags) static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) { +#if defined(CONFIG_64BIT) + unsigned long long secend; +#else unsigned long secend; +#endif /* * Check for both overflow and offset/size being -- cgit v1.2.3-71-gd317 From 7fd982f394c42f25a73fe9dfbf1e6b11fa26b40a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Oct 2021 14:57:40 -0600 Subject: module: change to print useful messages from elf_validity_check() elf_validity_check() checks ELF headers for errors and ELF Spec. compliance and if any of them fail it returns -ENOEXEC from all of these error paths. Almost all of them don't print any messages. When elf_validity_check() returns an error, load_module() prints an error message without error code. It is hard to determine why the module ELF structure is invalid, even if load_module() prints the error code which is -ENOEXEC in all of these cases. Change to print useful error messages from elf_validity_check() to clearly say what went wrong and why the ELF validity checks failed. Remove the load_module() error message which is no longer needed. This patch includes changes to fix build warns on 32-bit platforms: warning: format '%llu' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Off' {aka 'unsigned int'} Reported-by: kernel test robot Signed-off-by: Shuah Khan Signed-off-by: Luis Chamberlain --- kernel/module.c | 75 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 21 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c7ad73807446..84a9141a5e15 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2971,14 +2971,29 @@ static int elf_validity_check(struct load_info *info) Elf_Shdr *shdr, *strhdr; int err; - if (info->len < sizeof(*(info->hdr))) - return -ENOEXEC; + if (info->len < sizeof(*(info->hdr))) { + pr_err("Invalid ELF header len %lu\n", info->len); + goto no_exec; + } - if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 - || info->hdr->e_type != ET_REL - || !elf_check_arch(info->hdr) - || info->hdr->e_shentsize != sizeof(Elf_Shdr)) - return -ENOEXEC; + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { + pr_err("Invalid ELF header magic: != %s\n", ELFMAG); + goto no_exec; + } + if (info->hdr->e_type != ET_REL) { + pr_err("Invalid ELF header type: %u != %u\n", + info->hdr->e_type, ET_REL); + goto no_exec; + } + if (!elf_check_arch(info->hdr)) { + pr_err("Invalid architecture in ELF header: %u\n", + info->hdr->e_machine); + goto no_exec; + } + if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { + pr_err("Invalid ELF section header size\n"); + goto no_exec; + } /* * e_shnum is 16 bits, and sizeof(Elf_Shdr) is @@ -2987,8 +3002,10 @@ static int elf_validity_check(struct load_info *info) */ if (info->hdr->e_shoff >= info->len || (info->hdr->e_shnum * sizeof(Elf_Shdr) > - info->len - info->hdr->e_shoff)) - return -ENOEXEC; + info->len - info->hdr->e_shoff)) { + pr_err("Invalid ELF section header overflow\n"); + goto no_exec; + } info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; @@ -2996,13 +3013,19 @@ static int elf_validity_check(struct load_info *info) * Verify if the section name table index is valid. */ if (info->hdr->e_shstrndx == SHN_UNDEF - || info->hdr->e_shstrndx >= info->hdr->e_shnum) - return -ENOEXEC; + || info->hdr->e_shstrndx >= info->hdr->e_shnum) { + pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", + info->hdr->e_shstrndx, info->hdr->e_shstrndx, + info->hdr->e_shnum); + goto no_exec; + } strhdr = &info->sechdrs[info->hdr->e_shstrndx]; err = validate_section_offset(info, strhdr); - if (err < 0) + if (err < 0) { + pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type); return err; + } /* * The section name table must be NUL-terminated, as required @@ -3010,8 +3033,10 @@ static int elf_validity_check(struct load_info *info) * strings in the section safe. */ info->secstrings = (void *)info->hdr + strhdr->sh_offset; - if (info->secstrings[strhdr->sh_size - 1] != '\0') - return -ENOEXEC; + if (info->secstrings[strhdr->sh_size - 1] != '\0') { + pr_err("ELF Spec violation: section name table isn't null terminated\n"); + goto no_exec; + } /* * The code assumes that section 0 has a length of zero and @@ -3019,8 +3044,11 @@ static int elf_validity_check(struct load_info *info) */ if (info->sechdrs[0].sh_type != SHT_NULL || info->sechdrs[0].sh_size != 0 - || info->sechdrs[0].sh_addr != 0) - return -ENOEXEC; + || info->sechdrs[0].sh_addr != 0) { + pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", + info->sechdrs[0].sh_type); + goto no_exec; + } for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; @@ -3030,8 +3058,12 @@ static int elf_validity_check(struct load_info *info) continue; case SHT_SYMTAB: if (shdr->sh_link == SHN_UNDEF - || shdr->sh_link >= info->hdr->e_shnum) - return -ENOEXEC; + || shdr->sh_link >= info->hdr->e_shnum) { + pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", + shdr->sh_link, shdr->sh_link, + info->hdr->e_shnum); + goto no_exec; + } fallthrough; default: err = validate_section_offset(info, shdr); @@ -3053,6 +3085,9 @@ static int elf_validity_check(struct load_info *info) } return 0; + +no_exec: + return -ENOEXEC; } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) @@ -3944,10 +3979,8 @@ static int load_module(struct load_info *info, const char __user *uargs, * sections. */ err = elf_validity_check(info); - if (err) { - pr_err("Module has invalid ELF structures\n"); + if (err) goto free_copy; - } /* * Everything checks out, so set up the section info -- cgit v1.2.3-71-gd317 From ca3574bd653aba234a4b31955f2778947403be16 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Dec 2021 11:00:19 -0600 Subject: exit: Rename module_put_and_exit to module_put_and_kthread_exit Update module_put_and_exit to call kthread_exit instead of do_exit. Change the name to reflect this change in functionality. All of the users of module_put_and_exit are causing the current kthread to exit so this change makes it clear what is happening. There is no functional change. Signed-off-by: "Eric W. Biederman" --- crypto/algboss.c | 4 ++-- fs/cifs/connect.c | 2 +- fs/nfs/callback.c | 4 ++-- fs/nfs/nfs4state.c | 2 +- fs/nfsd/nfssvc.c | 2 +- include/linux/module.h | 6 +++--- kernel/module.c | 6 +++--- net/bluetooth/bnep/core.c | 2 +- net/bluetooth/cmtp/core.c | 2 +- net/bluetooth/hidp/core.c | 2 +- tools/objtool/check.c | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel/module.c') diff --git a/crypto/algboss.c b/crypto/algboss.c index 1814d2c5188a..eb5fe84efb83 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -67,7 +67,7 @@ out: complete_all(¶m->larval->completion); crypto_alg_put(¶m->larval->alg); kfree(param); - module_put_and_exit(0); + module_put_and_kthread_exit(0); } static int cryptomgr_schedule_probe(struct crypto_larval *larval) @@ -190,7 +190,7 @@ skiptest: crypto_alg_tested(param->driver, err); kfree(param); - module_put_and_exit(0); + module_put_and_kthread_exit(0); } static int cryptomgr_schedule_test(struct crypto_alg *alg) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 82577a7a5bb1..39fbe9acbf51 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1139,7 +1139,7 @@ next_pdu: } memalloc_noreclaim_restore(noreclaim_flag); - module_put_and_exit(0); + module_put_and_kthread_exit(0); } /* diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 86d856de1389..3c86a559a321 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -93,7 +93,7 @@ nfs4_callback_svc(void *vrqstp) svc_process(rqstp); } svc_exit_thread(rqstp); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } @@ -137,7 +137,7 @@ nfs41_callback_svc(void *vrqstp) } } svc_exit_thread(rqstp); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ecc4594299d6..ea41af731978 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2689,6 +2689,6 @@ static int nfs4_run_state_manager(void *ptr) allow_signal(SIGKILL); nfs4_state_manager(clp); nfs_put_client(clp); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 80431921e5d7..5ce9f14318c4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -986,7 +986,7 @@ out: /* Release module */ mutex_unlock(&nfsd_mutex); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/include/linux/module.h b/include/linux/module.h index c9f1200b2312..f03be97e9ec1 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -595,9 +595,9 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); -extern void __noreturn __module_put_and_exit(struct module *mod, +extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); -#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) +#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); @@ -790,7 +790,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb) return 0; } -#define module_put_and_exit(code) do_exit(code) +#define module_put_and_kthread_exit(code) kthread_exit(code) static inline void print_modules(void) { diff --git a/kernel/module.c b/kernel/module.c index 84a9141a5e15..a3aa00bf270d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -337,12 +337,12 @@ static inline void add_taint_module(struct module *mod, unsigned flag, * A thread that wants to hold a reference to a module only while it * is running can call this to safely exit. nfsd and lockd use this. */ -void __noreturn __module_put_and_exit(struct module *mod, long code) +void __noreturn __module_put_and_kthread_exit(struct module *mod, long code) { module_put(mod); - do_exit(code); + kthread_exit(code); } -EXPORT_SYMBOL(__module_put_and_exit); +EXPORT_SYMBOL(__module_put_and_kthread_exit); /* Find a module section: 0 means not found. */ static unsigned int find_sec(const struct load_info *info, const char *name) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index c9add7753b9f..40baa6b7321a 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -535,7 +535,7 @@ static int bnep_session(void *arg) up_write(&bnep_session_sem); free_netdev(dev); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0a2d78e811cf..9bfded6b74b3 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -323,7 +323,7 @@ static int cmtp_session(void *arg) up_write(&cmtp_session_sem); kfree(session); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 80848dfc01db..5940744a8cd8 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg) l2cap_unregister_user(session->conn, &session->user); hidp_session_put(session); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 90108fe5610d..120e9598c11a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -170,7 +170,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "do_task_dead", "kthread_exit", "make_task_dead", - "__module_put_and_exit", + "__module_put_and_kthread_exit", "complete_and_exit", "__reiserfs_panic", "lbug_with_loc", -- cgit v1.2.3-71-gd317 From f5bdb34bf0c9314548f2d8e2360b703ff3610303 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 29 Dec 2021 13:56:47 -0800 Subject: livepatch: Avoid CPU hogging with cond_resched When initializing a 'struct klp_object' in klp_init_object_loaded(), and performing relocations in klp_resolve_symbols(), klp_find_object_symbol() is invoked to look up the address of a symbol in an already-loaded module (or vmlinux). This, in turn, calls kallsyms_on_each_symbol() or module_kallsyms_on_each_symbol() to find the address of the symbol that is being patched. It turns out that symbol lookups often take up the most CPU time when enabling and disabling a patch, and may hog the CPU and cause other tasks on that CPU's runqueue to starve -- even in paths where interrupts are enabled. For example, under certain workloads, enabling a KLP patch with many objects or functions may cause ksoftirqd to be starved, and thus for interrupts to be backlogged and delayed. This may end up causing TCP retransmits on the host where the KLP patch is being applied, and in general, may cause any interrupts serviced by softirqd to be delayed while the patch is being applied. So as to ensure that kallsyms_on_each_symbol() does not end up hogging the CPU, this patch adds a call to cond_resched() in kallsyms_on_each_symbol() and module_kallsyms_on_each_symbol(), which are invoked when doing a symbol lookup in vmlinux and a module respectively. Without this patch, if a live-patch is applied on a 36-core Intel host with heavy TCP traffic, a ~10x spike is observed in TCP retransmits while the patch is being applied. Additionally, collecting sched events with perf indicates that ksoftirqd is awakened ~1.3 seconds before it's eventually scheduled. With the patch, no increase in TCP retransmit events is observed, and ksoftirqd is scheduled shortly after it's awakened. Signed-off-by: David Vernet Acked-by: Miroslav Benes Acked-by: Song Liu Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20211229215646.830451-1-void@manifault.com --- kernel/kallsyms.c | 1 + kernel/module.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 0ba87982d017..2a9afe484aec 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -223,6 +223,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, ret = fn(data, namebuf, NULL, kallsyms_sym_address(i)); if (ret != 0) return ret; + cond_resched(); } return 0; } diff --git a/kernel/module.c b/kernel/module.c index 40ec9a030eec..c96160f7f3f5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4462,6 +4462,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, mod, kallsyms_symbol_value(sym)); if (ret != 0) goto out; + + cond_resched(); } } out: -- cgit v1.2.3-71-gd317 From 9dc3c3f691bca10d3aa94887eee33bf629840b23 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Mon, 22 Nov 2021 06:26:48 -0800 Subject: module: Remove outdated comment Since commit e513cc1c07e2 ("module: Remove stop_machine from module unloading") this comment is no longer correct. Remove it. Signed-off-by: Yu Chen Signed-off-by: Luis Chamberlain --- kernel/module.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 84a9141a5e15..320ec908045f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -958,7 +958,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, } } - /* Stop the machine so refcounts can't move and disable module. */ ret = try_stop_module(mod, flags, &forced); if (ret != 0) goto out; -- cgit v1.2.3-71-gd317 From b1ae6dc41eaaa98bb75671e0f3665bfda248c3e7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 5 Jan 2022 13:55:12 -0800 Subject: module: add in-kernel support for decompressing Current scheme of having userspace decompress kernel modules before loading them into the kernel runs afoul of LoadPin security policy, as it loses link between the source of kernel module on the disk and binary blob that is being loaded into the kernel. To solve this issue let's implement decompression in kernel, so that we can pass a file descriptor of compressed module file into finit_module() which will keep LoadPin happy. To let userspace know what compression/decompression scheme kernel supports it will create /sys/module/compression attribute. kmod can read this attribute and decide if it can pass compressed file to finit_module(). New MODULE_INIT_COMPRESSED_DATA flag indicates that the kernel should attempt to decompress the data read from file descriptor prior to trying load the module. To simplify things kernel will only implement single decompression method matching compression method selected when generating modules. This patch implements gzip and xz; more can be added later, Signed-off-by: Dmitry Torokhov Signed-off-by: Luis Chamberlain --- include/uapi/linux/module.h | 1 + init/Kconfig | 13 +++ kernel/Makefile | 1 + kernel/module-internal.h | 19 ++++ kernel/module.c | 35 ++++-- kernel/module_decompress.c | 271 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 329 insertions(+), 11 deletions(-) create mode 100644 kernel/module_decompress.c (limited to 'kernel/module.c') diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h index 50d98ec5e866..03a33ffffcba 100644 --- a/include/uapi/linux/module.h +++ b/include/uapi/linux/module.h @@ -5,5 +5,6 @@ /* Flags for sys_finit_module: */ #define MODULE_INIT_IGNORE_MODVERSIONS 1 #define MODULE_INIT_IGNORE_VERMAGIC 2 +#define MODULE_INIT_COMPRESSED_FILE 4 #endif /* _UAPI_LINUX_MODULE_H */ diff --git a/init/Kconfig b/init/Kconfig index f2ae41e6717f..faf3a4b5cc8f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2274,6 +2274,19 @@ config MODULE_COMPRESS_ZSTD endchoice +config MODULE_DECOMPRESS + bool "Support in-kernel module decompression" + depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ + select ZLIB_INFLATE if MODULE_COMPRESS_GZIP + select XZ_DEC if MODULE_COMPRESS_XZ + help + + Support for decompressing kernel modules by the kernel itself + instead of relying on userspace to perform this task. Useful when + load pinning security policy is enabled. + + If unsure, say N. + config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS bool "Allow loading of modules with missing namespace imports" help diff --git a/kernel/Makefile b/kernel/Makefile index 186c49582f45..56f4ee97f328 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -67,6 +67,7 @@ obj-y += up.o endif obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_MODULE_DECOMPRESS) += module_decompress.o obj-$(CONFIG_MODULE_SIG) += module_signing.o obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o obj-$(CONFIG_KALLSYMS) += kallsyms.o diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 33783abc377b..8c381c99062f 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -22,6 +22,11 @@ struct load_info { bool sig_ok; #ifdef CONFIG_KALLSYMS unsigned long mod_kallsyms_init_off; +#endif +#ifdef CONFIG_MODULE_DECOMPRESS + struct page **pages; + unsigned int max_pages; + unsigned int used_pages; #endif struct { unsigned int sym, str, mod, vers, info, pcpu; @@ -29,3 +34,17 @@ struct load_info { }; extern int mod_verify_sig(const void *mod, struct load_info *info); + +#ifdef CONFIG_MODULE_DECOMPRESS +int module_decompress(struct load_info *info, const void *buf, size_t size); +void module_decompress_cleanup(struct load_info *info); +#else +static inline int module_decompress(struct load_info *info, + const void *buf, size_t size) +{ + return -EOPNOTSUPP; +} +static inline void module_decompress_cleanup(struct load_info *info) +{ +} +#endif diff --git a/kernel/module.c b/kernel/module.c index 320ec908045f..34fe2824eb56 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3173,9 +3173,12 @@ out: return err; } -static void free_copy(struct load_info *info) +static void free_copy(struct load_info *info, int flags) { - vfree(info->hdr); + if (flags & MODULE_INIT_COMPRESSED_FILE) + module_decompress_cleanup(info); + else + vfree(info->hdr); } static int rewrite_section_headers(struct load_info *info, int flags) @@ -4124,7 +4127,7 @@ static int load_module(struct load_info *info, const char __user *uargs, } /* Get rid of temporary copy. */ - free_copy(info); + free_copy(info, flags); /* Done! */ trace_module_load(mod); @@ -4173,7 +4176,7 @@ static int load_module(struct load_info *info, const char __user *uargs, module_deallocate(mod, info); free_copy: - free_copy(info); + free_copy(info, flags); return err; } @@ -4200,7 +4203,8 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { struct load_info info = { }; - void *hdr = NULL; + void *buf = NULL; + int len; int err; err = may_init_module(); @@ -4210,15 +4214,24 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS - |MODULE_INIT_IGNORE_VERMAGIC)) + |MODULE_INIT_IGNORE_VERMAGIC + |MODULE_INIT_COMPRESSED_FILE)) return -EINVAL; - err = kernel_read_file_from_fd(fd, 0, &hdr, INT_MAX, NULL, + len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL, READING_MODULE); - if (err < 0) - return err; - info.hdr = hdr; - info.len = err; + if (len < 0) + return len; + + if (flags & MODULE_INIT_COMPRESSED_FILE) { + err = module_decompress(&info, buf, len); + vfree(buf); /* compressed data is no longer needed */ + if (err) + return err; + } else { + info.hdr = buf; + info.len = len; + } return load_module(&info, uargs, flags); } diff --git a/kernel/module_decompress.c b/kernel/module_decompress.c new file mode 100644 index 000000000000..aeefd95a3337 --- /dev/null +++ b/kernel/module_decompress.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2021 Google LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "module-internal.h" + +static int module_extend_max_pages(struct load_info *info, unsigned int extent) +{ + struct page **new_pages; + + new_pages = kvmalloc_array(info->max_pages + extent, + sizeof(info->pages), GFP_KERNEL); + if (!new_pages) + return -ENOMEM; + + memcpy(new_pages, info->pages, info->max_pages * sizeof(info->pages)); + kvfree(info->pages); + info->pages = new_pages; + info->max_pages += extent; + + return 0; +} + +static struct page *module_get_next_page(struct load_info *info) +{ + struct page *page; + int error; + + if (info->max_pages == info->used_pages) { + error = module_extend_max_pages(info, info->used_pages); + if (error) + return ERR_PTR(error); + } + + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!page) + return ERR_PTR(-ENOMEM); + + info->pages[info->used_pages++] = page; + return page; +} + +#ifdef CONFIG_MODULE_COMPRESS_GZIP +#include +#define MODULE_COMPRESSION gzip +#define MODULE_DECOMPRESS_FN module_gzip_decompress + +/* + * Calculate length of the header which consists of signature, header + * flags, time stamp and operating system ID (10 bytes total), plus + * an optional filename. + */ +static size_t module_gzip_header_len(const u8 *buf, size_t size) +{ + const u8 signature[] = { 0x1f, 0x8b, 0x08 }; + size_t len = 10; + + if (size < len || memcmp(buf, signature, sizeof(signature))) + return 0; + + if (buf[3] & 0x08) { + do { + /* + * If we can't find the end of the file name we must + * be dealing with a corrupted file. + */ + if (len == size) + return 0; + } while (buf[len++] != '\0'); + } + + return len; +} + +static ssize_t module_gzip_decompress(struct load_info *info, + const void *buf, size_t size) +{ + struct z_stream_s s = { 0 }; + size_t new_size = 0; + size_t gzip_hdr_len; + ssize_t retval; + int rc; + + gzip_hdr_len = module_gzip_header_len(buf, size); + if (!gzip_hdr_len) { + pr_err("not a gzip compressed module\n"); + return -EINVAL; + } + + s.next_in = buf + gzip_hdr_len; + s.avail_in = size - gzip_hdr_len; + + s.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL); + if (!s.workspace) + return -ENOMEM; + + rc = zlib_inflateInit2(&s, -MAX_WBITS); + if (rc != Z_OK) { + pr_err("failed to initialize decompresser: %d\n", rc); + retval = -EINVAL; + goto out; + } + + do { + struct page *page = module_get_next_page(info); + if (!page) { + retval = -ENOMEM; + goto out_inflate_end; + } + + s.next_out = kmap(page); + s.avail_out = PAGE_SIZE; + rc = zlib_inflate(&s, 0); + kunmap(page); + + new_size += PAGE_SIZE - s.avail_out; + } while (rc == Z_OK); + + if (rc != Z_STREAM_END) { + pr_err("decompression failed with status %d\n", rc); + retval = -EINVAL; + goto out_inflate_end; + } + + retval = new_size; + +out_inflate_end: + zlib_inflateEnd(&s); +out: + kfree(s.workspace); + return retval; +} +#elif CONFIG_MODULE_COMPRESS_XZ +#include +#define MODULE_COMPRESSION xz +#define MODULE_DECOMPRESS_FN module_xz_decompress + +static ssize_t module_xz_decompress(struct load_info *info, + const void *buf, size_t size) +{ + static const u8 signature[] = { 0xfd, '7', 'z', 'X', 'Z', 0 }; + struct xz_dec *xz_dec; + struct xz_buf xz_buf; + enum xz_ret xz_ret; + size_t new_size = 0; + ssize_t retval; + + if (size < sizeof(signature) || + memcmp(buf, signature, sizeof(signature))) { + pr_err("not an xz compressed module\n"); + return -EINVAL; + } + + xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1); + if (!xz_dec) + return -ENOMEM; + + xz_buf.in_size = size; + xz_buf.in = buf; + xz_buf.in_pos = 0; + + do { + struct page *page = module_get_next_page(info); + if (!page) { + retval = -ENOMEM; + goto out; + } + + xz_buf.out = kmap(page); + xz_buf.out_pos = 0; + xz_buf.out_size = PAGE_SIZE; + xz_ret = xz_dec_run(xz_dec, &xz_buf); + kunmap(page); + + new_size += xz_buf.out_pos; + } while (xz_buf.out_pos == PAGE_SIZE && xz_ret == XZ_OK); + + if (xz_ret != XZ_STREAM_END) { + pr_err("decompression failed with status %d\n", xz_ret); + retval = -EINVAL; + goto out; + } + + retval = new_size; + + out: + xz_dec_end(xz_dec); + return retval; +} +#else +#error "Unexpected configuration for CONFIG_MODULE_DECOMPRESS" +#endif + +int module_decompress(struct load_info *info, const void *buf, size_t size) +{ + unsigned int n_pages; + ssize_t data_size; + int error; + + /* + * Start with number of pages twice as big as needed for + * compressed data. + */ + n_pages = DIV_ROUND_UP(size, PAGE_SIZE) * 2; + error = module_extend_max_pages(info, n_pages); + + data_size = MODULE_DECOMPRESS_FN(info, buf, size); + if (data_size < 0) { + error = data_size; + goto err; + } + + info->hdr = vmap(info->pages, info->used_pages, VM_MAP, PAGE_KERNEL); + if (!info->hdr) { + error = -ENOMEM; + goto err; + } + + info->len = data_size; + return 0; + +err: + module_decompress_cleanup(info); + return error; +} + +void module_decompress_cleanup(struct load_info *info) +{ + int i; + + if (info->hdr) + vunmap(info->hdr); + + for (i = 0; i < info->used_pages; i++) + __free_page(info->pages[i]); + + kvfree(info->pages); + + info->pages = NULL; + info->max_pages = info->used_pages = 0; +} + +static ssize_t compression_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", __stringify(MODULE_COMPRESSION)); +} +static struct kobj_attribute module_compression_attr = __ATTR_RO(compression); + +static int __init module_decompress_sysfs_init(void) +{ + int error; + + error = sysfs_create_file(&module_kset->kobj, + &module_compression_attr.attr); + if (error) + pr_warn("Failed to create 'compression' attribute"); + + return 0; +} +late_initcall(module_decompress_sysfs_init); -- cgit v1.2.3-71-gd317 From a97ac8cb24a3c3ad74794adb83717ef1605d1b47 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 13 Jan 2022 16:51:52 -0800 Subject: module: fix signature check failures when using in-kernel decompression The new flag MODULE_INIT_COMPRESSED_FILE unintentionally trips check in module_sig_check(). The check was supposed to catch case when version info or magic was removed from a signed module, making signature invalid, but it was coded too broadly and was catching this new flag as well. Change the check to only test the 2 particular flags affecting signature validity. Fixes: b1ae6dc41eaa ("module: add in-kernel support for decompressing") Signed-off-by: Dmitry Torokhov Reviewed-by: Douglas Anderson Signed-off-by: Luis Chamberlain --- kernel/module.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 34fe2824eb56..387ee77bdbd6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2883,12 +2883,13 @@ static int module_sig_check(struct load_info *info, int flags) const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; const char *reason; const void *mod = info->hdr; - + bool mangled_module = flags & (MODULE_INIT_IGNORE_MODVERSIONS | + MODULE_INIT_IGNORE_VERMAGIC); /* - * Require flags == 0, as a module with version information - * removed is no longer the module that was signed + * Do not allow mangled modules as a module with version information + * removed is no longer the module that was signed. */ - if (flags == 0 && + if (!mangled_module && info->len > markerlen && memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ -- cgit v1.2.3-71-gd317 From 67d6212afda218d564890d1674bab28e8612170f Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Thu, 27 Jan 2022 15:39:53 -0800 Subject: Revert "module, async: async_synchronize_full() on module init iff async is used" This reverts commit 774a1221e862b343388347bac9b318767336b20b. We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule(). For example, PCI driver probing is invoked from a worker thread based on a node where device is attached: if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi); We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish. The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async) modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full(); Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix. Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed. Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested. Signed-off-by: Igor Pylypiv Reviewed-by: Changyuan Lyu Reviewed-by: Luis Chamberlain Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/async.c | 3 --- kernel/module.c | 25 +++++-------------------- 3 files changed, 5 insertions(+), 24 deletions(-) (limited to 'kernel/module.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index f5b2be39a78c..75ba8aa60248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1680,7 +1680,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ diff --git a/kernel/async.c b/kernel/async.c index b8d7a663497f..b2c4ba5686ee 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); - /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work_node(node, system_unbound_wq, &entry->work); diff --git a/kernel/module.c b/kernel/module.c index 24dab046e16c..46a5c2ed1928 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3725,12 +3725,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; - /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3756,22 +3750,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + -- cgit v1.2.3-71-gd317