From 2f5a55c52c00fcded796db5f961057ba3fec8910 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 2 May 2020 14:18:35 +0200 Subject: i2c: use my kernel.org address from now on The old email is still active, but for easier handling, I am going to use my kernel.org address from now on. Also, add a mailmap for the now defunct Pengutronix address. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 45d36ba4826b..49d29054e657 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -2,7 +2,7 @@ /* * i2c.h - definitions for the Linux i2c bus interface * Copyright (C) 1995-2000 Simon G. Vogl - * Copyright (C) 2013-2019 Wolfram Sang + * Copyright (C) 2013-2019 Wolfram Sang * * With some changes from Kyösti Mälkki and * Frodo Looijaard -- cgit v1.2.3-71-gd317 From 54163a346d4a0a1b93f2ff6dc1f488419a605fa9 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 May 2020 08:17:53 -0500 Subject: KVM: Introduce kvm_make_all_cpus_request_except() This allows making request to all other vcpus except the one specified in the parameter. Signed-off-by: Suravee Suthikulpanit Message-Id: <1588771076-73790-2-git-send-email-suravee.suthikulpanit@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 3 +++ virt/kvm/kvm_main.c | 14 +++++++++++--- 4 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index bcefa9d4e57e..54d4b98b49e1 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1427,7 +1427,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, */ kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, - vcpu_mask, &hv_vcpu->tlb_flush); + NULL, vcpu_mask, &hv_vcpu->tlb_flush); ret_success: /* We always do full TLB flush, set rep_done = rep_cnt. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f780af601c5f..ba8edf3b89f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8030,7 +8030,7 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, zalloc_cpumask_var(&cpus, GFP_ATOMIC); kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, - vcpu_bitmap, cpus); + NULL, vcpu_bitmap, cpus); free_cpumask_var(cpus); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 01276e3d01b9..131cc1527d68 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -813,8 +813,11 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except, unsigned long *vcpu_bitmap, cpumask_var_t tmp); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); +bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except); bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 74bdb7bf3295..731c1e517716 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -259,6 +259,7 @@ static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) } bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except, unsigned long *vcpu_bitmap, cpumask_var_t tmp) { int i, cpu, me; @@ -268,7 +269,8 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, me = get_cpu(); kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) + if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) || + vcpu == except) continue; kvm_make_request(req, vcpu); @@ -288,19 +290,25 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, return called; } -bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except) { cpumask_var_t cpus; bool called; zalloc_cpumask_var(&cpus, GFP_ATOMIC); - called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus); + called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus); free_cpumask_var(cpus); return called; } +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +{ + return kvm_make_all_cpus_request_except(kvm, req, NULL); +} + #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL void kvm_flush_remote_tlbs(struct kvm *kvm) { -- cgit v1.2.3-71-gd317 From 3d8c11efd528d56972d44ed0de51c4e11a9a4fa9 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 12 May 2020 13:55:02 +0900 Subject: efi: cper: Add support for printing Firmware Error Record Reference While debugging a boot failure, the following unknown error record was seen in the boot logs. <...> BERT: Error records from previous boot: [Hardware Error]: event severity: fatal [Hardware Error]: Error 0, type: fatal [Hardware Error]: section type: unknown, 81212a96-09ed-4996-9471-8d729c8e69ed [Hardware Error]: section length: 0x290 [Hardware Error]: 00000000: 00000001 00000000 00000000 00020002 ................ [Hardware Error]: 00000010: 00020002 0000001f 00000320 00000000 ........ ....... [Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................ [Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................ <...> On further investigation, it was found that the error record with UUID (81212a96-09ed-4996-9471-8d729c8e69ed) has been defined in the UEFI Specification at least since v2.4 and has recently had additional fields defined in v2.7 Section N.2.10 Firmware Error Record Reference. Add support for parsing and printing the defined fields to give users a chance to figure out what went wrong. Signed-off-by: Punit Agrawal Cc: Ard Biesheuvel Cc: "Rafael J. Wysocki" Cc: Borislav Petkov Cc: James Morse Cc: linux-acpi@vger.kernel.org Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20200512045502.3810339-1-punit1.agrawal@toshiba.co.jp Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/cper.c | 62 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cper.h | 9 +++++++ 2 files changed, 71 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 9d2512913d25..f564e15fbc7e 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -407,6 +407,58 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, } } +static const char * const fw_err_rec_type_strs[] = { + "IPF SAL Error Record", + "SOC Firmware Error Record Type1 (Legacy CrashLog Support)", + "SOC Firmware Error Record Type2", +}; + +static void cper_print_fw_err(const char *pfx, + struct acpi_hest_generic_data *gdata, + const struct cper_sec_fw_err_rec_ref *fw_err) +{ + void *buf = acpi_hest_get_payload(gdata); + u32 offset, length = gdata->error_data_length; + + printk("%s""Firmware Error Record Type: %s\n", pfx, + fw_err->record_type < ARRAY_SIZE(fw_err_rec_type_strs) ? + fw_err_rec_type_strs[fw_err->record_type] : "unknown"); + printk("%s""Revision: %d\n", pfx, fw_err->revision); + + /* Record Type based on UEFI 2.7 */ + if (fw_err->revision == 0) { + printk("%s""Record Identifier: %08llx\n", pfx, + fw_err->record_identifier); + } else if (fw_err->revision == 2) { + printk("%s""Record Identifier: %pUl\n", pfx, + &fw_err->record_identifier_guid); + } + + /* + * The FW error record may contain trailing data beyond the + * structure defined by the specification. As the fields + * defined (and hence the offset of any trailing data) vary + * with the revision, set the offset to account for this + * variation. + */ + if (fw_err->revision == 0) { + /* record_identifier_guid not defined */ + offset = offsetof(struct cper_sec_fw_err_rec_ref, + record_identifier_guid); + } else if (fw_err->revision == 1) { + /* record_identifier not defined */ + offset = offsetof(struct cper_sec_fw_err_rec_ref, + record_identifier); + } else { + offset = sizeof(*fw_err); + } + + buf += offset; + length -= offset; + + print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, buf, length, true); +} + static void cper_print_tstamp(const char *pfx, struct acpi_hest_generic_data_v300 *gdata) { @@ -494,6 +546,16 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata else goto err_section_too_small; #endif + } else if (guid_equal(sec_type, &CPER_SEC_FW_ERR_REC_REF)) { + struct cper_sec_fw_err_rec_ref *fw_err = acpi_hest_get_payload(gdata); + + printk("%ssection_type: Firmware Error Record Reference\n", + newpfx); + /* The minimal FW Error Record contains 16 bytes */ + if (gdata->error_data_length >= SZ_16) + cper_print_fw_err(newpfx, gdata, fw_err); + else + goto err_section_too_small; } else { const void *err = acpi_hest_get_payload(gdata); diff --git a/include/linux/cper.h b/include/linux/cper.h index 4f005d95ce88..8537e9282a65 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -521,6 +521,15 @@ struct cper_sec_pcie { u8 aer_info[96]; }; +/* Firmware Error Record Reference, UEFI v2.7 sec N.2.10 */ +struct cper_sec_fw_err_rec_ref { + u8 record_type; + u8 revision; + u8 reserved[6]; + u64 record_identifier; + guid_t record_identifier_guid; +}; + /* Reset to default packing */ #pragma pack() -- cgit v1.2.3-71-gd317 From 8695e0b1b964f6d7caee667f14dceb7e8a4a3b3c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:53:29 -0500 Subject: i2c: mux: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c-mux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index c5a977320f82..98ef73b7c8fd 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -29,7 +29,7 @@ struct i2c_mux_core { int num_adapters; int max_adapters; - struct i2c_adapter *adapter[0]; + struct i2c_adapter *adapter[]; }; struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, -- cgit v1.2.3-71-gd317 From a9a3ed1eff3601b63aea4fb462d8b3b92c7c1e7e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Apr 2020 18:11:30 +0200 Subject: x86: Fix early boot crash on gcc-10, third try MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... or the odyssey of trying to disable the stack protector for the function which generates the stack canary value. The whole story started with Sergei reporting a boot crash with a kernel built with gcc-10: Kernel panic — not syncing: stack-protector: Kernel stack is corrupted in: start_secondary CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.6.0-rc5—00235—gfffb08b37df9 #139 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./H77M—D3H, BIOS F12 11/14/2013 Call Trace: dump_stack panic ? start_secondary __stack_chk_fail start_secondary secondary_startup_64 -—-[ end Kernel panic — not syncing: stack—protector: Kernel stack is corrupted in: start_secondary This happens because gcc-10 tail-call optimizes the last function call in start_secondary() - cpu_startup_entry() - and thus emits a stack canary check which fails because the canary value changes after the boot_init_stack_canary() call. To fix that, the initial attempt was to mark the one function which generates the stack canary with: __attribute__((optimize("-fno-stack-protector"))) ... start_secondary(void *unused) however, using the optimize attribute doesn't work cumulatively as the attribute does not add to but rather replaces previously supplied optimization options - roughly all -fxxx options. The key one among them being -fno-omit-frame-pointer and thus leading to not present frame pointer - frame pointer which the kernel needs. The next attempt to prevent compilers from tail-call optimizing the last function call cpu_startup_entry(), shy of carving out start_secondary() into a separate compilation unit and building it with -fno-stack-protector, was to add an empty asm(""). This current solution was short and sweet, and reportedly, is supported by both compilers but we didn't get very far this time: future (LTO?) optimization passes could potentially eliminate this, which leads us to the third attempt: having an actual memory barrier there which the compiler cannot ignore or move around etc. That should hold for a long time, but hey we said that about the other two solutions too so... Reported-by: Sergei Trofimovich Signed-off-by: Borislav Petkov Tested-by: Kalle Valo Cc: Link: https://lkml.kernel.org/r/20200314164451.346497-1-slyfox@gentoo.org --- arch/x86/include/asm/stackprotector.h | 7 ++++++- arch/x86/kernel/smpboot.c | 8 ++++++++ arch/x86/xen/smp_pv.c | 1 + include/linux/compiler.h | 6 ++++++ init/main.c | 2 ++ 5 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 91e29b6a86a5..9804a7957f4e 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -55,8 +55,13 @@ /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c89e4d9ad28..2f24c334a938 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -266,6 +266,14 @@ static void notrace start_secondary(void *unused) wmb(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + + /* + * Prevent tail call to cpu_startup_entry() because the stack protector + * guard has been changed a couple of function calls up, in + * boot_init_stack_canary() and must not be checked before tail calling + * another function. + */ + prevent_tail_call_optimization(); } /** diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 8fb8a50a28b4..f2adb63b2d7c 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -93,6 +93,7 @@ asmlinkage __visible void cpu_bringup_and_idle(void) cpu_bringup(); boot_init_stack_canary(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + prevent_tail_call_optimization(); } void xen_smp_intr_free_pv(unsigned int cpu) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 034b0a644efc..448c91bf543b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -356,4 +356,10 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* + * This is needed in functions which generate the stack canary, see + * arch/x86/kernel/smpboot.c::start_secondary() for an example. + */ +#define prevent_tail_call_optimization() mb() + #endif /* __LINUX_COMPILER_H */ diff --git a/init/main.c b/init/main.c index 1a5da2c2660c..ad3812b5ae65 100644 --- a/init/main.c +++ b/init/main.c @@ -1036,6 +1036,8 @@ asmlinkage __visible void __init start_kernel(void) /* Do the rest non-__init'ed, we're now alive */ arch_call_rest_init(); + + prevent_tail_call_optimization(); } /* Call all constructor functions linked into the kernel. */ -- cgit v1.2.3-71-gd317 From e8da08a088236aff4b51d4ec97c750051f9fe417 Mon Sep 17 00:00:00 2001 From: Benjamin Thiel Date: Sat, 16 May 2020 15:26:47 +0200 Subject: efi: Pull up arch-specific prototype efi_systab_show_arch() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull up arch-specific prototype efi_systab_show_arch() in order to fix a -Wmissing-prototypes warning: arch/x86/platform/efi/efi.c:957:7: warning: no previous prototype for ‘efi_systab_show_arch’ [-Wmissing-prototypes] char *efi_systab_show_arch(char *str) Signed-off-by: Benjamin Thiel Link: https://lore.kernel.org/r/20200516132647.14568-1-b.thiel@posteo.de Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 5 +---- include/linux/efi.h | 2 ++ 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 911a2bd0f6b7..4e3055238f31 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -130,11 +130,8 @@ static ssize_t systab_show(struct kobject *kobj, if (efi.smbios != EFI_INVALID_TABLE_ADDR) str += sprintf(str, "SMBIOS=0x%lx\n", efi.smbios); - if (IS_ENABLED(CONFIG_IA64) || IS_ENABLED(CONFIG_X86)) { - extern char *efi_systab_show_arch(char *str); - + if (IS_ENABLED(CONFIG_IA64) || IS_ENABLED(CONFIG_X86)) str = efi_systab_show_arch(str); - } return str - buf; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 251f1f783cdf..9430d01c0c3d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1245,4 +1245,6 @@ struct linux_efi_memreserve { void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); +char *efi_systab_show_arch(char *str); + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3-71-gd317 From 17d00e839d3b592da9659c1977d45f85b77f986a Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 27 Dec 2019 07:01:53 +0200 Subject: net/mlx5: Add command entry handling completion When FW response to commands is very slow and all command entries in use are waiting for completion we can have a race where commands can get timeout before they get out of the queue and handled. Timeout completion on uninitialized command will cause releasing command's buffers before accessing it for initialization and then we will get NULL pointer exception while trying access it. It may also cause releasing buffers of another command since we may have timeout completion before even allocating entry index for this command. Add entry handling completion to avoid this race. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 14 ++++++++++++++ include/linux/mlx5/driver.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index cede5bdfd598..d695b75bc0af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -861,6 +861,7 @@ static void cmd_work_handler(struct work_struct *work) int alloc_ret; int cmd_mode; + complete(&ent->handling); sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { @@ -978,6 +979,11 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) struct mlx5_cmd *cmd = &dev->cmd; int err; + if (!wait_for_completion_timeout(&ent->handling, timeout) && + cancel_work_sync(&ent->work)) { + ent->ret = -ECANCELED; + goto out_err; + } if (cmd->mode == CMD_MODE_POLLING || ent->polling) { wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { @@ -985,12 +991,17 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } +out_err: err = ent->ret; if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + } else if (err == -ECANCELED) { + mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); } mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, deliv_status_to_str(ent->status), ent->status); @@ -1026,6 +1037,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, ent->token = token; ent->polling = force_polling; + init_completion(&ent->handling); if (!callback) init_completion(&ent->done); @@ -1045,6 +1057,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, err = wait_func(dev, ent); if (err == -ETIMEDOUT) goto out; + if (err == -ECANCELED) + goto out_free; ds = ent->ts2 - ent->ts1; op = MLX5_GET(mbox_in, in->first.data, opcode); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6f8f79ef829b..9b1f29f26c27 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -743,6 +743,7 @@ struct mlx5_cmd_work_ent { struct delayed_work cb_timeout_work; void *context; int idx; + struct completion handling; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; -- cgit v1.2.3-71-gd317 From d43b7007dbd1195a5b6b83213e49b1516aaf6f5e Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 18 Mar 2020 21:44:32 +0200 Subject: net/mlx5: Fix a race when moving command interface to events mode After driver creates (via FW command) an EQ for commands, the driver will be informed on new commands completion by EQE. However, due to a race in driver's internal command mode metadata update, some new commands will still be miss-handled by driver as if we are in polling mode. Such commands can get two non forced completion, leading to already freed command entry access. CREATE_EQ command, that maps EQ to the command queue must be posted to the command queue while it is empty and no other command should be posted. Add SW mechanism that once the CREATE_EQ command is about to be executed, all other commands will return error without being sent to the FW. Allow sending other commands only after successfully changing the driver's internal command mode metadata. We can safely return error to all other commands while creating the command EQ, as all other commands might be sent from the user/application during driver load. Application can rerun them later after driver's load was finished. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 35 ++++++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +++ include/linux/mlx5/driver.h | 6 +++++ 3 files changed, 40 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d695b75bc0af..2f3cafdc3b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -848,6 +848,14 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) +{ + if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL) + return true; + + return cmd->allowed_opcode == opcode; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -914,7 +922,8 @@ static void cmd_work_handler(struct work_struct *work) /* Skip sending command to fw if internal error */ if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + !opcode_allowed(&dev->cmd, ent->op)) { u8 status = 0; u32 drv_synd; @@ -1405,6 +1414,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev) mlx5_cmdif_debugfs_init(dev); } +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + down(&cmd->pages_sem); + + cmd->allowed_opcode = opcode; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = &dev->cmd; @@ -1681,12 +1706,13 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int err; u8 status = 0; u32 drv_synd; + u16 opcode; u8 token; + opcode = MLX5_GET(mbox_in, in, opcode); if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - u16 opcode = MLX5_GET(mbox_in, in, opcode); - + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + !opcode_allowed(&dev->cmd, opcode)) { err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); MLX5_SET(mbox_out, out, status, status); MLX5_SET(mbox_out, out, syndrome, drv_synd); @@ -1988,6 +2014,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); cmd->mode = CMD_MODE_POLLING; + cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; create_msg_cache(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index cccea3a8eddd..ce6c621af043 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -611,11 +611,13 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ); err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); if (err) goto err1; mlx5_cmd_use_events(dev); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); param = (struct mlx5_eq_param) { .irq_index = 0, @@ -645,6 +647,7 @@ err2: mlx5_cmd_use_polling(dev); cleanup_async_eq(dev, &table->cmd_eq, "cmd"); err1: + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9b1f29f26c27..c03778c75dfa 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -284,6 +284,7 @@ struct mlx5_cmd { struct semaphore sem; struct semaphore pages_sem; int mode; + u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; struct dma_pool *pool; struct mlx5_cmd_debug dbg; @@ -875,10 +876,15 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); } +enum { + CMD_ALLOWED_OPCODE_ALL, +}; + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); struct mlx5_async_ctx { struct mlx5_core_dev *dev; -- cgit v1.2.3-71-gd317 From f7936ddd35d8b849daf0372770c7c9dbe7910fca Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 19 Mar 2020 21:43:13 +0200 Subject: net/mlx5: Avoid processing commands before cmdif is ready When driver is reloading during recovery flow, it can't get new commands till command interface is up again. Otherwise we may get to null pointer trying to access non initialized command structures. Add cmdif state to avoid processing commands while cmdif is not ready. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++++ include/linux/mlx5/driver.h | 9 +++++++++ 3 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2f3cafdc3b1f..7a77fe40af3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -923,6 +923,7 @@ static void cmd_work_handler(struct work_struct *work) /* Skip sending command to fw if internal error */ if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + cmd->state != MLX5_CMDIF_STATE_UP || !opcode_allowed(&dev->cmd, ent->op)) { u8 status = 0; u32 drv_synd; @@ -1712,6 +1713,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, opcode = MLX5_GET(mbox_in, in, opcode); if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + dev->cmd.state != MLX5_CMDIF_STATE_UP || !opcode_allowed(&dev->cmd, opcode)) { err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); MLX5_SET(mbox_out, out, status, status); @@ -1977,6 +1979,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_free_page; } + cmd->state = MLX5_CMDIF_STATE_DOWN; cmd->checksum_disabled = 1; cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1; @@ -2054,3 +2057,10 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) dma_pool_destroy(cmd->pool); } EXPORT_SYMBOL(mlx5_cmd_cleanup); + +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state) +{ + dev->cmd.state = cmdif_state; +} +EXPORT_SYMBOL(mlx5_cmd_set_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7af4210c1b96..a61e473db7e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -965,6 +965,8 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) goto err_cmd_cleanup; } + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); + err = mlx5_core_enable_hca(dev, 0); if (err) { mlx5_core_err(dev, "enable hca failed\n"); @@ -1026,6 +1028,7 @@ reclaim_boot_pages: err_disable_hca: mlx5_core_disable_hca(dev, 0); err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); return err; @@ -1043,6 +1046,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) } mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c03778c75dfa..8397b6558dc7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -213,6 +213,12 @@ enum mlx5_port_status { MLX5_PORT_DOWN = 2, }; +enum mlx5_cmdif_state { + MLX5_CMDIF_STATE_UNINITIALIZED, + MLX5_CMDIF_STATE_UP, + MLX5_CMDIF_STATE_DOWN, +}; + struct mlx5_cmd_first { __be32 data[4]; }; @@ -258,6 +264,7 @@ struct mlx5_cmd_stats { struct mlx5_cmd { struct mlx5_nb nb; + enum mlx5_cmdif_state state; void *cmd_alloc_buf; dma_addr_t alloc_dma; int alloc_size; @@ -882,6 +889,8 @@ enum { int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); -- cgit v1.2.3-71-gd317