From 9ea4dcf49878bb9546b8fa9319dcbdc9b7ee20f8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 22 Apr 2022 15:58:11 -0700 Subject: PM: CXL: Disable suspend The CXL specification claims S3 support at a hardware level, but at a system software level there are some missing pieces. Section 9.4 (CXL 2.0) rightly claims that "CXL mem adapters may need aux power to retain memory context across S3", but there is no enumeration mechanism for the OS to determine if a given adapter has that support. Moreover the save state and resume image for the system may inadvertently end up in a CXL device that needs to be restored before the save state is recoverable. I.e. a circular dependency that is not resolvable without a third party save-area. Arrange for the cxl_mem driver to fail S3 attempts. This still nominally allows for suspend, but requires unbinding all CXL memory devices before the suspend to ensure the typical DRAM flow is taken. The cxl_mem unbind flow is intended to also tear down all CXL memory regions associated with a given cxl_memdev. It is reasonable to assume that any device participating in a System RAM range published in the EFI memory map is covered by aux power and save-area outside the device itself. So this restriction can be minimized in the future once pre-existing region enumeration support arrives, and perhaps a spec update to clarify if the EFI memory map is sufficient for determining the range of devices managed by platform-firmware for S3 support. Per Rafael, if the CXL configuration prevents suspend then it should fail early before tasks are frozen, and mem_sleep should stop showing 'mem' as an option [1]. Effectively CXL augments the platform suspend ->valid() op since, for example, the ACPI ops are not aware of the CXL / PCI dependencies. Given the split role of platform firmware vs OS provisioned CXL memory it is up to the cxl_mem driver to determine if the CXL configuration has elements that platform firmware may not be prepared to restore. 
Link: https://lore.kernel.org/r/CAJZ5v0hGVN_=3iU8OLpHY3Ak35T5+JcBM-qs8SbojKrpd0VXsA@mail.gmail.com [1] Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Len Brown Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/165066828317.3907920.5690432272182042556.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/pm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index e65b3ab28377..7911c4c9a7be 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -36,6 +36,15 @@ static inline void pm_vt_switch_unregister(struct device *dev) } #endif /* CONFIG_VT_CONSOLE_SLEEP */ +#ifdef CONFIG_CXL_SUSPEND +bool cxl_mem_active(void); +#else +static inline bool cxl_mem_active(void) +{ + return false; +} +#endif + /* * Device power management */ -- cgit v1.2.3-71-gd317 From cc10eee95204579fcd66fd5965073fdcbf629676 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 13 Apr 2022 01:36:16 -0600 Subject: PCI/ACPI: add a helper for retrieving _OSC Control DWORDs During _OSC negotiation, when the 'Control' DWORD is needed from the result buffer after running _OSC, a couple of places performed manual pointer arithmetic to offset into the right spot in the raw buffer. Add an acpi_osc_ctx_get_pci_control() helper to use the #define'd DWORD offsets to fetch the DWORDs needed from @acpi_osc_context, and replace the above instances of the open-coded arithmetic. Cc: "Rafael J. Wysocki" Suggested-by: Davidlohr Bueso Acked-by: Rafael J. Wysocki Reviewed-by: Rafael J. 
Wysocki Reviewed-by: Davidlohr Bueso Reviewed-by: Adam Manzanares Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-2-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/bus.c | 2 +- drivers/acpi/pci_root.c | 2 +- include/linux/acpi.h | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 3e58b613a2c4..7658acbbb2bd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -425,7 +425,7 @@ static void acpi_bus_osc_negotiate_usb_control(void) } osc_sb_native_usb4_control = - control & ((u32 *)context.ret.pointer)[OSC_CONTROL_DWORD]; + control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 6f9e75d14808..57be89cb3966 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -183,7 +183,7 @@ static acpi_status acpi_pci_run_osc(acpi_handle handle, status = acpi_run_osc(handle, &context); if (ACPI_SUCCESS(status)) { - *retval = *((u32 *)(context.ret.pointer + 8)); + *retval = acpi_osc_ctx_get_pci_control(&context); kfree(context.ret.pointer); } return status; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d7136d13aa44..04e5a038dd57 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -608,6 +608,13 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_CONTROL_DWORD]; +} + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@ -1004,6 +1011,12 @@ static inline int acpi_register_wakeup_handler(int 
wake_irq, static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } +struct acpi_osc_context; +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -- cgit v1.2.3-71-gd317 From 241d26bc26add2e2867c546f7474902406d37c60 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 13 Apr 2022 01:36:17 -0600 Subject: PCI/ACPI: Prefer CXL _OSC instead of PCIe _OSC for CXL host bridges In preparation for negotiating OS control of CXL _OSC features, do the minimal enabling to use CXL _OSC to handle the base PCIe feature negotiation. Recall that CXL _OSC is a super-set of PCIe _OSC and the CXL 2.0 specification mandates: "If a CXL Host Bridge device exposes CXL _OSC, CXL aware OSPM shall evaluate CXL _OSC and not evaluate PCIe _OSC." Rather than pass a boolean flag alongside @root to all the helper functions that need to consider PCIe specifics, add is_pcie() and is_cxl() helper functions to check the flavor of @root. This also allows for dynamic fallback to PCIe _OSC in cases where an attempt to use CXL _OSC fails. This can happen on CXL 1.1 platforms that publish ACPI0016 devices to indicate CXL host bridges, but do not publish the optional CXL _OSC method. CXL _OSC is mandatory for CXL 2.0 hosts. Cc: Bjorn Helgaas Cc: "Rafael J. Wysocki" Cc: Robert Moore Reviewed-by: Jonathan Cameron Reviewed-by: Rafael J. 
Wysocki Reviewed-by: Davidlohr Bueso Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-3-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/pci_root.c | 67 ++++++++++++++++++++++++++++++++++++++----------- include/acpi/acpi_bus.h | 6 +++++ include/linux/acpi.h | 4 +++ 3 files changed, 63 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 57be89cb3966..1e683c115ee0 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -168,20 +168,45 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static inline bool is_pcie(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; +} + +static inline bool is_cxl(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_CXL; +} + static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766"; +static u8 cxl_osc_uuid_str[] = "68F2D50B-C469-4d8A-BD3D-941A103FD3FC"; + +static char *to_uuid(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return cxl_osc_uuid_str; + return pci_osc_uuid_str; +} + +static int cap_length(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return sizeof(u32) * OSC_CXL_CAPABILITY_DWORDS; + return sizeof(u32) * OSC_PCI_CAPABILITY_DWORDS; +} -static acpi_status acpi_pci_run_osc(acpi_handle handle, +static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, const u32 *capbuf, u32 *retval) { struct acpi_osc_context context = { - .uuid_str = pci_osc_uuid_str, + .uuid_str = to_uuid(root), .rev = 1, - .cap.length = 12, + .cap.length = cap_length(root), .cap.pointer = (void *)capbuf, }; acpi_status status; - status = acpi_run_osc(handle, &context); + status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { *retval = acpi_osc_ctx_get_pci_control(&context); kfree(context.ret.pointer); @@ -194,7 +219,7 @@ static 
acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 *control) { acpi_status status; - u32 result, capbuf[3]; + u32 result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -202,10 +227,18 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; - status = acpi_pci_run_osc(root->device->handle, capbuf, &result); +retry: + status = acpi_pci_run_osc(root, capbuf, &result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; *control = result; + } else if (is_cxl(root)) { + /* + * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC + * upon any failure using CXL _OSC. + */ + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + goto retry; } return status; } @@ -336,7 +369,7 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[3]; + u32 ctrl, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -373,7 +406,7 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(handle, capbuf, mask); + status = acpi_pci_run_osc(root, capbuf, mask); if (ACPI_FAILURE(status)) return status; @@ -452,8 +485,7 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) return true; } -static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, - bool is_pcie) +static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; acpi_status status; @@ -504,7 +536,7 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, *no_aspm = 1; /* _OSC is optional for PCI host bridges */ - if ((status == 
AE_NOT_FOUND) && !is_pcie) + if (status == AE_NOT_FOUND && !is_pcie(root)) return; if (control) { @@ -527,7 +559,7 @@ static int acpi_pci_root_add(struct acpi_device *device, acpi_handle handle = device->handle; int no_aspm = 0; bool hotadd = system_state == SYSTEM_RUNNING; - bool is_pcie; + const char *acpi_hid; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -585,8 +617,15 @@ static int acpi_pci_root_add(struct acpi_device *device, root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle); - is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0; - negotiate_os_control(root, &no_aspm, is_pcie); + acpi_hid = acpi_device_hid(root->device); + if (strcmp(acpi_hid, "PNP0A08") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + else if (strcmp(acpi_hid, "ACPI0016") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_CXL; + else + dev_dbg(&device->dev, "Assuming non-PCIe host bridge\n"); + + negotiate_os_control(root, &no_aspm); /* * TBD: Need PCI interface for enumeration/configuration of roots. 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c4b78c21d793..305ebf2a3fa7 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -582,10 +582,16 @@ int unregister_acpi_bus_type(struct acpi_bus_type *); int acpi_bind_one(struct device *dev, struct acpi_device *adev); int acpi_unbind_one(struct device *dev); +enum acpi_bridge_type { + ACPI_BRIDGE_TYPE_PCIE = 1, + ACPI_BRIDGE_TYPE_CXL, +}; + struct acpi_pci_root { struct acpi_device * device; struct pci_bus *bus; u16 segment; + int bridge_type; struct resource secondary; /* downstream bus range */ u32 osc_support_set; /* _OSC state of support bits */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 04e5a038dd57..82d91d9ccce5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -550,6 +550,10 @@ struct acpi_osc_context { acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); +/* Number of _OSC capability DWORDS depends on bridge type */ +#define OSC_PCI_CAPABILITY_DWORDS 3 +#define OSC_CXL_CAPABILITY_DWORDS 5 + /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ -- cgit v1.2.3-71-gd317 From 56368029d93bbb3246ee2e03268fa6dd9754be05 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 13 Apr 2022 01:36:18 -0600 Subject: PCI/ACPI: negotiate CXL _OSC Add full support for negotiating _OSC as defined in the CXL 2.0 spec, as applicable to CXL-enabled platforms. Advertise support for the CXL features we support - 'CXL 2.0 port/device register access', 'Protocol Error Reporting', and 'CXL Native Hot Plug'. Request control for 'CXL Memory Error Reporting'. The requests are dependent on CONFIG_* based prerequisites, and prior PCI enabling, similar to how the standard PCI _OSC bits are determined. 
The CXL specification does not define any additional constraints on the hotplug flow beyond PCIe native hotplug, so a kernel that supports native PCIe hotplug, supports CXL hotplug. For error handling protocol and link errors just use PCIe AER. There is nascent support for amending AER events with CXL specific status [1], but there's otherwise no additional OS responsibility for CXL errors beyond PCIe AER. CXL Memory Errors behave the same as typical memory errors so CONFIG_MEMORY_FAILURE is sufficient to indicate support to platform firmware. [1]: https://lore.kernel.org/linux-cxl/164740402242.3912056.8303625392871313860.stgit@dwillia2-desk3.amr.corp.intel.com/ Cc: Bjorn Helgaas Cc: "Rafael J. Wysocki" Cc: Robert Moore Cc: Dan Williams Reviewed-by: Rafael J. Wysocki Reviewed-by: Davidlohr Bueso Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-4-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/pci_root.c | 179 +++++++++++++++++++++++++++++++++++++++++++----- include/acpi/acpi_bus.h | 6 +- include/linux/acpi.h | 25 ++++++- 3 files changed, 188 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 1e683c115ee0..c82ad63fffed 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -140,6 +140,17 @@ static struct pci_osc_bit_struct pci_osc_control_bit[] = { { OSC_PCI_EXPRESS_DPC_CONTROL, "DPC" }, }; +static struct pci_osc_bit_struct cxl_osc_support_bit[] = { + { OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT, "CXL11PortRegAccess" }, + { OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT, "CXL20PortDevRegAccess" }, + { OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT, "CXLProtocolErrorReporting" }, + { OSC_CXL_NATIVE_HP_SUPPORT, "CXLNativeHotPlug" }, +}; + +static struct pci_osc_bit_struct cxl_osc_control_bit[] = { + { OSC_CXL_ERROR_REPORTING_CONTROL, "CXLMemErrorReporting" }, +}; + static void decode_osc_bits(struct acpi_pci_root *root, char *msg, u32 word, 
struct pci_osc_bit_struct *table, int size) { @@ -168,6 +179,18 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static void decode_cxl_osc_support(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_support_bit, + ARRAY_SIZE(cxl_osc_support_bit)); +} + +static void decode_cxl_osc_control(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_control_bit, + ARRAY_SIZE(cxl_osc_control_bit)); +} + static inline bool is_pcie(struct acpi_pci_root *root) { return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; @@ -196,7 +219,8 @@ static int cap_length(struct acpi_pci_root *root) } static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, - const u32 *capbuf, u32 *retval) + const u32 *capbuf, u32 *pci_control, + u32 *cxl_control) { struct acpi_osc_context context = { .uuid_str = to_uuid(root), @@ -208,18 +232,20 @@ static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { - *retval = acpi_osc_ctx_get_pci_control(&context); + *pci_control = acpi_osc_ctx_get_pci_control(&context); + if (is_cxl(root)) + *cxl_control = acpi_osc_ctx_get_cxl_control(&context); kfree(context.ret.pointer); } return status; } -static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, - u32 support, - u32 *control) +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 support, + u32 *control, u32 cxl_support, + u32 *cxl_control) { acpi_status status; - u32 result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; + u32 pci_result, cxl_result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -227,11 +253,21 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; + if (is_cxl(root)) { + cxl_support |= 
root->osc_ext_support_set; + capbuf[OSC_EXT_SUPPORT_DWORD] = cxl_support; + capbuf[OSC_EXT_CONTROL_DWORD] = *cxl_control | root->osc_ext_control_set; + } + retry: - status = acpi_pci_run_osc(root, capbuf, &result); + status = acpi_pci_run_osc(root, capbuf, &pci_result, &cxl_result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - *control = result; + *control = pci_result; + if (is_cxl(root)) { + root->osc_ext_support_set = cxl_support; + *cxl_control = cxl_result; + } } else if (is_cxl(root)) { /* * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC @@ -354,6 +390,8 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). * @mask: Mask of _OSC bits to request control of, place to store control mask. * @support: _OSC supported capability. + * @cxl_mask: Mask of CXL _OSC control bits, place to store control mask. + * @cxl_support: CXL _OSC supported capability. * * Run _OSC query for @mask and if that is successful, compare the returned * mask of control bits with @req. If all of the @req bits are set in the @@ -364,12 +402,14 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * _OSC bits the BIOS has granted control of, but its contents are meaningless * on failure. 
**/ -static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 support) +static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, + u32 support, u32 *cxl_mask, + u32 cxl_support) { u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[OSC_CXL_CAPABILITY_DWORDS]; + u32 ctrl, cxl_ctrl = 0, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -381,20 +421,42 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s ctrl = *mask; *mask |= root->osc_control_set; + if (is_cxl(root)) { + cxl_ctrl = *cxl_mask; + *cxl_mask |= root->osc_ext_control_set; + } + /* Need to check the available controls bits before requesting them. */ do { - status = acpi_pci_query_osc(root, support, mask); + u32 pci_missing = 0, cxl_missing = 0; + + status = acpi_pci_query_osc(root, support, mask, cxl_support, + cxl_mask); if (ACPI_FAILURE(status)) return status; - if (ctrl == *mask) - break; - decode_osc_control(root, "platform does not support", - ctrl & ~(*mask)); + if (is_cxl(root)) { + if (ctrl == *mask && cxl_ctrl == *cxl_mask) + break; + pci_missing = ctrl & ~(*mask); + cxl_missing = cxl_ctrl & ~(*cxl_mask); + } else { + if (ctrl == *mask) + break; + pci_missing = ctrl & ~(*mask); + } + if (pci_missing) + decode_osc_control(root, "platform does not support", + pci_missing); + if (cxl_missing) + decode_cxl_osc_control(root, "CXL platform does not support", + cxl_missing); ctrl = *mask; - } while (*mask); + cxl_ctrl = *cxl_mask; + } while (*mask || *cxl_mask); /* No need to request _OSC if the control was already granted. 
*/ - if ((root->osc_control_set & ctrl) == ctrl) + if ((root->osc_control_set & ctrl) == ctrl && + (root->osc_ext_control_set & cxl_ctrl) == cxl_ctrl) return AE_OK; if ((ctrl & req) != req) { @@ -406,11 +468,17 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(root, capbuf, mask); + if (is_cxl(root)) { + capbuf[OSC_EXT_SUPPORT_DWORD] = root->osc_ext_support_set; + capbuf[OSC_EXT_CONTROL_DWORD] = cxl_ctrl; + } + + status = acpi_pci_run_osc(root, capbuf, mask, cxl_mask); if (ACPI_FAILURE(status)) return status; root->osc_control_set = *mask; + root->osc_ext_control_set = *cxl_mask; return AE_OK; } @@ -436,6 +504,53 @@ static u32 calculate_support(void) return support; } +/* + * Background on hotplug support, and making it depend on only + * CONFIG_HOTPLUG_PCI_PCIE vs. also considering CONFIG_MEMORY_HOTPLUG: + * + * CONFIG_ACPI_HOTPLUG_MEMORY does depend on CONFIG_MEMORY_HOTPLUG, but + * there is no existing _OSC for memory hotplug support. The reason is that + * ACPI memory hotplug requires the OS to acknowledge / coordinate with + * memory plug events via a scan handler. On the CXL side the equivalent + * would be if Linux supported the Mechanical Retention Lock [1], or + * otherwise had some coordination for the driver of a PCI device + * undergoing hotplug to be consulted on whether the hotplug should + * proceed or not. + * + * The concern is that if Linux says no to supporting CXL hotplug then + * the BIOS may say no to giving the OS hotplug control of any other PCIe + * device. So the question here is not whether hotplug is enabled, it's + * whether it is handled natively by the at all OS, and if + * CONFIG_HOTPLUG_PCI_PCIE is enabled then the answer is "yes". 
+ * + * Otherwise, the plan for CXL coordinated remove, since the kernel does + * not support blocking hotplug, is to require the memory device to be + * disabled before hotplug is attempted. When CONFIG_MEMORY_HOTPLUG is + * disabled that step will fail and the remove attempt cancelled by the + * user. If that is not honored and the card is removed anyway then it + * does not matter if CONFIG_MEMORY_HOTPLUG is enabled or not, it will + * cause a crash and other badness. + * + * Therefore, just say yes to CXL hotplug and require removal to + * be coordinated by userspace unless and until the kernel grows better + * mechanisms for doing "managed" removal of devices in consultation with + * the driver. + * + * [1]: https://lore.kernel.org/all/20201122014203.4706-1-ashok.raj@intel.com/ + */ +static u32 calculate_cxl_support(void) +{ + u32 support; + + support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT; + if (pci_aer_available()) + support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + support |= OSC_CXL_NATIVE_HP_SUPPORT; + + return support; +} + static u32 calculate_control(void) { u32 control; @@ -467,6 +582,16 @@ static u32 calculate_control(void) return control; } +static u32 calculate_cxl_control(void) +{ + u32 control = 0; + + if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) + control |= OSC_CXL_ERROR_REPORTING_CONTROL; + + return control; +} + static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) { struct acpi_device *device = root->device; @@ -488,6 +613,7 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; + u32 cxl_support = 0, cxl_control = 0, cxl_requested = 0; acpi_status status; struct acpi_device *device = root->device; acpi_handle handle = device->handle; @@ -511,10 +637,20 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) if 
(os_control_query_checks(root, support)) requested = control = calculate_control(); - status = acpi_pci_osc_control_set(handle, &control, support); + if (is_cxl(root)) { + cxl_support = calculate_cxl_support(); + decode_cxl_osc_support(root, "OS supports", cxl_support); + cxl_requested = cxl_control = calculate_cxl_control(); + } + + status = acpi_pci_osc_control_set(handle, &control, support, + &cxl_control, cxl_support); if (ACPI_SUCCESS(status)) { if (control) decode_osc_control(root, "OS now controls", control); + if (cxl_control) + decode_cxl_osc_control(root, "OS now controls", + cxl_control); if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { /* @@ -543,6 +679,11 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) decode_osc_control(root, "OS requested", requested); decode_osc_control(root, "platform willing to grant", control); } + if (cxl_control) { + decode_cxl_osc_control(root, "OS requested", cxl_requested); + decode_cxl_osc_control(root, "platform willing to grant", + cxl_control); + } dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n", acpi_format_exception(status)); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 305ebf2a3fa7..4c463ae2777b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -594,8 +594,10 @@ struct acpi_pci_root { int bridge_type; struct resource secondary; /* downstream bus range */ - u32 osc_support_set; /* _OSC state of support bits */ - u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_ext_support_set; /* _OSC state of extended support bits */ + u32 osc_ext_control_set; /* _OSC state of extended control bits */ phys_addr_t mcfg_addr; }; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 82d91d9ccce5..378a431666b3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -554,10 +554,12 @@ 
acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_PCI_CAPABILITY_DWORDS 3 #define OSC_CXL_CAPABILITY_DWORDS 5 -/* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ +/* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ +#define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */ +#define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ @@ -612,6 +614,15 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +/* CXL _OSC: Capabilities DWORD 4: Support Field */ +#define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001 +#define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002 +#define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004 +#define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008 + +/* CXL _OSC: Capabilities DWORD 5: Control Field */ +#define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001 + static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) { u32 *ret = context->ret.pointer; @@ -619,6 +630,13 @@ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) return ret[OSC_CONTROL_DWORD]; } +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_EXT_CONTROL_DWORD]; +} + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@ -1021,6 +1039,11 @@ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) return 0; } +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ 
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -- cgit v1.2.3-71-gd317 From d864b8ea6468cf1dce614a58eec92a23d8e07fec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 26 Apr 2022 12:22:44 -0700 Subject: cxl/acpi: Add root device lockdep validation The CXL "root" device, ACPI0017, is an attach point for coordinating platform level CXL resources and is the parent device for a CXL port topology tree. As such it has distinct locking rules relative to other CXL subsystem objects, but because it is an ACPI device the lock class is established well before it is given to the cxl_acpi driver. However, the lockdep API does support changing the lock class "live" for situations like this. Add a device_lock_set_class() helper that a driver can use in ->probe() to set a custom lock class, and device_lock_reset_class() to return to the default "no validate" class before the custom lock class key goes out of scope after ->remove(). Note the helpers are all macros to support dead code elimination in the CONFIG_PROVE_LOCKING=n case, however device_lock_set_class() still needs #ifdef CONFIG_LOCKDEP since lockdep_match_class() explicitly does not have a helper in the CONFIG_LOCKDEP=n case (see comment in lockdep.h). The lockdep API needs 2 small tweaks to prevent "unused" warnings for the @key argument to lock_set_class(), and a new lock_set_novalidate_class() is added to supplement lockdep_set_novalidate_class() in the cases where the lock class is converted while the lock is held. Suggested-by: Peter Zijlstra Cc: "Rafael J. 
Wysocki" Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: Alison Schofield Cc: Vishal Verma Cc: Ben Widawsky Cc: Jonathan Cameron Reviewed-by: Greg Kroah-Hartman Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165100081305.1528964.11138612430659737238.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 13 +++++++++++++ include/linux/device.h | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/lockdep.h | 6 +++++- 3 files changed, 61 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d15a6aec0331..40286f5df812 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -275,6 +275,13 @@ static int add_root_nvdimm_bridge(struct device *match, void *data) return 1; } +static struct lock_class_key cxl_root_key; + +static void cxl_acpi_lock_reset_class(void *dev) +{ + device_lock_reset_class(dev); +} + static int cxl_acpi_probe(struct platform_device *pdev) { int rc; @@ -283,6 +290,12 @@ static int cxl_acpi_probe(struct platform_device *pdev) struct acpi_device *adev = ACPI_COMPANION(host); struct cxl_cfmws_context ctx; + device_lock_set_class(&pdev->dev, &cxl_root_key); + rc = devm_add_action_or_reset(&pdev->dev, cxl_acpi_lock_reset_class, + &pdev->dev); + if (rc) + return rc; + root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(root_port)) return PTR_ERR(root_port); diff --git a/include/linux/device.h b/include/linux/device.h index 93459724dcde..833b0b3b0193 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -850,6 +850,49 @@ static inline bool device_supports_offline(struct device *dev) return dev->bus && dev->bus->offline && dev->bus->online; } +#define __device_lock_set_class(dev, name, key) \ +do { \ + struct device *__d2 __maybe_unused = dev; \ + lock_set_class(&__d2->mutex.dep_map, name, key, 0, _THIS_IP_); \ +} while (0) + +/** + * device_lock_set_class - Specify a temporary 
lock class while a device + * is attached to a driver + * @dev: device to modify + * @key: lock class key data + * + * This must be called with the device_lock() already held, for example + * from driver ->probe(). Take care to only override the default + * lockdep_no_validate class. + */ +#ifdef CONFIG_LOCKDEP +#define device_lock_set_class(dev, key) \ +do { \ + struct device *__d = dev; \ + dev_WARN_ONCE(__d, !lockdep_match_class(&__d->mutex, \ + &__lockdep_no_validate__), \ + "overriding existing custom lock class\n"); \ + __device_lock_set_class(__d, #key, key); \ +} while (0) +#else +#define device_lock_set_class(dev, key) __device_lock_set_class(dev, #key, key) +#endif + +/** + * device_lock_reset_class - Return a device to the default lockdep novalidate state + * @dev: device to modify + * + * This must be called with the device_lock() already held, for example + * from driver ->remove(). + */ +#define device_lock_reset_class(dev) \ +do { \ + struct device *__d __maybe_unused = dev; \ + lock_set_novalidate_class(&__d->mutex.dep_map, "&dev->mutex", \ + _THIS_IP_); \ +} while (0) + void lock_device_hotplug(void); void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105..43b0dc6a0b21 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -290,6 +290,9 @@ extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); +#define lock_set_novalidate_class(l, n, i) \ + lock_set_class(l, n, &__lockdep_no_validate__, 0, i) + static inline void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, unsigned long ip) { @@ -357,7 +360,8 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) -# define 
lock_set_class(l, n, k, s, i) do { } while (0) +# define lock_set_class(l, n, key, s, i) do { (void)(key); } while (0) +# define lock_set_novalidate_class(l, n, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \ -- cgit v1.2.3-71-gd317 From fd3abd2cafa46955846d731b9a6ded2c19ab73d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:45 -0700 Subject: device-core: Kill the lockdep_mutex Per Peter [1], the lockdep API has native support for all the use cases lockdep_mutex was attempting to enable. Now that all lockdep_mutex users have been converted to those APIs, drop this lock. Link: https://lore.kernel.org/r/Ylf0dewci8myLvoW@hirez.programming.kicks-ass.net [1] Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Suggested-by: Peter Zijlstra Reviewed-by: Ira Weiny Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/165055522548.3745911.14298368286915484086.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/base/core.c | 3 --- include/linux/device.h | 5 ----- 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 3d6430eb0c6a..2eede2ec3d64 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2864,9 +2864,6 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); -#ifdef CONFIG_PROVE_LOCKING - mutex_init(&dev->lockdep_mutex); -#endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); diff --git a/include/linux/device.h b/include/linux/device.h index 833b0b3b0193..073f1b0126ac 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -400,8 +400,6 @@ struct dev_msi_info { * This identifies the device type and carries type-specific * information. 
* @mutex: Mutex to synchronize calls to its driver. - * @lockdep_mutex: An optional debug lock that a subsystem can use as a - * peer lock to gain localized lockdep coverage of the device_lock. * @bus: Type of bus device is on. * @driver: Which driver has allocated this * @platform_data: Platform data specific to the device. @@ -499,9 +497,6 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ -#ifdef CONFIG_PROVE_LOCKING - struct mutex lockdep_mutex; -#endif struct mutex mutex; /* mutex to synchronize calls to * its driver. */ -- cgit v1.2.3-71-gd317