cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

adf_aer.c (6227B)


// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2014 - 2020 Intel Corporation */
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"

static struct workqueue_struct *device_reset_wq;

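/*
 * ->error_detected() callback of the AER handlers below. The PCI error
 * recovery core calls this when an uncorrectable error is reported for the
 * device; returning PCI_ERS_RESULT_NEED_RESET asks the core to reset the
 * slot and then invoke ->slot_reset().
 */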
static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
					   pci_channel_state_t state)
{
	struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);

	dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
	if (!accel_dev) {
		dev_err(&pdev->dev, "Can't find acceleration device\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	if (state == pci_channel_io_perm_failure) {
		dev_err(&pdev->dev, "Can't recover from device error\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
}

/* reset dev data */
struct adf_reset_dev_data {
	int mode;
	struct adf_accel_dev *accel_dev;
	struct completion compl;
	struct work_struct reset_work;
};

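/*
 * Reset the device by toggling the Secondary Bus Reset bit in the bridge
 * control register of the upstream bridge: assert the bit, hold it briefly,
 * then clear it and give the device time to come back.
 */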
void adf_reset_sbr(struct adf_accel_dev *accel_dev)
{
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
	struct pci_dev *parent = pdev->bus->self;
	u16 bridge_ctl = 0;

	if (!parent)
		parent = pdev;

	if (!pci_wait_for_pending_transaction(pdev))
		dev_info(&GET_DEV(accel_dev),
			 "Transaction still in progress. Proceeding\n");

	dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n");

	pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
	bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
	pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
	msleep(100);
	bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
	pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
	msleep(100);
}
EXPORT_SYMBOL_GPL(adf_reset_sbr);

void adf_reset_flr(struct adf_accel_dev *accel_dev)
{
	pcie_flr(accel_to_pci_dev(accel_dev));
}
EXPORT_SYMBOL_GPL(adf_reset_flr);

void adf_dev_restore(struct adf_accel_dev *accel_dev)
{
	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

	if (hw_device->reset_device) {
		dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
			 accel_dev->accel_id);
		hw_device->reset_device(accel_dev);
		pci_restore_state(pdev);
		pci_save_state(pdev);
	}
}

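/*
 * Workqueue handler that performs the actual reset: notify listeners that a
 * restart is in progress, stop and shut down the device, then re-initialise
 * and restart it. On success the completion is signalled for synchronous
 * callers; reset_data is freed here on the asynchronous and failure paths.
 */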
static void adf_device_reset_worker(struct work_struct *work)
{
	struct adf_reset_dev_data *reset_data =
		  container_of(work, struct adf_reset_dev_data, reset_work);
	struct adf_accel_dev *accel_dev = reset_data->accel_dev;

	adf_dev_restarting_notify(accel_dev);
	adf_dev_stop(accel_dev);
	adf_dev_shutdown(accel_dev);
	if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) {
		/* The device hung and we can't restart it, so stop here */
		dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
		kfree(reset_data);
		WARN(1, "QAT: device restart failed. Device is unusable\n");
		return;
	}
	adf_dev_restarted_notify(accel_dev);
	clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);

	/* The dev is back alive. Notify the caller if in sync mode */
	if (reset_data->mode == ADF_DEV_RESET_SYNC)
		complete(&reset_data->compl);
	else
		kfree(reset_data);
}

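/*
 * Schedule a device reset on the dedicated workqueue. In ADF_DEV_RESET_SYNC
 * mode the caller blocks until the worker signals completion or a 10 second
 * timeout expires; in asynchronous mode the reset runs entirely in the
 * background.
 */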
static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
				      enum adf_dev_reset_mode mode)
{
	struct adf_reset_dev_data *reset_data;

	if (!adf_dev_started(accel_dev) ||
	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
		return 0;

	set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
	reset_data = kzalloc(sizeof(*reset_data), GFP_KERNEL);
	if (!reset_data)
		return -ENOMEM;
	reset_data->accel_dev = accel_dev;
	init_completion(&reset_data->compl);
	reset_data->mode = mode;
	INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
	queue_work(device_reset_wq, &reset_data->reset_work);

	/* If in sync mode wait for the result */
	if (mode == ADF_DEV_RESET_SYNC) {
		int ret = 0;
		/* Maximum device reset time is 10 seconds */
		unsigned long wait_jiffies = msecs_to_jiffies(10000);
		unsigned long timeout = wait_for_completion_timeout(
				   &reset_data->compl, wait_jiffies);
		if (!timeout) {
			dev_err(&GET_DEV(accel_dev),
				"Reset device timeout expired\n");
			ret = -EFAULT;
		}
		kfree(reset_data);
		return ret;
	}
	return 0;
}

static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
{
	struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);

	if (!accel_dev) {
		pr_err("QAT: Can't find acceleration device\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
		return PCI_ERS_RESULT_DISCONNECT;

	return PCI_ERS_RESULT_RECOVERED;
}

static void adf_resume(struct pci_dev *pdev)
{
	dev_info(&pdev->dev, "Acceleration driver reset completed\n");
	dev_info(&pdev->dev, "Device is up and running\n");
}

const struct pci_error_handlers adf_err_handler = {
	.error_detected = adf_error_detected,
	.slot_reset = adf_slot_reset,
	.resume = adf_resume,
};
EXPORT_SYMBOL_GPL(adf_err_handler);

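/*
 * Illustrative sketch only: a device-specific QAT driver is expected to hook
 * these handlers into its struct pci_driver so the PCI error recovery core
 * can invoke them. The id_table/probe/remove names below are hypothetical.
 *
 *	static struct pci_driver adf_driver = {
 *		.id_table = adf_pci_tbl,
 *		.name = "qat_example",
 *		.probe = adf_probe,
 *		.remove = adf_remove,
 *		.err_handler = &adf_err_handler,
 *	};
 */
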
/**
 * adf_enable_aer() - Enable Advanced Error Reporting for acceleration device
 * @accel_dev:  Pointer to acceleration device.
 *
 * Function enables PCI Advanced Error Reporting for the
 * QAT acceleration device accel_dev.
 * To be used by QAT device specific drivers.
 */
void adf_enable_aer(struct adf_accel_dev *accel_dev)
{
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

	pci_enable_pcie_error_reporting(pdev);
}
EXPORT_SYMBOL_GPL(adf_enable_aer);

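/*
 * Usage sketch (illustrative; the probe/remove call sites below are
 * hypothetical): a device-specific driver would enable AER once the
 * accel_dev has been registered and disable it on the matching remove path.
 *
 *	static int adf_probe(struct pci_dev *pdev,
 *			     const struct pci_device_id *ent)
 *	{
 *		...
 *		adf_enable_aer(accel_dev);
 *		...
 *	}
 *
 *	static void adf_remove(struct pci_dev *pdev)
 *	{
 *		struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
 *		...
 *		adf_disable_aer(accel_dev);
 *		...
 *	}
 */
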
/**
 * adf_disable_aer() - Disable Advanced Error Reporting for acceleration device
 * @accel_dev:  Pointer to acceleration device.
 *
 * Function disables PCI Advanced Error Reporting for the
 * QAT acceleration device accel_dev.
 * To be used by QAT device specific drivers.
 *
 * Return: void
 */
void adf_disable_aer(struct adf_accel_dev *accel_dev)
{
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

	pci_disable_pcie_error_reporting(pdev);
}
EXPORT_SYMBOL_GPL(adf_disable_aer);

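/*
 * adf_init_aer()/adf_exit_aer() create and destroy the workqueue used to run
 * device resets; they are intended to be called once from the common QAT
 * module's init and exit paths.
 */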
int adf_init_aer(void)
{
	device_reset_wq = alloc_workqueue("qat_device_reset_wq",
					  WQ_MEM_RECLAIM, 0);
	return !device_reset_wq ? -EFAULT : 0;
}

void adf_exit_aer(void)
{
	if (device_reset_wq)
		destroy_workqueue(device_reset_wq);
	device_reset_wq = NULL;
}