cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

vfio_ap_ops.c (41234B)


      1// SPDX-License-Identifier: GPL-2.0+
      2/*
      3 * Adjunct processor matrix VFIO device driver callbacks.
      4 *
      5 * Copyright IBM Corp. 2018
      6 *
      7 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
      8 *	      Halil Pasic <pasic@linux.ibm.com>
      9 *	      Pierre Morel <pmorel@linux.ibm.com>
     10 */
     11#include <linux/string.h>
     12#include <linux/vfio.h>
     13#include <linux/device.h>
     14#include <linux/list.h>
     15#include <linux/ctype.h>
     16#include <linux/bitops.h>
     17#include <linux/kvm_host.h>
     18#include <linux/module.h>
     19#include <linux/uuid.h>
     20#include <asm/kvm.h>
     21#include <asm/zcrypt.h>
     22
     23#include "vfio_ap_private.h"
     24#include "vfio_ap_debug.h"
     25
     26#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
     27#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
     28
     29static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
     30static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
     31static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
     32
     33static int match_apqn(struct device *dev, const void *data)
     34{
     35	struct vfio_ap_queue *q = dev_get_drvdata(dev);
     36
     37	return (q->apqn == *(int *)(data)) ? 1 : 0;
     38}
     39
     40/**
     41 * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
     42 * @matrix_mdev: the associated mediated matrix
     43 * @apqn: The queue APQN
     44 *
     45 * Retrieve a queue with a specific APQN from the list of the
     46 * devices of the vfio_ap_drv.
     47 * Verify that the APID and the APQI are set in the matrix.
     48 *
     49 * Return: the pointer to the associated vfio_ap_queue
     50 */
     51static struct vfio_ap_queue *vfio_ap_get_queue(
     52					struct ap_matrix_mdev *matrix_mdev,
     53					int apqn)
     54{
     55	struct vfio_ap_queue *q;
     56
     57	if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
     58		return NULL;
     59	if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
     60		return NULL;
     61
     62	q = vfio_ap_find_queue(apqn);
     63	if (q)
     64		q->matrix_mdev = matrix_mdev;
     65
     66	return q;
     67}
     68
     69/**
     70 * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
     71 * @apqn: The AP Queue number
     72 *
     73 * Checks the IRQ bit for the status of this APQN using ap_tapq.
     74 * Returns if the ap_tapq function succeeded and the bit is clear.
     75 * Returns if ap_tapq function failed with invalid, deconfigured or
     76 * checkstopped AP.
     77 * Otherwise retries up to 5 times after waiting 20ms.
     78 */
     79static void vfio_ap_wait_for_irqclear(int apqn)
     80{
     81	struct ap_queue_status status;
     82	int retry = 5;
     83
     84	do {
     85		status = ap_tapq(apqn, NULL);
     86		switch (status.response_code) {
     87		case AP_RESPONSE_NORMAL:
     88		case AP_RESPONSE_RESET_IN_PROGRESS:
     89			if (!status.irq_enabled)
     90				return;
     91			fallthrough;
     92		case AP_RESPONSE_BUSY:
     93			msleep(20);
     94			break;
     95		case AP_RESPONSE_Q_NOT_AVAIL:
     96		case AP_RESPONSE_DECONFIGURED:
     97		case AP_RESPONSE_CHECKSTOPPED:
     98		default:
     99			WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
    100				  status.response_code, apqn);
    101			return;
    102		}
    103	} while (--retry);
    104
    105	WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
    106		  __func__, status.response_code, apqn);
    107}
    108
/**
 * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
 * @q: The vfio_ap_queue (may be NULL, in which case nothing is done)
 *
 * Unregisters the ISC in the GIB when the saved ISC is not invalid.
 * Unpins the guest's page holding the NIB when it exists.
 * Resets the saved_pfn and saved_isc to invalid values.
 *
 * Each cleanup step WARNs and is skipped if the back-pointer it needs
 * (matrix_mdev, and for the ISC also its kvm) is unexpectedly NULL.
 */
static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
{
	if (!q)
		return;
	if (q->saved_isc != VFIO_AP_ISC_INVALID &&
	    !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
		kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
		q->saved_isc = VFIO_AP_ISC_INVALID;
	}
	/* saved_pfn != 0 means a guest NIB page is still pinned */
	if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
		vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1);
		q->saved_pfn = 0;
	}
}
    131
/**
 * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
 * @q: The vfio_ap_queue
 *
 * Uses ap_aqic to disable the interruption and in case of success, reset
 * in progress or IRQ disable command already proceeded: calls
 * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
 * and calls vfio_ap_free_aqic_resources() to free the resources associated
 * with the AP interrupt handling.
 *
 * In the case the AP is busy, or a reset is in progress,
 * retries after 20ms, up to 5 times.
 *
 * Returns if ap_aqic function failed with invalid, deconfigured or
 * checkstopped AP.
 *
 * Return: &struct ap_queue_status
 */
static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
{
	/* A zeroed aqic_gisa requests interruption disablement from ap_aqic */
	struct ap_qirq_ctrl aqic_gisa = {};
	struct ap_queue_status status;
	int retries = 5;

	do {
		status = ap_aqic(q->apqn, aqic_gisa, NULL);
		switch (status.response_code) {
		case AP_RESPONSE_OTHERWISE_CHANGED:
		case AP_RESPONSE_NORMAL:
			vfio_ap_wait_for_irqclear(q->apqn);
			goto end_free;
		case AP_RESPONSE_RESET_IN_PROGRESS:
		case AP_RESPONSE_BUSY:
			msleep(20);
			break;
		case AP_RESPONSE_Q_NOT_AVAIL:
		case AP_RESPONSE_DECONFIGURED:
		case AP_RESPONSE_CHECKSTOPPED:
		case AP_RESPONSE_INVALID_ADDRESS:
		default:
			/* All cases in default means AP not operational */
			WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
				  status.response_code);
			goto end_free;
		}
		/* post-decrement: one initial attempt plus 5 retries */
	} while (retries--);

	WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
		  status.response_code);
end_free:
	/* Resources are freed even on failure; the queue is dissociated. */
	vfio_ap_free_aqic_resources(q);
	q->matrix_mdev = NULL;
	return status;
}
    186
    187/**
    188 * vfio_ap_validate_nib - validate a notification indicator byte (nib) address.
    189 *
    190 * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction.
    191 * @nib: the location for storing the nib address.
    192 * @g_pfn: the location for storing the page frame number of the page containing
    193 *	   the nib.
    194 *
    195 * When the PQAP(AQIC) instruction is executed, general register 2 contains the
    196 * address of the notification indicator byte (nib) used for IRQ notification.
    197 * This function parses the nib from gr2 and calculates the page frame
    198 * number for the guest of the page containing the nib. The values are
    199 * stored in @nib and @g_pfn respectively.
    200 *
    201 * The g_pfn of the nib is then validated to ensure the nib address is valid.
    202 *
    203 * Return: returns zero if the nib address is a valid; otherwise, returns
    204 *	   -EINVAL.
    205 */
    206static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib,
    207				unsigned long *g_pfn)
    208{
    209	*nib = vcpu->run->s.regs.gprs[2];
    210	*g_pfn = *nib >> PAGE_SHIFT;
    211
    212	if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn)))
    213		return -EINVAL;
    214
    215	return 0;
    216}
    217
/**
 * vfio_ap_irq_enable - Enable Interruption for a APQN
 *
 * @q:	 the vfio_ap_queue holding AQIC parameters
 * @isc: the guest ISC to register with the GIB interface
 * @vcpu: the vcpu object containing the registers specifying the parameters
 *	  passed to the PQAP(AQIC) instruction.
 *
 * Pin the NIB saved in *q
 * Register the guest ISC to GIB interface and retrieve the
 * host ISC to issue the host side PQAP/AQIC
 *
 * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
 * vfio_pin_pages failed.
 *
 * Otherwise return the ap_queue_status returned by the ap_aqic(),
 * all retry handling will be done by the guest.
 *
 * Return: &struct ap_queue_status
 */
static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
						 int isc,
						 struct kvm_vcpu *vcpu)
{
	unsigned long nib;
	struct ap_qirq_ctrl aqic_gisa = {};
	struct ap_queue_status status = {};
	struct kvm_s390_gisa *gisa;
	int nisc;
	struct kvm *kvm;
	unsigned long h_nib, g_pfn, h_pfn;
	int ret;

	/* Verify that the notification indicator byte address is valid */
	if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) {
		VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
				 __func__, nib, g_pfn, q->apqn);

		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
		return status;
	}

	/* Pin the guest page holding the NIB to get its host pfn */
	ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1,
			     IOMMU_READ | IOMMU_WRITE, &h_pfn);
	switch (ret) {
	case 1:
		break;
	default:
		VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d,"
				 "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
				 __func__, ret, nib, g_pfn, q->apqn);

		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
		return status;
	}

	kvm = q->matrix_mdev->kvm;
	gisa = kvm->arch.gisa_int.origin;

	/* Host nib address: pinned host page plus the nib's page offset */
	h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
	aqic_gisa.gisc = isc;

	/* Map the guest ISC to a host ISC via the GIB interface */
	nisc = kvm_s390_gisc_register(kvm, isc);
	if (nisc < 0) {
		VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
				 __func__, nisc, isc, q->apqn);

		status.response_code = AP_RESPONSE_INVALID_GISA;
		return status;
	}

	aqic_gisa.isc = nisc;
	aqic_gisa.ir = 1;
	/* The gisa field carries the GISA origin shifted right by 4 */
	aqic_gisa.gisa = (uint64_t)gisa >> 4;

	status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
		/* See if we did clear older IRQ configuration */
		vfio_ap_free_aqic_resources(q);
		q->saved_pfn = g_pfn;
		q->saved_isc = isc;
		break;
	case AP_RESPONSE_OTHERWISE_CHANGED:
		/* We could not modify IRQ settings: clear new configuration */
		vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1);
		kvm_s390_gisc_unregister(kvm, isc);
		break;
	default:
		pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
			status.response_code);
		vfio_ap_irq_disable(q);
		break;
	}

	if (status.response_code != AP_RESPONSE_NORMAL) {
		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: "
				 "zone=%#x, ir=%#x, gisc=%#x, f=%#x,"
				 "gisa=%#x, isc=%#x, apqn=%#04x\n",
				 __func__, status.response_code,
				 aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc,
				 aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc,
				 q->apqn);
	}

	return status;
}
    325
/**
 * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array
 *				of big endian elements that can be passed by
 *				value to an s390dbf sprintf event function to
 *				format a UUID string.
 *
 * @guid: the object containing the little endian guid
 * @uuid: a six-element array of long values that can be passed by value as
 *	  arguments for a formatting string specifying a UUID.
 *
 * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf
 * event functions if the memory for the passed string is available as long as
 * the debug feature exists. Since a mediated device can be removed at any
 * time, its name can not be used because %s passes the reference to the string
 * in memory and the reference will go stale once the device is removed.
 *
 * The s390dbf string formatting function allows a maximum of 9 arguments for a
 * message to be displayed in the 'sprintf' view. In order to use the bytes
 * comprising the mediated device's UUID to display the mediated device name,
 * they will have to be converted into an array whose elements can be passed by
 * value to sprintf. For example:
 *
 * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 }
 * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804
 * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 }
 * formatting string: "%08lx-%04lx-%04lx-%04lx-%04lx%08lx"
 */
static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid)
{
	/*
	 * The input guid is ordered in little endian, so it needs to be
	 * reordered for displaying a UUID as a string. This specifies the
	 * guid indices in proper order.
	 */
	uuid[0] = le32_to_cpup((__le32 *)guid);
	uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]);
	uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]);
	/* The last three elements are read in native (big-endian) order. */
	uuid[3] = *((__u16 *)&guid->b[8]);
	uuid[4] = *((__u16 *)&guid->b[10]);
	uuid[5] = *((__u32 *)&guid->b[12]);
}
    367
/**
 * handle_pqap - PQAP instruction callback
 *
 * @vcpu: The vcpu on which we received the PQAP instruction
 *
 * Get the general register contents to initialize internal variables.
 * REG[0]: APQN
 * REG[1]: IR and ISC
 * REG[2]: NIB
 *
 * Response.status may be set to following Response Code:
 * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
 * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
 * - AP_RESPONSE_NORMAL (0) : in case of success
 *   Check vfio_ap_irq_enable() and vfio_ap_irq_disable() for other
 *   possible response codes.
 * We take the matrix_dev lock to ensure serialization on queues and
 * mediated device access.
 *
 * Return: 0 if we could handle the request inside KVM.
 * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
 */
static int handle_pqap(struct kvm_vcpu *vcpu)
{
	uint64_t status;
	uint16_t apqn;
	unsigned long uuid[6];
	struct vfio_ap_queue *q;
	/* Reported to the guest unless a bound queue is found below. */
	struct ap_queue_status qstatus = {
			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
	struct ap_matrix_mdev *matrix_mdev;

	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;

	/* If we do not use the AIV facility just go to userland */
	if (!(vcpu->arch.sie_block->eca & ECA_AIV)) {
		VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n",
				 __func__, apqn, vcpu->arch.sie_block->eca);

		return -EOPNOTSUPP;
	}

	mutex_lock(&matrix_dev->lock);
	if (!vcpu->kvm->arch.crypto.pqap_hook) {
		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n",
				 __func__, apqn);
		goto out_unlock;
	}

	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
				   struct ap_matrix_mdev, pqap_hook);

	/* If there is no guest using the mdev, there is nothing to do */
	if (!matrix_mdev->kvm) {
		vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid);
		VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n",
				 __func__, uuid[0],  uuid[1], uuid[2],
				 uuid[3], uuid[4], uuid[5], apqn);
		goto out_unlock;
	}

	q = vfio_ap_get_queue(matrix_mdev, apqn);
	if (!q) {
		VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n",
				 __func__, AP_QID_CARD(apqn),
				 AP_QID_QUEUE(apqn));
		goto out_unlock;
	}

	status = vcpu->run->s.regs.gprs[1];

	/* If IR bit(16) is set we enable the interrupt */
	if ((status >> (63 - 16)) & 0x01)
		qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu);
	else
		qstatus = vfio_ap_irq_disable(q);

out_unlock:
	/* Pack the AP queue status word back into gr 1 for the guest. */
	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
	vcpu->run->s.regs.gprs[1] >>= 32;
	mutex_unlock(&matrix_dev->lock);
	return 0;
}
    450
    451static void vfio_ap_matrix_init(struct ap_config_info *info,
    452				struct ap_matrix *matrix)
    453{
    454	matrix->apm_max = info->apxa ? info->Na : 63;
    455	matrix->aqm_max = info->apxa ? info->Nd : 15;
    456	matrix->adm_max = info->apxa ? info->Nd : 15;
    457}
    458
/*
 * vfio_ap_mdev_probe - create and register a mediated matrix device.
 *
 * Reserves an instance slot, allocates and initializes the matrix mdev,
 * links it into matrix_dev->mdev_list and registers it with vfio. On any
 * failure the steps already taken are unwound in reverse order.
 */
static int vfio_ap_mdev_probe(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev;
	int ret;

	/* Claim one of the limited instance slots; fail if none is left. */
	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
		return -EPERM;

	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
	if (!matrix_mdev) {
		ret = -ENOMEM;
		goto err_dec_available;
	}
	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
			    &vfio_ap_matrix_dev_ops);

	matrix_mdev->mdev = mdev;
	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
	/* PQAP interceptions for this mdev are handled by handle_pqap() */
	matrix_mdev->pqap_hook = handle_pqap;
	mutex_lock(&matrix_dev->lock);
	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
	mutex_unlock(&matrix_dev->lock);

	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
	if (ret)
		goto err_list;
	dev_set_drvdata(&mdev->dev, matrix_mdev);
	return 0;

err_list:
	mutex_lock(&matrix_dev->lock);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
err_dec_available:
	/* Return the instance slot claimed above. */
	atomic_inc(&matrix_dev->available_instances);
	return ret;
}
    498
/*
 * vfio_ap_mdev_remove - tear down a mediated matrix device.
 *
 * Unregisters the vfio device, then, under the matrix lock, resets the
 * mdev's queues and unlinks it from the mdev list before freeing it and
 * returning its instance slot.
 */
static void vfio_ap_mdev_remove(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);

	vfio_unregister_group_dev(&matrix_mdev->vdev);

	mutex_lock(&matrix_dev->lock);
	vfio_ap_mdev_reset_queues(matrix_mdev);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
	atomic_inc(&matrix_dev->available_instances);
}
    513
    514static ssize_t name_show(struct mdev_type *mtype,
    515			 struct mdev_type_attribute *attr, char *buf)
    516{
    517	return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
    518}
    519
    520static MDEV_TYPE_ATTR_RO(name);
    521
    522static ssize_t available_instances_show(struct mdev_type *mtype,
    523					struct mdev_type_attribute *attr,
    524					char *buf)
    525{
    526	return sprintf(buf, "%d\n",
    527		       atomic_read(&matrix_dev->available_instances));
    528}
    529
    530static MDEV_TYPE_ATTR_RO(available_instances);
    531
    532static ssize_t device_api_show(struct mdev_type *mtype,
    533			       struct mdev_type_attribute *attr, char *buf)
    534{
    535	return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
    536}
    537
    538static MDEV_TYPE_ATTR_RO(device_api);
    539
/* sysfs attributes exposed for the supported mdev type. */
static struct attribute *vfio_ap_mdev_type_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/* The single supported mdev type: full AP passthrough. */
static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

/* NULL-terminated list of type groups handed to the mdev core. */
static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};
    556
/*
 * Search context for vfio_ap_has_queue(): @apid and/or @apqi select the
 * target; @reserved is set when a matching bound queue device is found.
 */
struct vfio_ap_queue_reserved {
	unsigned long *apid;	/* adapter ID to match, or NULL */
	unsigned long *apqi;	/* queue index to match, or NULL */
	bool reserved;		/* result: a matching queue was found */
};
    562
    563/**
    564 * vfio_ap_has_queue - determines if the AP queue containing the target in @data
    565 *
    566 * @dev: an AP queue device
    567 * @data: a struct vfio_ap_queue_reserved reference
    568 *
    569 * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
    570 * apid or apqi specified in @data:
    571 *
    572 * - If @data contains both an apid and apqi value, then @data will be flagged
    573 *   as reserved if the APID and APQI fields for the AP queue device matches
    574 *
    575 * - If @data contains only an apid value, @data will be flagged as
    576 *   reserved if the APID field in the AP queue device matches
    577 *
    578 * - If @data contains only an apqi value, @data will be flagged as
    579 *   reserved if the APQI field in the AP queue device matches
    580 *
    581 * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
    582 * @data does not contain either an apid or apqi.
    583 */
    584static int vfio_ap_has_queue(struct device *dev, void *data)
    585{
    586	struct vfio_ap_queue_reserved *qres = data;
    587	struct ap_queue *ap_queue = to_ap_queue(dev);
    588	ap_qid_t qid;
    589	unsigned long id;
    590
    591	if (qres->apid && qres->apqi) {
    592		qid = AP_MKQID(*qres->apid, *qres->apqi);
    593		if (qid == ap_queue->qid)
    594			qres->reserved = true;
    595	} else if (qres->apid && !qres->apqi) {
    596		id = AP_QID_CARD(ap_queue->qid);
    597		if (id == *qres->apid)
    598			qres->reserved = true;
    599	} else if (!qres->apid && qres->apqi) {
    600		id = AP_QID_QUEUE(ap_queue->qid);
    601		if (id == *qres->apqi)
    602			qres->reserved = true;
    603	} else {
    604		return -EINVAL;
    605	}
    606
    607	return 0;
    608}
    609
    610/**
    611 * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
    612 * @apid or @aqpi is reserved
    613 *
    614 * @apid: an AP adapter ID
    615 * @apqi: an AP queue index
    616 *
    617 * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
    618 * driver according to the following rules:
    619 *
    620 * - If both @apid and @apqi are not NULL, then there must be an AP queue
    621 *   device bound to the vfio_ap driver with the APQN identified by @apid and
    622 *   @apqi
    623 *
    624 * - If only @apid is not NULL, then there must be an AP queue device bound
    625 *   to the vfio_ap driver with an APQN containing @apid
    626 *
    627 * - If only @apqi is not NULL, then there must be an AP queue device bound
    628 *   to the vfio_ap driver with an APQN containing @apqi
    629 *
    630 * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
    631 */
    632static int vfio_ap_verify_queue_reserved(unsigned long *apid,
    633					 unsigned long *apqi)
    634{
    635	int ret;
    636	struct vfio_ap_queue_reserved qres;
    637
    638	qres.apid = apid;
    639	qres.apqi = apqi;
    640	qres.reserved = false;
    641
    642	ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
    643				     &qres, vfio_ap_has_queue);
    644	if (ret)
    645		return ret;
    646
    647	if (qres.reserved)
    648		return 0;
    649
    650	return -EADDRNOTAVAIL;
    651}
    652
    653static int
    654vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
    655					     unsigned long apid)
    656{
    657	int ret;
    658	unsigned long apqi;
    659	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
    660
    661	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
    662		return vfio_ap_verify_queue_reserved(&apid, NULL);
    663
    664	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
    665		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
    666		if (ret)
    667			return ret;
    668	}
    669
    670	return 0;
    671}
    672
/**
 * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
 *
 * @matrix_mdev: the mediated matrix device
 *
 * Verifies that the APQNs derived from the cross product of the AP adapter IDs
 * and AP queue indexes comprising the AP matrix are not configured for another
 * mediated device. AP queue sharing is not allowed.
 *
 * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
 */
static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
{
	struct ap_matrix_mdev *lstdev;
	DECLARE_BITMAP(apm, AP_DEVICES);
	DECLARE_BITMAP(aqm, AP_DOMAINS);

	list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
		if (matrix_mdev == lstdev)
			continue;

		memset(apm, 0, sizeof(apm));
		memset(aqm, 0, sizeof(aqm));

		/*
		 * We work on full longs, as we can only exclude the leftover
		 * bits in non-inverse order. The leftover is all zeros.
		 */
		if (!bitmap_and(apm, matrix_mdev->matrix.apm,
				lstdev->matrix.apm, AP_DEVICES))
			continue;

		if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
				lstdev->matrix.aqm, AP_DOMAINS))
			continue;

		/*
		 * Both the adapter and domain masks intersect, so at least
		 * one APQN is claimed by both mdevs.
		 */
		return -EADDRINUSE;
	}

	return 0;
}
    714
    715/**
    716 * assign_adapter_store - parses the APID from @buf and sets the
    717 * corresponding bit in the mediated matrix device's APM
    718 *
    719 * @dev:	the matrix device
    720 * @attr:	the mediated matrix device's assign_adapter attribute
    721 * @buf:	a buffer containing the AP adapter number (APID) to
    722 *		be assigned
    723 * @count:	the number of bytes in @buf
    724 *
    725 * Return: the number of bytes processed if the APID is valid; otherwise,
    726 * returns one of the following errors:
    727 *
    728 *	1. -EINVAL
    729 *	   The APID is not a valid number
    730 *
    731 *	2. -ENODEV
    732 *	   The APID exceeds the maximum value configured for the system
    733 *
    734 *	3. -EADDRNOTAVAIL
    735 *	   An APQN derived from the cross product of the APID being assigned
    736 *	   and the APQIs previously assigned is not bound to the vfio_ap device
    737 *	   driver; or, if no APQIs have yet been assigned, the APID is not
    738 *	   contained in an APQN bound to the vfio_ap device driver.
    739 *
    740 *	4. -EADDRINUSE
    741 *	   An APQN derived from the cross product of the APID being assigned
    742 *	   and the APQIs previously assigned is being used by another mediated
    743 *	   matrix device
    744 */
    745static ssize_t assign_adapter_store(struct device *dev,
    746				    struct device_attribute *attr,
    747				    const char *buf, size_t count)
    748{
    749	int ret;
    750	unsigned long apid;
    751	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
    752
    753	mutex_lock(&matrix_dev->lock);
    754
    755	/* If the KVM guest is running, disallow assignment of adapter */
    756	if (matrix_mdev->kvm) {
    757		ret = -EBUSY;
    758		goto done;
    759	}
    760
    761	ret = kstrtoul(buf, 0, &apid);
    762	if (ret)
    763		goto done;
    764
    765	if (apid > matrix_mdev->matrix.apm_max) {
    766		ret = -ENODEV;
    767		goto done;
    768	}
    769
    770	/*
    771	 * Set the bit in the AP mask (APM) corresponding to the AP adapter
    772	 * number (APID). The bits in the mask, from most significant to least
    773	 * significant bit, correspond to APIDs 0-255.
    774	 */
    775	ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
    776	if (ret)
    777		goto done;
    778
    779	set_bit_inv(apid, matrix_mdev->matrix.apm);
    780
    781	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
    782	if (ret)
    783		goto share_err;
    784
    785	ret = count;
    786	goto done;
    787
    788share_err:
    789	clear_bit_inv(apid, matrix_mdev->matrix.apm);
    790done:
    791	mutex_unlock(&matrix_dev->lock);
    792
    793	return ret;
    794}
    795static DEVICE_ATTR_WO(assign_adapter);
    796
    797/**
    798 * unassign_adapter_store - parses the APID from @buf and clears the
    799 * corresponding bit in the mediated matrix device's APM
    800 *
    801 * @dev:	the matrix device
    802 * @attr:	the mediated matrix device's unassign_adapter attribute
    803 * @buf:	a buffer containing the adapter number (APID) to be unassigned
    804 * @count:	the number of bytes in @buf
    805 *
    806 * Return: the number of bytes processed if the APID is valid; otherwise,
    807 * returns one of the following errors:
    808 *	-EINVAL if the APID is not a number
    809 *	-ENODEV if the APID it exceeds the maximum value configured for the
    810 *		system
    811 */
    812static ssize_t unassign_adapter_store(struct device *dev,
    813				      struct device_attribute *attr,
    814				      const char *buf, size_t count)
    815{
    816	int ret;
    817	unsigned long apid;
    818	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
    819
    820	mutex_lock(&matrix_dev->lock);
    821
    822	/* If the KVM guest is running, disallow unassignment of adapter */
    823	if (matrix_mdev->kvm) {
    824		ret = -EBUSY;
    825		goto done;
    826	}
    827
    828	ret = kstrtoul(buf, 0, &apid);
    829	if (ret)
    830		goto done;
    831
    832	if (apid > matrix_mdev->matrix.apm_max) {
    833		ret = -ENODEV;
    834		goto done;
    835	}
    836
    837	clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
    838	ret = count;
    839done:
    840	mutex_unlock(&matrix_dev->lock);
    841	return ret;
    842}
    843static DEVICE_ATTR_WO(unassign_adapter);
    844
    845static int
    846vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
    847					     unsigned long apqi)
    848{
    849	int ret;
    850	unsigned long apid;
    851	unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
    852
    853	if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
    854		return vfio_ap_verify_queue_reserved(NULL, &apqi);
    855
    856	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
    857		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
    858		if (ret)
    859			return ret;
    860	}
    861
    862	return 0;
    863}
    864
    865/**
    866 * assign_domain_store - parses the APQI from @buf and sets the
    867 * corresponding bit in the mediated matrix device's AQM
    868 *
    869 * @dev:	the matrix device
    870 * @attr:	the mediated matrix device's assign_domain attribute
    871 * @buf:	a buffer containing the AP queue index (APQI) of the domain to
    872 *		be assigned
    873 * @count:	the number of bytes in @buf
    874 *
    875 * Return: the number of bytes processed if the APQI is valid; otherwise returns
    876 * one of the following errors:
    877 *
    878 *	1. -EINVAL
    879 *	   The APQI is not a valid number
    880 *
    881 *	2. -ENODEV
    882 *	   The APQI exceeds the maximum value configured for the system
    883 *
    884 *	3. -EADDRNOTAVAIL
    885 *	   An APQN derived from the cross product of the APQI being assigned
    886 *	   and the APIDs previously assigned is not bound to the vfio_ap device
    887 *	   driver; or, if no APIDs have yet been assigned, the APQI is not
    888 *	   contained in an APQN bound to the vfio_ap device driver.
    889 *
    890 *	4. -EADDRINUSE
    891 *	   An APQN derived from the cross product of the APQI being assigned
    892 *	   and the APIDs previously assigned is being used by another mediated
    893 *	   matrix device
    894 */
    895static ssize_t assign_domain_store(struct device *dev,
    896				   struct device_attribute *attr,
    897				   const char *buf, size_t count)
    898{
    899	int ret;
    900	unsigned long apqi;
    901	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
    902	unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
    903
    904	mutex_lock(&matrix_dev->lock);
    905
    906	/* If the KVM guest is running, disallow assignment of domain */
    907	if (matrix_mdev->kvm) {
    908		ret = -EBUSY;
    909		goto done;
    910	}
    911
    912	ret = kstrtoul(buf, 0, &apqi);
    913	if (ret)
    914		goto done;
    915	if (apqi > max_apqi) {
    916		ret = -ENODEV;
    917		goto done;
    918	}
    919
    920	ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
    921	if (ret)
    922		goto done;
    923
    924	set_bit_inv(apqi, matrix_mdev->matrix.aqm);
    925
    926	ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
    927	if (ret)
    928		goto share_err;
    929
    930	ret = count;
    931	goto done;
    932
    933share_err:
    934	clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
    935done:
    936	mutex_unlock(&matrix_dev->lock);
    937
    938	return ret;
    939}
    940static DEVICE_ATTR_WO(assign_domain);
    941
    942
    943/**
    944 * unassign_domain_store - parses the APQI from @buf and clears the
    945 * corresponding bit in the mediated matrix device's AQM
    946 *
    947 * @dev:	the matrix device
    948 * @attr:	the mediated matrix device's unassign_domain attribute
    949 * @buf:	a buffer containing the AP queue index (APQI) of the domain to
    950 *		be unassigned
    951 * @count:	the number of bytes in @buf
    952 *
    953 * Return: the number of bytes processed if the APQI is valid; otherwise,
    954 * returns one of the following errors:
    955 *	-EINVAL if the APQI is not a number
    956 *	-ENODEV if the APQI exceeds the maximum value configured for the system
    957 */
    958static ssize_t unassign_domain_store(struct device *dev,
    959				     struct device_attribute *attr,
    960				     const char *buf, size_t count)
    961{
    962	int ret;
    963	unsigned long apqi;
    964	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
    965
    966	mutex_lock(&matrix_dev->lock);
    967
    968	/* If the KVM guest is running, disallow unassignment of domain */
    969	if (matrix_mdev->kvm) {
    970		ret = -EBUSY;
    971		goto done;
    972	}
    973
    974	ret = kstrtoul(buf, 0, &apqi);
    975	if (ret)
    976		goto done;
    977
    978	if (apqi > matrix_mdev->matrix.aqm_max) {
    979		ret = -ENODEV;
    980		goto done;
    981	}
    982
    983	clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
    984	ret = count;
    985
    986done:
    987	mutex_unlock(&matrix_dev->lock);
    988	return ret;
    989}
    990static DEVICE_ATTR_WO(unassign_domain);
    991
    992/**
    993 * assign_control_domain_store - parses the domain ID from @buf and sets
    994 * the corresponding bit in the mediated matrix device's ADM
    995 *
    996 * @dev:	the matrix device
    997 * @attr:	the mediated matrix device's assign_control_domain attribute
    998 * @buf:	a buffer containing the domain ID to be assigned
    999 * @count:	the number of bytes in @buf
   1000 *
   1001 * Return: the number of bytes processed if the domain ID is valid; otherwise,
   1002 * returns one of the following errors:
   1003 *	-EINVAL if the ID is not a number
   1004 *	-ENODEV if the ID exceeds the maximum value configured for the system
   1005 */
   1006static ssize_t assign_control_domain_store(struct device *dev,
   1007					   struct device_attribute *attr,
   1008					   const char *buf, size_t count)
   1009{
   1010	int ret;
   1011	unsigned long id;
   1012	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
   1013
   1014	mutex_lock(&matrix_dev->lock);
   1015
   1016	/* If the KVM guest is running, disallow assignment of control domain */
   1017	if (matrix_mdev->kvm) {
   1018		ret = -EBUSY;
   1019		goto done;
   1020	}
   1021
   1022	ret = kstrtoul(buf, 0, &id);
   1023	if (ret)
   1024		goto done;
   1025
   1026	if (id > matrix_mdev->matrix.adm_max) {
   1027		ret = -ENODEV;
   1028		goto done;
   1029	}
   1030
   1031	/* Set the bit in the ADM (bitmask) corresponding to the AP control
   1032	 * domain number (id). The bits in the mask, from most significant to
   1033	 * least significant, correspond to IDs 0 up to the one less than the
   1034	 * number of control domains that can be assigned.
   1035	 */
   1036	set_bit_inv(id, matrix_mdev->matrix.adm);
   1037	ret = count;
   1038done:
   1039	mutex_unlock(&matrix_dev->lock);
   1040	return ret;
   1041}
   1042static DEVICE_ATTR_WO(assign_control_domain);
   1043
   1044/**
   1045 * unassign_control_domain_store - parses the domain ID from @buf and
   1046 * clears the corresponding bit in the mediated matrix device's ADM
   1047 *
   1048 * @dev:	the matrix device
   1049 * @attr:	the mediated matrix device's unassign_control_domain attribute
   1050 * @buf:	a buffer containing the domain ID to be unassigned
   1051 * @count:	the number of bytes in @buf
   1052 *
   1053 * Return: the number of bytes processed if the domain ID is valid; otherwise,
   1054 * returns one of the following errors:
   1055 *	-EINVAL if the ID is not a number
   1056 *	-ENODEV if the ID exceeds the maximum value configured for the system
   1057 */
   1058static ssize_t unassign_control_domain_store(struct device *dev,
   1059					     struct device_attribute *attr,
   1060					     const char *buf, size_t count)
   1061{
   1062	int ret;
   1063	unsigned long domid;
   1064	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
   1065	unsigned long max_domid =  matrix_mdev->matrix.adm_max;
   1066
   1067	mutex_lock(&matrix_dev->lock);
   1068
   1069	/* If a KVM guest is running, disallow unassignment of control domain */
   1070	if (matrix_mdev->kvm) {
   1071		ret = -EBUSY;
   1072		goto done;
   1073	}
   1074
   1075	ret = kstrtoul(buf, 0, &domid);
   1076	if (ret)
   1077		goto done;
   1078	if (domid > max_domid) {
   1079		ret = -ENODEV;
   1080		goto done;
   1081	}
   1082
   1083	clear_bit_inv(domid, matrix_mdev->matrix.adm);
   1084	ret = count;
   1085done:
   1086	mutex_unlock(&matrix_dev->lock);
   1087	return ret;
   1088}
   1089static DEVICE_ATTR_WO(unassign_control_domain);
   1090
   1091static ssize_t control_domains_show(struct device *dev,
   1092				    struct device_attribute *dev_attr,
   1093				    char *buf)
   1094{
   1095	unsigned long id;
   1096	int nchars = 0;
   1097	int n;
   1098	char *bufpos = buf;
   1099	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
   1100	unsigned long max_domid = matrix_mdev->matrix.adm_max;
   1101
   1102	mutex_lock(&matrix_dev->lock);
   1103	for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
   1104		n = sprintf(bufpos, "%04lx\n", id);
   1105		bufpos += n;
   1106		nchars += n;
   1107	}
   1108	mutex_unlock(&matrix_dev->lock);
   1109
   1110	return nchars;
   1111}
   1112static DEVICE_ATTR_RO(control_domains);
   1113
   1114static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
   1115			   char *buf)
   1116{
   1117	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
   1118	char *bufpos = buf;
   1119	unsigned long apid;
   1120	unsigned long apqi;
   1121	unsigned long apid1;
   1122	unsigned long apqi1;
   1123	unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
   1124	unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
   1125	int nchars = 0;
   1126	int n;
   1127
   1128	apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
   1129	apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
   1130
   1131	mutex_lock(&matrix_dev->lock);
   1132
   1133	if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
   1134		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
   1135			for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
   1136					     naqm_bits) {
   1137				n = sprintf(bufpos, "%02lx.%04lx\n", apid,
   1138					    apqi);
   1139				bufpos += n;
   1140				nchars += n;
   1141			}
   1142		}
   1143	} else if (apid1 < napm_bits) {
   1144		for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
   1145			n = sprintf(bufpos, "%02lx.\n", apid);
   1146			bufpos += n;
   1147			nchars += n;
   1148		}
   1149	} else if (apqi1 < naqm_bits) {
   1150		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
   1151			n = sprintf(bufpos, ".%04lx\n", apqi);
   1152			bufpos += n;
   1153			nchars += n;
   1154		}
   1155	}
   1156
   1157	mutex_unlock(&matrix_dev->lock);
   1158
   1159	return nchars;
   1160}
   1161static DEVICE_ATTR_RO(matrix);
   1162
/* sysfs attributes exposed for each mediated matrix device. */
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};

static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

/* NULL-terminated list of attribute groups handed to the mdev driver core. */
static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};
   1183
   1184/**
   1185 * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
   1186 * to manage AP resources for the guest whose state is represented by @kvm
   1187 *
   1188 * @matrix_mdev: a mediated matrix device
   1189 * @kvm: reference to KVM instance
   1190 *
   1191 * Return: 0 if no other mediated matrix device has a reference to @kvm;
   1192 * otherwise, returns an -EPERM.
   1193 */
   1194static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
   1195				struct kvm *kvm)
   1196{
   1197	struct ap_matrix_mdev *m;
   1198
   1199	if (kvm->arch.crypto.crycbd) {
   1200		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
   1201		kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
   1202		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
   1203
   1204		mutex_lock(&kvm->lock);
   1205		mutex_lock(&matrix_dev->lock);
   1206
   1207		list_for_each_entry(m, &matrix_dev->mdev_list, node) {
   1208			if (m != matrix_mdev && m->kvm == kvm) {
   1209				mutex_unlock(&kvm->lock);
   1210				mutex_unlock(&matrix_dev->lock);
   1211				return -EPERM;
   1212			}
   1213		}
   1214
   1215		kvm_get_kvm(kvm);
   1216		matrix_mdev->kvm = kvm;
   1217		kvm_arch_crypto_set_masks(kvm,
   1218					  matrix_mdev->matrix.apm,
   1219					  matrix_mdev->matrix.aqm,
   1220					  matrix_mdev->matrix.adm);
   1221
   1222		mutex_unlock(&kvm->lock);
   1223		mutex_unlock(&matrix_dev->lock);
   1224	}
   1225
   1226	return 0;
   1227}
   1228
   1229/**
   1230 * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
   1231 *
   1232 * @nb: The notifier block
   1233 * @action: Action to be taken
   1234 * @data: data associated with the request
   1235 *
   1236 * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
   1237 * pinned before). Other requests are ignored.
   1238 *
   1239 * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
   1240 */
   1241static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
   1242				       unsigned long action, void *data)
   1243{
   1244	struct ap_matrix_mdev *matrix_mdev;
   1245
   1246	matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
   1247
   1248	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
   1249		struct vfio_iommu_type1_dma_unmap *unmap = data;
   1250		unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
   1251
   1252		vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
   1253		return NOTIFY_OK;
   1254	}
   1255
   1256	return NOTIFY_DONE;
   1257}
   1258
   1259/**
   1260 * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
   1261 * by @matrix_mdev.
   1262 *
   1263 * @matrix_mdev: a matrix mediated device
   1264 */
   1265static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
   1266{
   1267	struct kvm *kvm = matrix_mdev->kvm;
   1268
   1269	if (kvm && kvm->arch.crypto.crycbd) {
   1270		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
   1271		kvm->arch.crypto.pqap_hook = NULL;
   1272		up_write(&kvm->arch.crypto.pqap_hook_rwsem);
   1273
   1274		mutex_lock(&kvm->lock);
   1275		mutex_lock(&matrix_dev->lock);
   1276
   1277		kvm_arch_crypto_clear_masks(kvm);
   1278		vfio_ap_mdev_reset_queues(matrix_mdev);
   1279		kvm_put_kvm(kvm);
   1280		matrix_mdev->kvm = NULL;
   1281
   1282		mutex_unlock(&kvm->lock);
   1283		mutex_unlock(&matrix_dev->lock);
   1284	}
   1285}
   1286
   1287static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
   1288{
   1289	struct device *dev;
   1290	struct vfio_ap_queue *q = NULL;
   1291
   1292	dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
   1293				 &apqn, match_apqn);
   1294	if (dev) {
   1295		q = dev_get_drvdata(dev);
   1296		put_device(dev);
   1297	}
   1298
   1299	return q;
   1300}
   1301
/*
 * Reset the AP queue @q by issuing PQAP/ZAPQ, retrying up to @retry times
 * while a reset is already in progress, then waiting (briefly) for the queue
 * to drain before freeing the queue's AQIC (interrupt) resources.
 *
 * Return: 0 on success; -EBUSY if the reset did not complete or the queue is
 * not operational; -EIO on an unexpected ZAPQ response code (resources are
 * NOT freed in that case).
 */
int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
			     unsigned int retry)
{
	struct ap_queue_status status;
	int ret;
	int retry2 = 2;

	/* A queue that was never bound needs no reset. */
	if (!q)
		return 0;

retry_zapq:
	status = ap_zapq(q->apqn);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
		ret = 0;
		break;
	case AP_RESPONSE_RESET_IN_PROGRESS:
		if (retry--) {
			msleep(20);
			goto retry_zapq;
		}
		/* Retries exhausted; still fall through to the drain wait. */
		ret = -EBUSY;
		break;
	case AP_RESPONSE_Q_NOT_AVAIL:
	case AP_RESPONSE_DECONFIGURED:
	case AP_RESPONSE_CHECKSTOPPED:
		/* Queue is gone; no IRQ should still be enabled for it. */
		WARN_ON_ONCE(status.irq_enabled);
		ret = -EBUSY;
		goto free_resources;
	default:
		/* things are really broken, give up */
		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
		     status.response_code);
		return -EIO;
	}

	/* wait for the reset to take effect */
	while (retry2--) {
		if (status.queue_empty && !status.irq_enabled)
			break;
		msleep(20);
		status = ap_tapq(q->apqn, NULL);
	}
	/*
	 * NOTE(review): breaking out on the final pass leaves retry2 == 0,
	 * so this warning can fire even when the drain succeeded — confirm
	 * whether the check should be retry2 < 0.
	 */
	WARN_ON_ONCE(retry2 <= 0);

free_resources:
	vfio_ap_free_aqic_resources(q);

	return ret;
}
   1352
   1353static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
   1354{
   1355	int ret;
   1356	int rc = 0;
   1357	unsigned long apid, apqi;
   1358	struct vfio_ap_queue *q;
   1359
   1360	for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
   1361			     matrix_mdev->matrix.apm_max + 1) {
   1362		for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
   1363				     matrix_mdev->matrix.aqm_max + 1) {
   1364			q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
   1365			ret = vfio_ap_mdev_reset_queue(q, 1);
   1366			/*
   1367			 * Regardless whether a queue turns out to be busy, or
   1368			 * is not operational, we need to continue resetting
   1369			 * the remaining queues.
   1370			 */
   1371			if (ret)
   1372				rc = ret;
   1373		}
   1374	}
   1375
   1376	return rc;
   1377}
   1378
/*
 * vfio "open_device" callback: associate the mediated matrix device with the
 * KVM guest attached to @vdev and register for IOMMU DMA-unmap notifications.
 *
 * Return: 0 on success; -EINVAL if no kvm is attached; otherwise the error
 * from vfio_ap_mdev_set_kvm() or vfio_register_notifier().
 */
static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
{
	struct ap_matrix_mdev *matrix_mdev =
		container_of(vdev, struct ap_matrix_mdev, vdev);
	unsigned long events;
	int ret;

	if (!vdev->kvm)
		return -EINVAL;

	ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
	if (ret)
		return ret;

	matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
				     &matrix_mdev->iommu_notifier);
	if (ret)
		goto err_kvm;
	return 0;

err_kvm:
	/* Undo the kvm association set up above. */
	vfio_ap_mdev_unset_kvm(matrix_mdev);
	return ret;
}
   1405
/*
 * vfio "close_device" callback: tear down in reverse order of
 * vfio_ap_mdev_open_device() — unregister the IOMMU notifier first, then
 * dissociate the mdev from the KVM guest.
 */
static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
{
	struct ap_matrix_mdev *matrix_mdev =
		container_of(vdev, struct ap_matrix_mdev, vdev);

	vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
				 &matrix_mdev->iommu_notifier);
	vfio_ap_mdev_unset_kvm(matrix_mdev);
}
   1415
   1416static int vfio_ap_mdev_get_device_info(unsigned long arg)
   1417{
   1418	unsigned long minsz;
   1419	struct vfio_device_info info;
   1420
   1421	minsz = offsetofend(struct vfio_device_info, num_irqs);
   1422
   1423	if (copy_from_user(&info, (void __user *)arg, minsz))
   1424		return -EFAULT;
   1425
   1426	if (info.argsz < minsz)
   1427		return -EINVAL;
   1428
   1429	info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
   1430	info.num_regions = 0;
   1431	info.num_irqs = 0;
   1432
   1433	return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
   1434}
   1435
   1436static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
   1437				    unsigned int cmd, unsigned long arg)
   1438{
   1439	struct ap_matrix_mdev *matrix_mdev =
   1440		container_of(vdev, struct ap_matrix_mdev, vdev);
   1441	int ret;
   1442
   1443	mutex_lock(&matrix_dev->lock);
   1444	switch (cmd) {
   1445	case VFIO_DEVICE_GET_INFO:
   1446		ret = vfio_ap_mdev_get_device_info(arg);
   1447		break;
   1448	case VFIO_DEVICE_RESET:
   1449		ret = vfio_ap_mdev_reset_queues(matrix_mdev);
   1450		break;
   1451	default:
   1452		ret = -EOPNOTSUPP;
   1453		break;
   1454	}
   1455	mutex_unlock(&matrix_dev->lock);
   1456
   1457	return ret;
   1458}
   1459
/* vfio callbacks implemented by the mediated matrix device. */
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
};

/* mdev driver registered with the mdev core for the AP matrix device. */
static struct mdev_driver vfio_ap_matrix_driver = {
	.driver = {
		.name = "vfio_ap_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
	.supported_type_groups = vfio_ap_mdev_type_groups,
};
   1477
   1478int vfio_ap_mdev_register(void)
   1479{
   1480	int ret;
   1481
   1482	atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
   1483
   1484	ret = mdev_register_driver(&vfio_ap_matrix_driver);
   1485	if (ret)
   1486		return ret;
   1487
   1488	ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_driver);
   1489	if (ret)
   1490		goto err_driver;
   1491	return 0;
   1492
   1493err_driver:
   1494	mdev_unregister_driver(&vfio_ap_matrix_driver);
   1495	return ret;
   1496}
   1497
/* Tear down in reverse order of vfio_ap_mdev_register(). */
void vfio_ap_mdev_unregister(void)
{
	mdev_unregister_device(&matrix_dev->device);
	mdev_unregister_driver(&vfio_ap_matrix_driver);
}