io-pgfault.c (12169B)
// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-sva-lib.h"

/**
 * struct iopf_queue - IO Page Fault queue
 * @wq: the fault workqueue
 * @devices: devices attached to this queue
 * @lock: protects the device list
 */
struct iopf_queue {
	struct workqueue_struct *wq;
	struct list_head devices;
	struct mutex lock;
};

/**
 * struct iopf_device_param - IO Page Fault data attached to a device
 * @dev: the device that owns this param
 * @queue: IOPF queue
 * @queue_list: index into queue->devices
 * @partial: faults that are part of a Page Request Group for which the last
 *	request hasn't been submitted yet.
 */
struct iopf_device_param {
	struct device *dev;
	struct iopf_queue *queue;
	struct list_head queue_list;
	struct list_head partial;
};

struct iopf_fault {
	struct iommu_fault fault;
	struct list_head list;
};

struct iopf_group {
	struct iopf_fault last_fault;
	struct list_head faults;
	struct work_struct work;
	struct device *dev;
};

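/*
 * Send the Page Request Group Response for the group that @iopf terminates,
 * reporting @status to the device. The PASID and group ID are taken from the
 * last fault of the group.
 */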
static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
			       enum iommu_page_response_code status)
{
	struct iommu_page_response resp = {
		.version = IOMMU_PAGE_RESP_VERSION_1,
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
	    (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
		resp.flags = IOMMU_PAGE_RESP_PASID_VALID;

	return iommu_page_response(dev, &resp);
}

static enum iommu_page_response_code
iopf_handle_single(struct iopf_fault *iopf)
{
	vm_fault_t ret;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned int access_flags = 0;
	unsigned int fault_flags = FAULT_FLAG_REMOTE;
	struct iommu_fault_page_request *prm = &iopf->fault.prm;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;

	if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
		return status;

	mm = iommu_sva_find(prm->pasid);
	if (IS_ERR_OR_NULL(mm))
		return status;

	mmap_read_lock(mm);

	vma = find_extend_vma(mm, prm->addr);
	if (!vma)
		/* Unmapped area */
		goto out_put_mm;

	if (prm->perm & IOMMU_FAULT_PERM_READ)
		access_flags |= VM_READ;

	if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
		access_flags |= VM_WRITE;
		fault_flags |= FAULT_FLAG_WRITE;
	}

	if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
		access_flags |= VM_EXEC;
		fault_flags |= FAULT_FLAG_INSTRUCTION;
	}

	if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
		fault_flags |= FAULT_FLAG_USER;

	if (access_flags & ~vma->vm_flags)
		/* Access fault */
		goto out_put_mm;

	ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
	status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
		IOMMU_PAGE_RESP_SUCCESS;

out_put_mm:
	mmap_read_unlock(mm);
	mmput(mm);

	return status;
}

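/*
 * Handle all page requests of a group: resolve each fault with
 * iopf_handle_single() and send a single response for the whole group once
 * the last fault has been processed.
 */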
static void iopf_handle_group(struct work_struct *work)
{
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

	group = container_of(work, struct iopf_group, work);

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		/*
		 * For the moment, errors are sticky: don't handle subsequent
		 * faults in the group if there is an error.
		 */
		if (status == IOMMU_PAGE_RESP_SUCCESS)
			status = iopf_handle_single(iopf);

		if (!(iopf->fault.prm.flags &
		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	iopf_complete_group(group->dev, &group->last_fault, status);
	kfree(group);
}

/**
 * iommu_queue_iopf - IO Page Fault handler
 * @fault: fault event
 * @cookie: struct device, passed to iommu_register_device_fault_handler.
 *
 * Add a fault to the device workqueue, to be handled by mm.
 *
 * This module doesn't handle PCI PASID Stop Markers; IOMMU drivers must
 * discard them before reporting faults. A PASID Stop Marker (LRW = 0b100)
 * doesn't expect a response. It may be generated when disabling a PASID
 * (issuing a PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses,
 * and instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the
 * stop request completes, outstanding faults will have been dealt with by the
 * time the PASID is freed.
 *
 * Return: 0 on success and <0 on error.
 */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
	int ret;
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	struct device *dev = cookie;
	struct dev_iommu *param = dev->iommu;

	lockdep_assert_held(&param->lock);

	if (fault->type != IOMMU_FAULT_PAGE_REQ)
		/* Not a recoverable page fault */
		return -EOPNOTSUPP;

	/*
	 * As long as we're holding param->lock, the queue can't be unlinked
	 * from the device and therefore cannot disappear.
	 */
	iopf_param = param->iopf_param;
	if (!iopf_param)
		return -ENODEV;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
		if (!iopf)
			return -ENOMEM;

		iopf->fault = *fault;

		/* Non-last request of a group. Postpone until the last one */
		list_add(&iopf->list, &iopf_param->partial);

		return 0;
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * The caller will send a response to the hardware. But we do
		 * need to clean up before leaving, otherwise partial faults
		 * will be stuck.
		 */
		ret = -ENOMEM;
		goto cleanup_partial;
	}

	group->dev = dev;
	group->last_fault.fault = *fault;
	INIT_LIST_HEAD(&group->faults);
	list_add(&group->last_fault.list, &group->faults);
	INIT_WORK(&group->work, iopf_handle_group);

	/* See if we have partial faults for this group */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}

	queue_work(iopf_param->queue->wq, &group->work);
	return 0;

cleanup_partial:
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);

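/*
 * Reporting path (illustrative, not part of this file): the IOMMU driver
 * registers iommu_queue_iopf() as the fault handler for the device, e.g.
 *
 *	iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
 *
 * and then forwards each Page Request read from its hardware queue with
 * iommu_report_device_fault(), which invokes the handler registered above.
 */
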
/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	int ret = 0;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param)
		flush_workqueue(iopf_param->queue->wq);
	else
		ret = -ENODEV;
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, last page faults in a group may have been
 * lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EBUSY;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
	if (!iopf_param)
		return -ENOMEM;

	INIT_LIST_HEAD(&iopf_param->partial);
	iopf_param->queue = queue;
	iopf_param->dev = dev;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (!param->iopf_param) {
		list_add(&iopf_param->queue_list, &queue->devices);
		param->iopf_param = iopf_param;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	if (ret)
		kfree(iopf_param);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Caller makes sure that no more faults are reported for this device.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EINVAL;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param || !queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param && iopf_param->queue == queue) {
		list_del(&iopf_param->queue_list);
		param->iopf_param = NULL;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
	if (ret)
		return ret;

	/* Just in case some faults are still stuck */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
		kfree(iopf);

	kfree(iopf_param);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iopf_device_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
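
Usage sketch (not part of io-pgfault.c): the snippet below illustrates how an IOMMU driver might wire a PRI-capable device into a fault queue and tear it down again. The struct my_iommu container, the two function names and the simplified error handling are invented for the illustration; only the iopf_queue_*() and iommu_*() calls come from the API above, with the includes mirroring this file (iommu-sva-lib.h is assumed to provide the iommu_queue_iopf() prototype, as in this tree).

#include <linux/device.h>
#include <linux/iommu.h>

#include "iommu-sva-lib.h"	/* iommu_queue_iopf() */

struct my_iommu {
	struct iopf_queue *iopf_queue;	/* one queue shared by our devices */
};

static int my_iommu_enable_iopf(struct my_iommu *iommu, struct device *dev)
{
	int ret;

	/* One workqueue per IOMMU is typically enough; name it after the device */
	iommu->iopf_queue = iopf_queue_alloc(dev_name(dev));
	if (!iommu->iopf_queue)
		return -ENOMEM;

	/* Attach the endpoint so its partial faults have somewhere to live */
	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
	if (ret)
		goto err_free_queue;

	/* Page requests reported for dev now land in iommu_queue_iopf() */
	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
	if (ret)
		goto err_remove_dev;

	return 0;

err_remove_dev:
	iopf_queue_remove_device(iommu->iopf_queue, dev);
err_free_queue:
	iopf_queue_free(iommu->iopf_queue);
	return ret;
}

static void my_iommu_disable_iopf(struct my_iommu *iommu, struct device *dev)
{
	/*
	 * The caller must already have quiesced the hardware PRI queue so
	 * that no new fault gets reported for dev.
	 */
	iopf_queue_flush_dev(dev);
	iommu_unregister_device_fault_handler(dev);
	iopf_queue_remove_device(iommu->iopf_queue, dev);
	iopf_queue_free(iommu->iopf_queue);
}

A single queue can serve several devices: iopf_queue_add_device() links each device's iopf_device_param into queue->devices, and iopf_queue_free() detaches whatever is still attached before destroying the workqueue.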