cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

nouveau_svm.c (28504B)


      1/*
      2 * Copyright 2018 Red Hat Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 */
     22#include "nouveau_svm.h"
     23#include "nouveau_drv.h"
     24#include "nouveau_chan.h"
     25#include "nouveau_dmem.h"
     26
     27#include <nvif/notify.h>
     28#include <nvif/object.h>
     29#include <nvif/vmm.h>
     30
     31#include <nvif/class.h>
     32#include <nvif/clb069.h>
     33#include <nvif/ifc00d.h>
     34
     35#include <linux/sched/mm.h>
     36#include <linux/sort.h>
     37#include <linux/hmm.h>
     38#include <linux/memremap.h>
     39#include <linux/rmap.h>
     40
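        /* Per-device SVM state: the list of channel instances bound to SVM-enabled
         * VMMs (protected by @mutex), plus the HW replayable fault buffer.  The
         * buffer mirrors the HW GET/PUT ring pointers and caches decoded fault
         * entries so they can be sorted and serviced in batches.
         */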
     41struct nouveau_svm {
     42	struct nouveau_drm *drm;
     43	struct mutex mutex;
     44	struct list_head inst;
     45
     46	struct nouveau_svm_fault_buffer {
     47		int id;
     48		struct nvif_object object;
     49		u32 entries;
     50		u32 getaddr;
     51		u32 putaddr;
     52		u32 get;
     53		u32 put;
     54		struct nvif_notify notify;
     55
     56		struct nouveau_svm_fault {
     57			u64 inst;
     58			u64 addr;
     59			u64 time;
     60			u32 engine;
     61			u8  gpc;
     62			u8  hub;
     63			u8  access;
     64			u8  client;
     65			u8  fault;
     66			struct nouveau_svmm *svmm;
     67		} **fault;
     68		int fault_nr;
     69	} buffer[1];
     70};
     71
     72#define FAULT_ACCESS_READ 0
     73#define FAULT_ACCESS_WRITE 1
     74#define FAULT_ACCESS_ATOMIC 2
     75#define FAULT_ACCESS_PREFETCH 3
     76
     77#define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a)
     78#define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a)
     79
     80struct nouveau_pfnmap_args {
     81	struct nvif_ioctl_v0 i;
     82	struct nvif_ioctl_mthd_v0 m;
     83	struct nvif_vmm_pfnmap_v0 p;
     84};
     85
     86struct nouveau_ivmm {
     87	struct nouveau_svmm *svmm;
     88	u64 inst;
     89	struct list_head head;
     90};
     91
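        /* Look up the instance-tracking entry (and thus the SVMM) for a given
         * channel instance pointer.  Callers hold svm->mutex while walking the
         * list.
         */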
     92static struct nouveau_ivmm *
     93nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
     94{
     95	struct nouveau_ivmm *ivmm;
     96	list_for_each_entry(ivmm, &svm->inst, head) {
     97		if (ivmm->inst == inst)
     98			return ivmm;
     99	}
    100	return NULL;
    101}
    102
    103#define SVMM_DBG(s,f,a...)                                                     \
    104	NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
    105#define SVMM_ERR(s,f,a...)                                                     \
    106	NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
    107
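        /* DRM_NOUVEAU_SVM_BIND ioctl handler: validate the userspace request, then
         * walk the VMAs intersecting [va_start, va_end) and migrate each range to
         * GPU VRAM on a best-effort basis.  Only the MIGRATE command with a zero
         * stride and a GPU_VRAM target is currently supported.
         */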
    108int
    109nouveau_svmm_bind(struct drm_device *dev, void *data,
    110		  struct drm_file *file_priv)
    111{
    112	struct nouveau_cli *cli = nouveau_cli(file_priv);
    113	struct drm_nouveau_svm_bind *args = data;
    114	unsigned target, cmd, priority;
    115	unsigned long addr, end;
    116	struct mm_struct *mm;
    117
    118	args->va_start &= PAGE_MASK;
    119	args->va_end = ALIGN(args->va_end, PAGE_SIZE);
    120
    121	/* Sanity check arguments */
    122	if (args->reserved0 || args->reserved1)
    123		return -EINVAL;
    124	if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK))
    125		return -EINVAL;
    126	if (args->va_start >= args->va_end)
    127		return -EINVAL;
    128
    129	cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT;
    130	cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK;
    131	switch (cmd) {
    132	case NOUVEAU_SVM_BIND_COMMAND__MIGRATE:
    133		break;
    134	default:
    135		return -EINVAL;
    136	}
    137
    138	priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT;
    139	priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK;
    140
     141	/* FIXME: support CPU targets, i.e. all target values < GPU_VRAM. */
    142	target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT;
    143	target &= NOUVEAU_SVM_BIND_TARGET_MASK;
    144	switch (target) {
    145	case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM:
    146		break;
    147	default:
    148		return -EINVAL;
    149	}
    150
     151	/*
     152	 * FIXME: For now refuse a non-zero stride; the migrate kernel function
     153	 * needs to be taught how to handle strides so that we avoid creating a
     154	 * mess within each device driver.
     155	 */
    156	if (args->stride)
    157		return -EINVAL;
    158
     159	/*
     160	 * OK, we are asked to do something sane. For now we only support
     161	 * migrate commands, but we will add things like memory policy (what to
     162	 * do on page fault) and maybe some other commands.
     163	 */
    164
    165	mm = get_task_mm(current);
    166	if (!mm) {
    167		return -EINVAL;
    168	}
    169	mmap_read_lock(mm);
    170
    171	if (!cli->svm.svmm) {
    172		mmap_read_unlock(mm);
    173		mmput(mm);
    174		return -EINVAL;
    175	}
    176
    177	for (addr = args->va_start, end = args->va_end; addr < end;) {
    178		struct vm_area_struct *vma;
    179		unsigned long next;
    180
    181		vma = find_vma_intersection(mm, addr, end);
    182		if (!vma)
    183			break;
    184
    185		addr = max(addr, vma->vm_start);
    186		next = min(vma->vm_end, end);
     187		/* This is a best-effort operation, so we ignore errors. */
    188		nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
    189					 next);
    190		addr = next;
    191	}
    192
     193	/*
     194	 * FIXME: Return the number of pages we have migrated. Again, we need
     195	 * to update the migrate API to return that information so that we can
     196	 * report it to user space.
     197	 */
    198	args->result = 0;
    199
    200	mmap_read_unlock(mm);
    201	mmput(mm);
    202
    203	return 0;
    204}
    205
    206/* Unlink channel instance from SVMM. */
    207void
    208nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst)
    209{
    210	struct nouveau_ivmm *ivmm;
    211	if (svmm) {
    212		mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
    213		ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
    214		if (ivmm) {
    215			list_del(&ivmm->head);
    216			kfree(ivmm);
    217		}
    218		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
    219	}
    220}
    221
    222/* Link channel instance to SVMM. */
    223int
    224nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
    225{
    226	struct nouveau_ivmm *ivmm;
    227	if (svmm) {
    228		if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
    229			return -ENOMEM;
    230		ivmm->svmm = svmm;
    231		ivmm->inst = inst;
    232
    233		mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
    234		list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
    235		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
    236	}
    237	return 0;
    238}
    239
    240/* Invalidate SVMM address-range on GPU. */
    241void
    242nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
    243{
    244	if (limit > start) {
    245		nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR,
    246				 &(struct nvif_vmm_pfnclr_v0) {
    247					.addr = start,
    248					.size = limit - start,
    249				 }, sizeof(struct nvif_vmm_pfnclr_v0));
    250	}
    251}
    252
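        /* mmu_notifier callback: CPU page tables for the range are about to change,
         * so drop the matching GPU mappings.  Invalidation skips the unmanaged
         * window, and migrations to device-private pages owned by this driver are
         * ignored since the migration path handles those itself.
         */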
    253static int
    254nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
    255				    const struct mmu_notifier_range *update)
    256{
    257	struct nouveau_svmm *svmm =
    258		container_of(mn, struct nouveau_svmm, notifier);
    259	unsigned long start = update->start;
    260	unsigned long limit = update->end;
    261
    262	if (!mmu_notifier_range_blockable(update))
    263		return -EAGAIN;
    264
    265	SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
    266
    267	mutex_lock(&svmm->mutex);
    268	if (unlikely(!svmm->vmm))
    269		goto out;
    270
    271	/*
    272	 * Ignore invalidation callbacks for device private pages since
    273	 * the invalidation is handled as part of the migration process.
    274	 */
    275	if (update->event == MMU_NOTIFY_MIGRATE &&
    276	    update->owner == svmm->vmm->cli->drm->dev)
    277		goto out;
    278
    279	if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
    280		if (start < svmm->unmanaged.start) {
    281			nouveau_svmm_invalidate(svmm, start,
    282						svmm->unmanaged.limit);
    283		}
    284		start = svmm->unmanaged.limit;
    285	}
    286
    287	nouveau_svmm_invalidate(svmm, start, limit);
    288
    289out:
    290	mutex_unlock(&svmm->mutex);
    291	return 0;
    292}
    293
    294static void nouveau_svmm_free_notifier(struct mmu_notifier *mn)
    295{
    296	kfree(container_of(mn, struct nouveau_svmm, notifier));
    297}
    298
    299static const struct mmu_notifier_ops nouveau_mn_ops = {
    300	.invalidate_range_start = nouveau_svmm_invalidate_range_start,
    301	.free_notifier = nouveau_svmm_free_notifier,
    302};
    303
    304void
    305nouveau_svmm_fini(struct nouveau_svmm **psvmm)
    306{
    307	struct nouveau_svmm *svmm = *psvmm;
    308	if (svmm) {
    309		mutex_lock(&svmm->mutex);
    310		svmm->vmm = NULL;
    311		mutex_unlock(&svmm->mutex);
    312		mmu_notifier_put(&svmm->notifier);
    313		*psvmm = NULL;
    314	}
    315}
    316
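        /* DRM_NOUVEAU_SVM_INIT ioctl handler: enable SVM for a client.  Allocates
         * the nouveau_svmm tracking structure, replaces the client's VMM with one
         * that has replayable faults enabled, and registers an mmu_notifier on the
         * current process so CPU-side invalidations are mirrored to the GPU.
         */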
    317int
    318nouveau_svmm_init(struct drm_device *dev, void *data,
    319		  struct drm_file *file_priv)
    320{
    321	struct nouveau_cli *cli = nouveau_cli(file_priv);
    322	struct nouveau_svmm *svmm;
    323	struct drm_nouveau_svm_init *args = data;
    324	int ret;
    325
    326	/* We need to fail if svm is disabled */
    327	if (!cli->drm->svm)
    328		return -ENOSYS;
    329
    330	/* Allocate tracking for SVM-enabled VMM. */
    331	if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
    332		return -ENOMEM;
    333	svmm->vmm = &cli->svm;
    334	svmm->unmanaged.start = args->unmanaged_addr;
    335	svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size;
    336	mutex_init(&svmm->mutex);
    337
    338	/* Check that SVM isn't already enabled for the client. */
    339	mutex_lock(&cli->mutex);
    340	if (cli->svm.cli) {
    341		ret = -EBUSY;
    342		goto out_free;
    343	}
    344
    345	/* Allocate a new GPU VMM that can support SVM (managed by the
    346	 * client, with replayable faults enabled).
    347	 *
    348	 * All future channel/memory allocations will make use of this
    349	 * VMM instead of the standard one.
    350	 */
    351	ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
    352			    cli->vmm.vmm.object.oclass, true,
    353			    args->unmanaged_addr, args->unmanaged_size,
    354			    &(struct gp100_vmm_v0) {
    355				.fault_replay = true,
    356			    }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm);
    357	if (ret)
    358		goto out_free;
    359
    360	mmap_write_lock(current->mm);
    361	svmm->notifier.ops = &nouveau_mn_ops;
    362	ret = __mmu_notifier_register(&svmm->notifier, current->mm);
    363	if (ret)
    364		goto out_mm_unlock;
    365	/* Note, ownership of svmm transfers to mmu_notifier */
    366
    367	cli->svm.svmm = svmm;
    368	cli->svm.cli = cli;
    369	mmap_write_unlock(current->mm);
    370	mutex_unlock(&cli->mutex);
    371	return 0;
    372
    373out_mm_unlock:
    374	mmap_write_unlock(current->mm);
    375out_free:
    376	mutex_unlock(&cli->mutex);
    377	kfree(svmm);
    378	return ret;
    379}
    380
    381/* Issue fault replay for GPU to retry accesses that faulted previously. */
    382static void
    383nouveau_svm_fault_replay(struct nouveau_svm *svm)
    384{
    385	SVM_DBG(svm, "replay");
    386	WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
    387				 GP100_VMM_VN_FAULT_REPLAY,
    388				 &(struct gp100_vmm_fault_replay_vn) {},
    389				 sizeof(struct gp100_vmm_fault_replay_vn)));
    390}
    391
    392/* Cancel a replayable fault that could not be handled.
    393 *
    394 * Cancelling the fault will trigger recovery to reset the engine
    395 * and kill the offending channel (ie. GPU SIGSEGV).
    396 */
    397static void
    398nouveau_svm_fault_cancel(struct nouveau_svm *svm,
    399			 u64 inst, u8 hub, u8 gpc, u8 client)
    400{
    401	SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client);
    402	WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
    403				 GP100_VMM_VN_FAULT_CANCEL,
    404				 &(struct gp100_vmm_fault_cancel_v0) {
    405					.hub = hub,
    406					.gpc = gpc,
    407					.client = client,
    408					.inst = inst,
    409				 }, sizeof(struct gp100_vmm_fault_cancel_v0)));
    410}
    411
    412static void
    413nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm,
    414			       struct nouveau_svm_fault *fault)
    415{
    416	nouveau_svm_fault_cancel(svm, fault->inst,
    417				      fault->hub,
    418				      fault->gpc,
    419				      fault->client);
    420}
    421
    422static int
    423nouveau_svm_fault_priority(u8 fault)
    424{
    425	switch (fault) {
    426	case FAULT_ACCESS_PREFETCH:
    427		return 0;
    428	case FAULT_ACCESS_READ:
    429		return 1;
    430	case FAULT_ACCESS_WRITE:
    431		return 2;
    432	case FAULT_ACCESS_ATOMIC:
    433		return 3;
    434	default:
    435		WARN_ON_ONCE(1);
    436		return -1;
    437	}
    438}
    439
    440static int
    441nouveau_svm_fault_cmp(const void *a, const void *b)
    442{
    443	const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a;
    444	const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b;
    445	int ret;
    446	if ((ret = (s64)fa->inst - fb->inst))
    447		return ret;
    448	if ((ret = (s64)fa->addr - fb->addr))
    449		return ret;
    450	return nouveau_svm_fault_priority(fa->access) -
    451		nouveau_svm_fault_priority(fb->access);
    452}
    453
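        /* Decode one 32-byte replayable fault entry from the HW buffer into the
         * software cache.  Entry layout, as read below:
         *
         *   0x00 instlo  0x04 insthi - faulting channel instance
         *   0x08 addrlo  0x0c addrhi - faulting virtual address
         *   0x10 timelo  0x14 timehi - timestamp
         *   0x18 engine
         *   0x1c info: bit 31 = valid, 28:24 = gpc, 20 = hub, 19:16 = access,
         *              14:8 = client, 4:0 = fault type
         */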
    454static void
    455nouveau_svm_fault_cache(struct nouveau_svm *svm,
    456			struct nouveau_svm_fault_buffer *buffer, u32 offset)
    457{
    458	struct nvif_object *memory = &buffer->object;
    459	const u32 instlo = nvif_rd32(memory, offset + 0x00);
    460	const u32 insthi = nvif_rd32(memory, offset + 0x04);
    461	const u32 addrlo = nvif_rd32(memory, offset + 0x08);
    462	const u32 addrhi = nvif_rd32(memory, offset + 0x0c);
    463	const u32 timelo = nvif_rd32(memory, offset + 0x10);
    464	const u32 timehi = nvif_rd32(memory, offset + 0x14);
    465	const u32 engine = nvif_rd32(memory, offset + 0x18);
    466	const u32   info = nvif_rd32(memory, offset + 0x1c);
    467	const u64   inst = (u64)insthi << 32 | instlo;
    468	const u8     gpc = (info & 0x1f000000) >> 24;
    469	const u8     hub = (info & 0x00100000) >> 20;
    470	const u8  client = (info & 0x00007f00) >> 8;
    471	struct nouveau_svm_fault *fault;
    472
     473	/* XXX: I think we're supposed to spin waiting. */
    474	if (WARN_ON(!(info & 0x80000000)))
    475		return;
    476
    477	nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000);
    478
    479	if (!buffer->fault[buffer->fault_nr]) {
    480		fault = kmalloc(sizeof(*fault), GFP_KERNEL);
    481		if (WARN_ON(!fault)) {
    482			nouveau_svm_fault_cancel(svm, inst, hub, gpc, client);
    483			return;
    484		}
    485		buffer->fault[buffer->fault_nr] = fault;
    486	}
    487
    488	fault = buffer->fault[buffer->fault_nr++];
    489	fault->inst   = inst;
    490	fault->addr   = (u64)addrhi << 32 | addrlo;
    491	fault->time   = (u64)timehi << 32 | timelo;
    492	fault->engine = engine;
    493	fault->gpc    = gpc;
    494	fault->hub    = hub;
    495	fault->access = (info & 0x000f0000) >> 16;
    496	fault->client = client;
    497	fault->fault  = (info & 0x0000001f);
    498
    499	SVM_DBG(svm, "fault %016llx %016llx %02x",
    500		fault->inst, fault->addr, fault->access);
    501}
    502
    503struct svm_notifier {
    504	struct mmu_interval_notifier notifier;
    505	struct nouveau_svmm *svmm;
    506};
    507
    508static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
    509					 const struct mmu_notifier_range *range,
    510					 unsigned long cur_seq)
    511{
    512	struct svm_notifier *sn =
    513		container_of(mni, struct svm_notifier, notifier);
    514
    515	if (range->event == MMU_NOTIFY_EXCLUSIVE &&
    516	    range->owner == sn->svmm->vmm->cli->drm->dev)
    517		return true;
    518
     519	/*
     520	 * This serializes the update to mni->invalidate_seq done by the caller
     521	 * and prevents invalidation of the PTE from progressing while HW is
     522	 * being programmed. This is very hacky and only works because the
     523	 * normal notifier that does invalidation is always called after the
     524	 * range notifier.
     525	 */
    526	if (mmu_notifier_range_blockable(range))
    527		mutex_lock(&sn->svmm->mutex);
    528	else if (!mutex_trylock(&sn->svmm->mutex))
    529		return false;
    530	mmu_interval_set_seq(mni, cur_seq);
    531	mutex_unlock(&sn->svmm->mutex);
    532	return true;
    533}
    534
    535static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
    536	.invalidate = nouveau_svm_range_invalidate,
    537};
    538
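        /* Translate the hmm_pfn produced by hmm_range_fault() into the
         * nvif_vmm_pfnmap_v0 encoding expected by the VMM object: physical address,
         * VRAM vs HOST aperture, writability and, for compound pages the CPU also
         * maps as such, a larger page size.
         */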
    539static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
    540				    struct hmm_range *range,
    541				    struct nouveau_pfnmap_args *args)
    542{
    543	struct page *page;
    544
     545	/*
     546	 * The address prepared here is passed through nvif_object_ioctl()
     547	 * to an eventual DMA map in something like gp100_vmm_pgt_pfn().
     548	 *
     549	 * This is all just encoding the internal HMM representation into a
     550	 * different nouveau-internal representation.
     551	 */
    552	if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) {
    553		args->p.phys[0] = 0;
    554		return;
    555	}
    556
    557	page = hmm_pfn_to_page(range->hmm_pfns[0]);
    558	/*
    559	 * Only map compound pages to the GPU if the CPU is also mapping the
    560	 * page as a compound page. Otherwise, the PTE protections might not be
    561	 * consistent (e.g., CPU only maps part of a compound page).
    562	 * Note that the underlying page might still be larger than the
    563	 * CPU mapping (e.g., a PUD sized compound page partially mapped with
    564	 * a PMD sized page table entry).
    565	 */
    566	if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
    567		unsigned long addr = args->p.addr;
    568
    569		args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
    570				PAGE_SHIFT;
    571		args->p.size = 1UL << args->p.page;
    572		args->p.addr &= ~(args->p.size - 1);
    573		page -= (addr - args->p.addr) >> PAGE_SHIFT;
    574	}
    575	if (is_device_private_page(page))
    576		args->p.phys[0] = nouveau_dmem_page_addr(page) |
    577				NVIF_VMM_PFNMAP_V0_V |
    578				NVIF_VMM_PFNMAP_V0_VRAM;
    579	else
    580		args->p.phys[0] = page_to_phys(page) |
    581				NVIF_VMM_PFNMAP_V0_V |
    582				NVIF_VMM_PFNMAP_V0_HOST;
    583	if (range->hmm_pfns[0] & HMM_PFN_WRITE)
    584		args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W;
    585}
    586
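        /* Service an ATOMIC fault: make the CPU page exclusive to the device with
         * make_device_exclusive_range(), then map it on the GPU with the atomic (A)
         * flag set.  The interval notifier sequence is re-checked under svmm->mutex
         * so a concurrent CPU-side invalidation forces a retry.
         */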
    587static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm,
    588			       struct nouveau_drm *drm,
    589			       struct nouveau_pfnmap_args *args, u32 size,
    590			       struct svm_notifier *notifier)
    591{
    592	unsigned long timeout =
    593		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
    594	struct mm_struct *mm = svmm->notifier.mm;
    595	struct page *page;
    596	unsigned long start = args->p.addr;
    597	unsigned long notifier_seq;
    598	int ret = 0;
    599
    600	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
    601					args->p.addr, args->p.size,
    602					&nouveau_svm_mni_ops);
    603	if (ret)
    604		return ret;
    605
    606	while (true) {
    607		if (time_after(jiffies, timeout)) {
    608			ret = -EBUSY;
    609			goto out;
    610		}
    611
    612		notifier_seq = mmu_interval_read_begin(&notifier->notifier);
    613		mmap_read_lock(mm);
    614		ret = make_device_exclusive_range(mm, start, start + PAGE_SIZE,
    615					    &page, drm->dev);
    616		mmap_read_unlock(mm);
    617		if (ret <= 0 || !page) {
    618			ret = -EINVAL;
    619			goto out;
    620		}
    621
    622		mutex_lock(&svmm->mutex);
    623		if (!mmu_interval_read_retry(&notifier->notifier,
    624					     notifier_seq))
    625			break;
    626		mutex_unlock(&svmm->mutex);
    627	}
    628
    629	/* Map the page on the GPU. */
    630	args->p.page = 12;
    631	args->p.size = PAGE_SIZE;
    632	args->p.addr = start;
    633	args->p.phys[0] = page_to_phys(page) |
    634		NVIF_VMM_PFNMAP_V0_V |
    635		NVIF_VMM_PFNMAP_V0_W |
    636		NVIF_VMM_PFNMAP_V0_A |
    637		NVIF_VMM_PFNMAP_V0_HOST;
    638
    639	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
    640	mutex_unlock(&svmm->mutex);
    641
    642	unlock_page(page);
    643	put_page(page);
    644
    645out:
    646	mmu_interval_notifier_remove(&notifier->notifier);
    647	return ret;
    648}
    649
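        /* Service a fault for a single page: use hmm_range_fault() to populate the
         * CPU page tables, convert the result to a PFNMAP entry and program the GPU
         * PTE.  As above, the interval notifier sequence is re-checked under
         * svmm->mutex before the HW is updated.
         */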
    650static int nouveau_range_fault(struct nouveau_svmm *svmm,
    651			       struct nouveau_drm *drm,
    652			       struct nouveau_pfnmap_args *args, u32 size,
    653			       unsigned long hmm_flags,
    654			       struct svm_notifier *notifier)
    655{
    656	unsigned long timeout =
    657		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
    658	/* Have HMM fault pages within the fault window to the GPU. */
    659	unsigned long hmm_pfns[1];
    660	struct hmm_range range = {
    661		.notifier = &notifier->notifier,
    662		.default_flags = hmm_flags,
    663		.hmm_pfns = hmm_pfns,
    664		.dev_private_owner = drm->dev,
    665	};
    666	struct mm_struct *mm = svmm->notifier.mm;
    667	int ret;
    668
    669	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
    670					args->p.addr, args->p.size,
    671					&nouveau_svm_mni_ops);
    672	if (ret)
    673		return ret;
    674
    675	range.start = notifier->notifier.interval_tree.start;
    676	range.end = notifier->notifier.interval_tree.last + 1;
    677
    678	while (true) {
    679		if (time_after(jiffies, timeout)) {
    680			ret = -EBUSY;
    681			goto out;
    682		}
    683
    684		range.notifier_seq = mmu_interval_read_begin(range.notifier);
    685		mmap_read_lock(mm);
    686		ret = hmm_range_fault(&range);
    687		mmap_read_unlock(mm);
    688		if (ret) {
    689			if (ret == -EBUSY)
    690				continue;
    691			goto out;
    692		}
    693
    694		mutex_lock(&svmm->mutex);
    695		if (mmu_interval_read_retry(range.notifier,
    696					    range.notifier_seq)) {
    697			mutex_unlock(&svmm->mutex);
    698			continue;
    699		}
    700		break;
    701	}
    702
    703	nouveau_hmm_convert_pfn(drm, &range, args);
    704
    705	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
    706	mutex_unlock(&svmm->mutex);
    707
    708out:
    709	mmu_interval_notifier_remove(&notifier->notifier);
    710
    711	return ret;
    712}
    713
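        /* Top-level replayable fault handler, run from the fault buffer notify.
         * Drains pending HW entries into the cache, sorts them by instance, address
         * and access type, resolves each instance to its SVMM, then services each
         * fault (mapping the page on the GPU) or cancels it, and finally issues a
         * replay so the GPU retries the faulting accesses.
         */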
    714static int
    715nouveau_svm_fault(struct nvif_notify *notify)
    716{
    717	struct nouveau_svm_fault_buffer *buffer =
    718		container_of(notify, typeof(*buffer), notify);
    719	struct nouveau_svm *svm =
    720		container_of(buffer, typeof(*svm), buffer[buffer->id]);
    721	struct nvif_object *device = &svm->drm->client.device.object;
    722	struct nouveau_svmm *svmm;
    723	struct {
    724		struct nouveau_pfnmap_args i;
    725		u64 phys[1];
    726	} args;
    727	unsigned long hmm_flags;
    728	u64 inst, start, limit;
    729	int fi, fn;
    730	int replay = 0, atomic = 0, ret;
    731
    732	/* Parse available fault buffer entries into a cache, and update
    733	 * the GET pointer so HW can reuse the entries.
    734	 */
    735	SVM_DBG(svm, "fault handler");
    736	if (buffer->get == buffer->put) {
    737		buffer->put = nvif_rd32(device, buffer->putaddr);
    738		buffer->get = nvif_rd32(device, buffer->getaddr);
    739		if (buffer->get == buffer->put)
    740			return NVIF_NOTIFY_KEEP;
    741	}
    742	buffer->fault_nr = 0;
    743
    744	SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put);
    745	while (buffer->get != buffer->put) {
    746		nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20);
    747		if (++buffer->get == buffer->entries)
    748			buffer->get = 0;
    749	}
    750	nvif_wr32(device, buffer->getaddr, buffer->get);
    751	SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr);
    752
    753	/* Sort parsed faults by instance pointer to prevent unnecessary
    754	 * instance to SVMM translations, followed by address and access
    755	 * type to reduce the amount of work when handling the faults.
    756	 */
    757	sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
    758	     nouveau_svm_fault_cmp, NULL);
    759
    760	/* Lookup SVMM structure for each unique instance pointer. */
    761	mutex_lock(&svm->mutex);
    762	for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) {
    763		if (!svmm || buffer->fault[fi]->inst != inst) {
    764			struct nouveau_ivmm *ivmm =
    765				nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
    766			svmm = ivmm ? ivmm->svmm : NULL;
    767			inst = buffer->fault[fi]->inst;
    768			SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm);
    769		}
    770		buffer->fault[fi]->svmm = svmm;
    771	}
    772	mutex_unlock(&svm->mutex);
    773
    774	/* Process list of faults. */
    775	args.i.i.version = 0;
    776	args.i.i.type = NVIF_IOCTL_V0_MTHD;
    777	args.i.m.version = 0;
    778	args.i.m.method = NVIF_VMM_V0_PFNMAP;
    779	args.i.p.version = 0;
    780
    781	for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
    782		struct svm_notifier notifier;
    783		struct mm_struct *mm;
    784
    785		/* Cancel any faults from non-SVM channels. */
    786		if (!(svmm = buffer->fault[fi]->svmm)) {
    787			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
    788			continue;
    789		}
    790		SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);
    791
     792		/* We try to group handling of faults within a small
     793		 * window into a single update.
     794		 */
    795		start = buffer->fault[fi]->addr;
    796		limit = start + PAGE_SIZE;
    797		if (start < svmm->unmanaged.limit)
    798			limit = min_t(u64, limit, svmm->unmanaged.start);
    799
    800		/*
    801		 * Prepare the GPU-side update of all pages within the
    802		 * fault window, determining required pages and access
    803		 * permissions based on pending faults.
    804		 */
    805		args.i.p.addr = start;
    806		args.i.p.page = PAGE_SHIFT;
    807		args.i.p.size = PAGE_SIZE;
    808		/*
    809		 * Determine required permissions based on GPU fault
    810		 * access flags.
    811		 */
     812		switch (buffer->fault[fi]->access) {
     813		case FAULT_ACCESS_READ:
     814			hmm_flags = HMM_PFN_REQ_FAULT;
     815			break;
     816		case FAULT_ACCESS_ATOMIC:
     817			atomic = true;
     818			break;
     819		case FAULT_ACCESS_PREFETCH:
     820			hmm_flags = 0;
     821			break;
     822		default: /* WRITE. */
     823			hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
     824			break;
     825		}
    826
    827		mm = svmm->notifier.mm;
    828		if (!mmget_not_zero(mm)) {
    829			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
    830			continue;
    831		}
    832
    833		notifier.svmm = svmm;
    834		if (atomic)
    835			ret = nouveau_atomic_range_fault(svmm, svm->drm,
    836							 &args.i, sizeof(args),
    837							 &notifier);
    838		else
    839			ret = nouveau_range_fault(svmm, svm->drm, &args.i,
    840						  sizeof(args), hmm_flags,
    841						  &notifier);
    842		mmput(mm);
    843
    844		limit = args.i.p.addr + args.i.p.size;
    845		for (fn = fi; ++fn < buffer->fault_nr; ) {
     846			/* It's okay to skip over duplicate addresses from the
     847			 * same SVMM as faults are ordered by access type such
     848			 * that only the first one needs to be handled.
     849			 *
     850			 * i.e. WRITE faults appear first, thus any handling of
     851			 * pending READ faults will already be satisfied.
     852			 * But if a large page is mapped, make sure subsequent
     853			 * fault addresses have sufficient access permission.
     854			 */
    855			if (buffer->fault[fn]->svmm != svmm ||
    856			    buffer->fault[fn]->addr >= limit ||
    857			    (buffer->fault[fi]->access == FAULT_ACCESS_READ &&
    858			     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) ||
    859			    (buffer->fault[fi]->access != FAULT_ACCESS_READ &&
    860			     buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH &&
    861			     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)) ||
    862			    (buffer->fault[fi]->access != FAULT_ACCESS_READ &&
    863			     buffer->fault[fi]->access != FAULT_ACCESS_WRITE &&
    864			     buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH &&
    865			     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_A)))
    866				break;
    867		}
    868
    869		/* If handling failed completely, cancel all faults. */
    870		if (ret) {
    871			while (fi < fn) {
    872				struct nouveau_svm_fault *fault =
    873					buffer->fault[fi++];
    874
    875				nouveau_svm_fault_cancel_fault(svm, fault);
    876			}
    877		} else
    878			replay++;
    879	}
    880
    881	/* Issue fault replay to the GPU. */
    882	if (replay)
    883		nouveau_svm_fault_replay(svm);
    884	return NVIF_NOTIFY_KEEP;
    885}
    886
    887static struct nouveau_pfnmap_args *
    888nouveau_pfns_to_args(void *pfns)
    889{
    890	return container_of(pfns, struct nouveau_pfnmap_args, p.phys);
    891}
    892
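        /* nouveau_pfns_alloc()/nouveau_pfns_map()/nouveau_pfns_free() let callers
         * (e.g. the dmem migration path) push a pre-built array of page entries to
         * the VMM in a single PFNMAP call.  Typical usage (sketch):
         *
         *   u64 *pfns = nouveau_pfns_alloc(npages);
         *   // fill pfns[0..npages-1] with NVIF_VMM_PFNMAP_V0_* encoded entries
         *   nouveau_pfns_map(svmm, mm, addr, pfns, npages);
         *   nouveau_pfns_free(pfns);
         */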
    893u64 *
    894nouveau_pfns_alloc(unsigned long npages)
    895{
    896	struct nouveau_pfnmap_args *args;
    897
    898	args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL);
    899	if (!args)
    900		return NULL;
    901
    902	args->i.type = NVIF_IOCTL_V0_MTHD;
    903	args->m.method = NVIF_VMM_V0_PFNMAP;
    904	args->p.page = PAGE_SHIFT;
    905
    906	return args->p.phys;
    907}
    908
    909void
    910nouveau_pfns_free(u64 *pfns)
    911{
    912	struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
    913
    914	kfree(args);
    915}
    916
    917void
    918nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
    919		 unsigned long addr, u64 *pfns, unsigned long npages)
    920{
    921	struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
    922	int ret;
    923
    924	args->p.addr = addr;
    925	args->p.size = npages << PAGE_SHIFT;
    926
    927	mutex_lock(&svmm->mutex);
    928
    929	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args,
    930				struct_size(args, p.phys, npages), NULL);
    931
    932	mutex_unlock(&svmm->mutex);
    933}
    934
    935static void
    936nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
    937{
    938	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
    939	nvif_notify_put(&buffer->notify);
    940}
    941
    942static int
    943nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id)
    944{
    945	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
    946	struct nvif_object *device = &svm->drm->client.device.object;
    947	buffer->get = nvif_rd32(device, buffer->getaddr);
    948	buffer->put = nvif_rd32(device, buffer->putaddr);
    949	SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
    950	return nvif_notify_get(&buffer->notify);
    951}
    952
    953static void
    954nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id)
    955{
    956	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
    957	int i;
    958
    959	if (buffer->fault) {
     960		for (i = 0; i < buffer->entries && buffer->fault[i]; i++)
    961			kfree(buffer->fault[i]);
    962		kvfree(buffer->fault);
    963	}
    964
    965	nouveau_svm_fault_buffer_fini(svm, id);
    966
    967	nvif_notify_dtor(&buffer->notify);
    968	nvif_object_dtor(&buffer->object);
    969}
    970
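        /* Allocate and map the HW replayable fault buffer object, hook up the fault
         * notify handler, and allocate the software fault cache sized to the number
         * of HW entries.
         */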
    971static int
    972nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
    973{
    974	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
    975	struct nouveau_drm *drm = svm->drm;
    976	struct nvif_object *device = &drm->client.device.object;
    977	struct nvif_clb069_v0 args = {};
    978	int ret;
    979
    980	buffer->id = id;
    981
    982	ret = nvif_object_ctor(device, "svmFaultBuffer", 0, oclass, &args,
    983			       sizeof(args), &buffer->object);
    984	if (ret < 0) {
    985		SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
    986		return ret;
    987	}
    988
    989	nvif_object_map(&buffer->object, NULL, 0);
    990	buffer->entries = args.entries;
    991	buffer->getaddr = args.get;
    992	buffer->putaddr = args.put;
    993
    994	ret = nvif_notify_ctor(&buffer->object, "svmFault", nouveau_svm_fault,
    995			       true, NVB069_V0_NTFY_FAULT, NULL, 0, 0,
    996			       &buffer->notify);
    997	if (ret)
    998		return ret;
    999
    1000	buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL);
   1001	if (!buffer->fault)
   1002		return -ENOMEM;
   1003
   1004	return nouveau_svm_fault_buffer_init(svm, id);
   1005}
   1006
   1007void
   1008nouveau_svm_resume(struct nouveau_drm *drm)
   1009{
   1010	struct nouveau_svm *svm = drm->svm;
   1011	if (svm)
   1012		nouveau_svm_fault_buffer_init(svm, 0);
   1013}
   1014
   1015void
   1016nouveau_svm_suspend(struct nouveau_drm *drm)
   1017{
   1018	struct nouveau_svm *svm = drm->svm;
   1019	if (svm)
   1020		nouveau_svm_fault_buffer_fini(svm, 0);
   1021}
   1022
   1023void
   1024nouveau_svm_fini(struct nouveau_drm *drm)
   1025{
   1026	struct nouveau_svm *svm = drm->svm;
   1027	if (svm) {
   1028		nouveau_svm_fault_buffer_dtor(svm, 0);
   1029		kfree(drm->svm);
   1030		drm->svm = NULL;
   1031	}
   1032}
   1033
   1034void
   1035nouveau_svm_init(struct nouveau_drm *drm)
   1036{
   1037	static const struct nvif_mclass buffers[] = {
   1038		{   VOLTA_FAULT_BUFFER_A, 0 },
   1039		{ MAXWELL_FAULT_BUFFER_A, 0 },
   1040		{}
   1041	};
   1042	struct nouveau_svm *svm;
   1043	int ret;
   1044
   1045	/* Disable on Volta and newer until channel recovery is fixed,
   1046	 * otherwise clients will have a trivial way to trash the GPU
   1047	 * for everyone.
   1048	 */
   1049	if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL)
   1050		return;
   1051
   1052	if (!(drm->svm = svm = kzalloc(sizeof(*drm->svm), GFP_KERNEL)))
   1053		return;
   1054
   1055	drm->svm->drm = drm;
   1056	mutex_init(&drm->svm->mutex);
   1057	INIT_LIST_HEAD(&drm->svm->inst);
   1058
   1059	ret = nvif_mclass(&drm->client.device.object, buffers);
   1060	if (ret < 0) {
   1061		SVM_DBG(svm, "No supported fault buffer class");
   1062		nouveau_svm_fini(drm);
   1063		return;
   1064	}
   1065
   1066	ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
   1067	if (ret) {
   1068		nouveau_svm_fini(drm);
   1069		return;
   1070	}
   1071
   1072	SVM_DBG(svm, "Initialised");
   1073}