cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_vce.c (29106B)


/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "amdgpu_cs.h"
#include "cikd.h"

/* 1 second timeout */
#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI	"amdgpu/kabini_vce.bin"
#define FIRMWARE_KAVERI	"amdgpu/kaveri_vce.bin"
#define FIRMWARE_HAWAII	"amdgpu/hawaii_vce.bin"
#define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"

#define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);

MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);

static void amdgpu_vce_idle_work_handler(struct work_struct *work);
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct dma_fence **fence);
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence);

/**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size for the new BO
 *
 * First step to get VCE online, allocate memory and load the firmware
 */
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned ucode_version, version_major, version_minor, binary_id;
	int i, r;

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	case CHIP_POLARIS10:
		fw_name = FIRMWARE_POLARIS10;
		break;
	case CHIP_POLARIS11:
		fw_name = FIRMWARE_POLARIS11;
		break;
	case CHIP_POLARIS12:
		fw_name = FIRMWARE_POLARIS12;
		break;
	case CHIP_VEGAM:
		fw_name = FIRMWARE_VEGAM;
		break;
	case CHIP_VEGA10:
		fw_name = FIRMWARE_VEGA10;
		break;
	case CHIP_VEGA12:
		fw_name = FIRMWARE_VEGA12;
		break;
	case CHIP_VEGA20:
		fw_name = FIRMWARE_VEGA20;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vce.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vce.fw);
		adev->vce.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;

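	/*
	 * The 32-bit ucode_version field packs the major version in bits
	 * 31:20, the minor version in bits 19:8 and the binary ID in bits
	 * 7:0; it is repacked below into adev->vce.fw_version as
	 * (major << 24) | (minor << 16) | (binary_id << 8).
	 */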
	ucode_version = le32_to_cpu(hdr->ucode_version);
	version_major = (ucode_version >> 20) & 0xfff;
	version_minor = (ucode_version >> 8) & 0xfff;
	binary_id = ucode_version & 0xff;
	DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
		version_major, version_minor, binary_id);
	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
				(binary_id << 8));

	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
		return r;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		atomic_set(&adev->vce.handles[i], 0);
		adev->vce.filp[i] = NULL;
	}

	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
	mutex_init(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_sw_fini - free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Last step on VCE teardown, free firmware memory
 */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	drm_sched_entity_destroy(&adev->vce.entity);

	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
		(void **)&adev->vce.cpu_addr);

	for (i = 0; i < adev->vce.num_rings; i++)
		amdgpu_ring_fini(&adev->vce.ring[i]);

	release_firmware(adev->vce.fw);
	mutex_destroy(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_entity_init - init entity
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct drm_gpu_scheduler *sched;
	int r;

	ring = &adev->vce.ring[0];
	sched = &ring->sched;
	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
				  &sched, 1, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCE run queue.\n");
		return r;
	}

	return 0;
}

/**
 * amdgpu_vce_suspend - unpin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
	int i;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->vce.vcpu_bo == NULL)
		return 0;

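	/* check whether any encoding session handles are still open */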
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (atomic_read(&adev->vce.handles[i]))
			break;

	if (i == AMDGPU_MAX_VCE_HANDLES)
		return 0;

	/* TODO: suspending running encoding sessions isn't supported */
	return -EINVAL;
}

/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	void *cpu_addr;
	const struct common_firmware_header *hdr;
	unsigned offset;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE map failed\n", r);
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);

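	/*
	 * Re-upload the firmware image, minus its header, into the VCPU BO.
	 * drm_dev_enter() fails once the device has been unplugged, in which
	 * case the copy is skipped.
	 */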
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
			    adev->vce.fw->size - offset);
		drm_dev_exit(idx);
	}

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);

	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}

/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * Power off VCE when it's not used anymore
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vce.idle_work.work);
	unsigned i, count = 0;

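	/* sum up the fences still outstanding on all VCE rings */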
	for (i = 0; i < adev->vce.num_rings; i++)
		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);

	if (count == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, false);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 0, 0);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_GATE);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
	}
}

/**
 * amdgpu_vce_ring_begin_use - power up VCE
 *
 * @ring: amdgpu ring
 *
 * Make sure VCE is powered up when we want to use it
 */
void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks;

	if (amdgpu_sriov_vf(adev))
		return;

	mutex_lock(&adev->vce.idle_mutex);
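	/*
	 * If the idle work was not pending, VCE is either powered down or
	 * was never powered up, so the clocks have to be enabled again.
	 */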
	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, true);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_UNGATE);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_UNGATE);
		}
	}
	mutex_unlock(&adev->vce.idle_mutex);
}

/**
 * amdgpu_vce_ring_end_use - power VCE down
 *
 * @ring: amdgpu ring
 *
 * Schedule work to power VCE down again
 */
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
}

/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	int i, r;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->vce.handles[i]);

		if (!handle || adev->vce.filp[i] != filp)
			continue;

		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
		if (r)
			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

		adev->vce.filp[i] = NULL;
		atomic_set(&adev->vce.handles[i], 0);
	}
}

/**
 * amdgpu_vce_get_create_msg - generate a VCE create msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @fence: optional fence to return
 *
 * Open up a stream for HW test
 */
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ib ib_msg;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	memset(&ib_msg, 0, sizeof(ib_msg));
	/* only one gpu page is needed, alloc +1 page to make addr aligned. */
	r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
			  AMDGPU_IB_POOL_DIRECT,
			  &ib_msg);
	if (r)
		goto err;

	ib = &job->ibs[0];
	/* let addr point to page boundary */
	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);

	/* stitch together a VCE create msg */
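	/*
	 * Each command below is a stream of dwords of the form
	 * [length in bytes][command id][payload...]: 0x00000001 binds the
	 * session handle, 0x01000001 creates the encode session and
	 * 0x05000005 sets up the feedback buffer. Firmware with major
	 * version 52 or newer expects a longer create command.
	 */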
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	if ((ring->adev->vce.fw_version >> 24) >= 52)
		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
	else
		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000042;
	ib->ptr[ib->length_dw++] = 0x0000000a;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000080;
	ib->ptr[ib->length_dw++] = 0x00000060;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x0000000c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	if ((ring->adev->vce.fw_version >> 24) >= 52) {
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
	}

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000001;

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	amdgpu_ib_free(ring->adev, &ib_msg, f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: direct or delayed pool
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     direct ? AMDGPU_IB_POOL_DIRECT :
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* stitch together a VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (direct)
		r = amdgpu_job_submit_direct(job, ring, &f);
	else
		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
 * @p: parser context
 * @ib: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Make sure that no BO crosses a 4GB boundary.
 */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
				  struct amdgpu_ib *ib, int lo, int hi,
				  unsigned size, int32_t index)
{
	int64_t offset = ((uint64_t)size) * ((int64_t)index);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i, fpfn, lpfn;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
	       ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
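	/*
	 * Compute the page frame window [fpfn, lpfn] the BO may be placed
	 * in so that the accessed range cannot cross a 4GB boundary.
	 */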
	if (index >= 0) {
		addr += offset;
		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
		lpfn = 0x100000000ULL >> PAGE_SHIFT;
	} else {
		fpfn = 0;
		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
	}

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	for (i = 0; i < bo->placement.num_placement; ++i) {
		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
		bo->placements[i].lpfn = bo->placements[i].lpfn ?
			min(bo->placements[i].lpfn, lpfn) : lpfn;
	}
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/**
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
 * @ib: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Patch relocation inside command stream with real buffer address
 */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
			       int lo, int hi, unsigned size, uint32_t index)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	if (index == 0xffffffff)
		index = 0;

	addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
	       ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
	addr += ((uint64_t)size) * ((uint64_t)index);

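	/* look up the BO and VM mapping backing this GPU virtual address */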
	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	if ((addr + (uint64_t)size) >
	    (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
			  addr, lo, hi);
		return -EINVAL;
	}

	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
	addr += amdgpu_bo_gpu_offset(bo);
	addr -= ((uint64_t)size) * ((uint64_t)index);

	amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
	amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));

	return 0;
}

/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: allocated a new handle?
 *
 * Validates the handle and returns the found session index or -EINVAL
 * if we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
				      uint32_t handle, uint32_t *allocated)
{
	unsigned i;

	/* validate the handle */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
			if (p->adev->vce.filp[i] != p->filp) {
				DRM_ERROR("VCE handle collision detected!\n");
				return -EINVAL;
			}
			return i;
		}
	}

	/* handle not found, try to allocate a new one */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
			p->adev->vce.filp[i] = p->filp;
			p->adev->vce.img_size[i] = 0;
			*allocated |= 1 << i;
			return i;
		}
	}

	DRM_ERROR("No more free VCE handles!\n");
	return -EINVAL;
}

/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @job: the job to parse
 * @ib: the IB to patch
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
			     struct amdgpu_job *job,
			     struct amdgpu_ib *ib)
{
	unsigned fb_idx = 0, bs_idx = 0;
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	uint32_t *size = &tmp;
	unsigned idx;
	int i, r = 0;

	job->vm = NULL;
	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);

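	/*
	 * First pass: validate the placement of all BOs referenced by the
	 * command stream before any of the addresses are patched.
	 */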
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_ib_get_value(ib, idx);
		uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000002: /* task info */
			fb_idx = amdgpu_ib_get_value(ib, idx + 6);
			bs_idx = amdgpu_ib_get_value(ib, idx + 7);
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
						   0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
						   0, 0);
			if (r)
				goto out;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
						   0, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_ib_get_value(ib, idx + 4);
			r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
						   tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
						   4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
						   0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
						   0, 0);
			if (r)
				goto out;
			break;
		}

		idx += len / 4;
	}

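	/*
	 * Second pass: track the session create/destroy state and patch
	 * the buffer addresses with the real GPU offsets.
	 */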
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_ib_get_value(ib, idx);
		uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_ib_get_value(ib, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			size = &p->adev->vce.img_size[session_idx];
			break;

		case 0x00000002: /* task info */
			fb_idx = amdgpu_ib_get_value(ib, idx + 6);
			bs_idx = amdgpu_ib_get_value(ib, idx + 7);
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			*size = amdgpu_ib_get_value(ib, idx + 8) *
				amdgpu_ib_get_value(ib, idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: /* config extension */
		case 0x04000002: /* pic control */
		case 0x04000005: /* rate control */
		case 0x04000007: /* motion estimation */
		case 0x04000008: /* rdo */
		case 0x04000009: /* vui */
		case 0x05000002: /* auxiliary buffer */
		case 0x05000009: /* clock table */
			break;

		case 0x0500000c: /* hw config */
			switch (p->adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
			case CHIP_KAVERI:
			case CHIP_MULLINS:
#endif
			case CHIP_CARRIZO:
				break;
			default:
				r = -EINVAL;
				goto out;
			}
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
						*size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
						*size / 3, 0);
			if (r)
				goto out;
			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
						*size * 2, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_ib_get_value(ib, idx + 4);
			r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
						tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
						4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
						idx + 2, *size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
						idx + 7, *size / 12, 0);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
 *
 * @p: parser context
 * @job: the job to parse
 * @ib: the IB to patch
 */
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib)
{
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	int i, r = 0, idx = 0;

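	/*
	 * In VM mode the addresses in the IB are already virtual and need
	 * no relocation, so only the session handling is validated here.
	 */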
	while (idx < ib->length_dw) {
		uint32_t len = amdgpu_ib_get_value(ib, idx);
		uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_ib_get_value(ib, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		default:
			break;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
		amdgpu_ib_free(p->adev, ib, NULL);
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @job: job to retrieve vmid from
 * @ib: the IB to execute
 * @flags: unused
 *
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	amdgpu_ring_write(ring, VCE_CMD_IB);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r, timeout = adev->usec_timeout;

	/* skip ring test for sriov */
	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= timeout)
		r = -ETIMEDOUT;

	return r;
}

/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	long r;

	/* skip vce ring1/2 ib test for now, since it's not reliable */
	if (ring != &ring->adev->vce.ring[0])
		return 0;

	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
	if (r)
		goto error;

	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	return r;
}

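/**
 * amdgpu_vce_get_ring_prio - get the priority level for a VCE ring
 *
 * @ring: ring index
 *
 * Map a VCE ring index to its scheduler priority level.
 */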
enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
{
	switch (ring) {
	case 0:
		return AMDGPU_RING_PRIO_0;
	case 1:
		return AMDGPU_RING_PRIO_1;
	case 2:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}