cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_uvd.c (34874B)


      1/*
      2 * Copyright 2011 Advanced Micro Devices, Inc.
      3 * All Rights Reserved.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the
      7 * "Software"), to deal in the Software without restriction, including
      8 * without limitation the rights to use, copy, modify, merge, publish,
      9 * distribute, sub license, and/or sell copies of the Software, and to
     10 * permit persons to whom the Software is furnished to do so, subject to
     11 * the following conditions:
     12 *
     13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
     20 *
     21 * The above copyright notice and this permission notice (including the
     22 * next paragraph) shall be included in all copies or substantial portions
     23 * of the Software.
     24 *
     25 */
     26/*
     27 * Authors:
     28 *    Christian König <deathsimple@vodafone.de>
     29 */
     30
     31#include <linux/firmware.h>
     32#include <linux/module.h>
     33
     34#include <drm/drm.h>
     35#include <drm/drm_drv.h>
     36
     37#include "amdgpu.h"
     38#include "amdgpu_pm.h"
     39#include "amdgpu_uvd.h"
     40#include "amdgpu_cs.h"
     41#include "cikd.h"
     42#include "uvd/uvd_4_2_d.h"
     43
     44#include "amdgpu_ras.h"
     45
     46/* 1 second timeout */
     47#define UVD_IDLE_TIMEOUT	msecs_to_jiffies(1000)
     48
     49/* Firmware versions for VI */
     50#define FW_1_65_10	((1 << 24) | (65 << 16) | (10 << 8))
     51#define FW_1_87_11	((1 << 24) | (87 << 16) | (11 << 8))
     52#define FW_1_87_12	((1 << 24) | (87 << 16) | (12 << 8))
     53#define FW_1_37_15	((1 << 24) | (37 << 16) | (15 << 8))
     54
     55/* Polaris10/11 firmware version */
     56#define FW_1_66_16	((1 << 24) | (66 << 16) | (16 << 8))
     57
     58/* Firmware Names */
     59#ifdef CONFIG_DRM_AMDGPU_SI
     60#define FIRMWARE_TAHITI		"amdgpu/tahiti_uvd.bin"
     61#define FIRMWARE_VERDE		"amdgpu/verde_uvd.bin"
     62#define FIRMWARE_PITCAIRN	"amdgpu/pitcairn_uvd.bin"
     63#define FIRMWARE_OLAND		"amdgpu/oland_uvd.bin"
     64#endif
     65#ifdef CONFIG_DRM_AMDGPU_CIK
     66#define FIRMWARE_BONAIRE	"amdgpu/bonaire_uvd.bin"
     67#define FIRMWARE_KABINI	"amdgpu/kabini_uvd.bin"
     68#define FIRMWARE_KAVERI	"amdgpu/kaveri_uvd.bin"
     69#define FIRMWARE_HAWAII	"amdgpu/hawaii_uvd.bin"
     70#define FIRMWARE_MULLINS	"amdgpu/mullins_uvd.bin"
     71#endif
     72#define FIRMWARE_TONGA		"amdgpu/tonga_uvd.bin"
     73#define FIRMWARE_CARRIZO	"amdgpu/carrizo_uvd.bin"
     74#define FIRMWARE_FIJI		"amdgpu/fiji_uvd.bin"
     75#define FIRMWARE_STONEY		"amdgpu/stoney_uvd.bin"
     76#define FIRMWARE_POLARIS10	"amdgpu/polaris10_uvd.bin"
     77#define FIRMWARE_POLARIS11	"amdgpu/polaris11_uvd.bin"
     78#define FIRMWARE_POLARIS12	"amdgpu/polaris12_uvd.bin"
     79#define FIRMWARE_VEGAM		"amdgpu/vegam_uvd.bin"
     80
     81#define FIRMWARE_VEGA10		"amdgpu/vega10_uvd.bin"
     82#define FIRMWARE_VEGA12		"amdgpu/vega12_uvd.bin"
     83#define FIRMWARE_VEGA20		"amdgpu/vega20_uvd.bin"
     84
      85/* These are common relative offsets for all asics, from uvd_7_0_offset.h */
     86#define UVD_GPCOM_VCPU_CMD		0x03c3
     87#define UVD_GPCOM_VCPU_DATA0	0x03c4
     88#define UVD_GPCOM_VCPU_DATA1	0x03c5
     89#define UVD_NO_OP				0x03ff
     90#define UVD_BASE_SI				0x3800
     91
     92/*
     93 * amdgpu_uvd_cs_ctx - Command submission parser context
     94 *
     95 * Used for emulating virtual memory support on UVD 4.2.
     96 */
     97struct amdgpu_uvd_cs_ctx {
     98	struct amdgpu_cs_parser *parser;
     99	unsigned reg, count;
    100	unsigned data0, data1;
    101	unsigned idx;
    102	struct amdgpu_ib *ib;
    103
     104	/* does the IB have a msg command */
    105	bool has_msg_cmd;
    106
    107	/* minimum buffer sizes */
    108	unsigned *buf_sizes;
    109};
    110
    111#ifdef CONFIG_DRM_AMDGPU_SI
    112MODULE_FIRMWARE(FIRMWARE_TAHITI);
    113MODULE_FIRMWARE(FIRMWARE_VERDE);
    114MODULE_FIRMWARE(FIRMWARE_PITCAIRN);
    115MODULE_FIRMWARE(FIRMWARE_OLAND);
    116#endif
    117#ifdef CONFIG_DRM_AMDGPU_CIK
    118MODULE_FIRMWARE(FIRMWARE_BONAIRE);
    119MODULE_FIRMWARE(FIRMWARE_KABINI);
    120MODULE_FIRMWARE(FIRMWARE_KAVERI);
    121MODULE_FIRMWARE(FIRMWARE_HAWAII);
    122MODULE_FIRMWARE(FIRMWARE_MULLINS);
    123#endif
    124MODULE_FIRMWARE(FIRMWARE_TONGA);
    125MODULE_FIRMWARE(FIRMWARE_CARRIZO);
    126MODULE_FIRMWARE(FIRMWARE_FIJI);
    127MODULE_FIRMWARE(FIRMWARE_STONEY);
    128MODULE_FIRMWARE(FIRMWARE_POLARIS10);
    129MODULE_FIRMWARE(FIRMWARE_POLARIS11);
    130MODULE_FIRMWARE(FIRMWARE_POLARIS12);
    131MODULE_FIRMWARE(FIRMWARE_VEGAM);
    132
    133MODULE_FIRMWARE(FIRMWARE_VEGA10);
    134MODULE_FIRMWARE(FIRMWARE_VEGA12);
    135MODULE_FIRMWARE(FIRMWARE_VEGA20);
    136
    137static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
    138static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
    139
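/**
 * amdgpu_uvd_create_msg_bo_helper - allocate a BO for UVD messages
 *
 * @adev: amdgpu_device pointer
 * @size: size of the buffer object in bytes
 * @bo_ptr: resulting message BO
 *
 * Allocate a kernel BO in GTT and, on hardware without 64 bit UVD
 * addressing, move and pin it into VRAM inside the 256MB segment the
 * UVD firmware can address.
 */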
    140static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
    141					   uint32_t size,
    142					   struct amdgpu_bo **bo_ptr)
    143{
    144	struct ttm_operation_ctx ctx = { true, false };
    145	struct amdgpu_bo *bo = NULL;
    146	void *addr;
    147	int r;
    148
    149	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
    150				      AMDGPU_GEM_DOMAIN_GTT,
    151				      &bo, NULL, &addr);
    152	if (r)
    153		return r;
    154
    155	if (adev->uvd.address_64_bit)
    156		goto succ;
    157
    158	amdgpu_bo_kunmap(bo);
    159	amdgpu_bo_unpin(bo);
    160	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
    161	amdgpu_uvd_force_into_uvd_segment(bo);
    162	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    163	if (r)
    164		goto err;
    165	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
    166	if (r)
    167		goto err_pin;
    168	r = amdgpu_bo_kmap(bo, &addr);
    169	if (r)
    170		goto err_kmap;
    171succ:
    172	amdgpu_bo_unreserve(bo);
    173	*bo_ptr = bo;
    174	return 0;
    175err_kmap:
    176	amdgpu_bo_unpin(bo);
    177err_pin:
    178err:
    179	amdgpu_bo_unreserve(bo);
    180	amdgpu_bo_unref(&bo);
    181	return r;
    182}
    183
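/**
 * amdgpu_uvd_sw_init - UVD software init
 *
 * @adev: amdgpu_device pointer
 *
 * Request and validate the UVD firmware, derive the firmware version and
 * the number of supported handles from its header, and allocate the VCPU
 * BO for each UVD instance.
 */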
    184int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
    185{
    186	unsigned long bo_size;
    187	const char *fw_name;
    188	const struct common_firmware_header *hdr;
    189	unsigned family_id;
    190	int i, j, r;
    191
    192	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
    193
    194	switch (adev->asic_type) {
    195#ifdef CONFIG_DRM_AMDGPU_SI
    196	case CHIP_TAHITI:
    197		fw_name = FIRMWARE_TAHITI;
    198		break;
    199	case CHIP_VERDE:
    200		fw_name = FIRMWARE_VERDE;
    201		break;
    202	case CHIP_PITCAIRN:
    203		fw_name = FIRMWARE_PITCAIRN;
    204		break;
    205	case CHIP_OLAND:
    206		fw_name = FIRMWARE_OLAND;
    207		break;
    208#endif
    209#ifdef CONFIG_DRM_AMDGPU_CIK
    210	case CHIP_BONAIRE:
    211		fw_name = FIRMWARE_BONAIRE;
    212		break;
    213	case CHIP_KABINI:
    214		fw_name = FIRMWARE_KABINI;
    215		break;
    216	case CHIP_KAVERI:
    217		fw_name = FIRMWARE_KAVERI;
    218		break;
    219	case CHIP_HAWAII:
    220		fw_name = FIRMWARE_HAWAII;
    221		break;
    222	case CHIP_MULLINS:
    223		fw_name = FIRMWARE_MULLINS;
    224		break;
    225#endif
    226	case CHIP_TONGA:
    227		fw_name = FIRMWARE_TONGA;
    228		break;
    229	case CHIP_FIJI:
    230		fw_name = FIRMWARE_FIJI;
    231		break;
    232	case CHIP_CARRIZO:
    233		fw_name = FIRMWARE_CARRIZO;
    234		break;
    235	case CHIP_STONEY:
    236		fw_name = FIRMWARE_STONEY;
    237		break;
    238	case CHIP_POLARIS10:
    239		fw_name = FIRMWARE_POLARIS10;
    240		break;
    241	case CHIP_POLARIS11:
    242		fw_name = FIRMWARE_POLARIS11;
    243		break;
    244	case CHIP_POLARIS12:
    245		fw_name = FIRMWARE_POLARIS12;
    246		break;
    247	case CHIP_VEGA10:
    248		fw_name = FIRMWARE_VEGA10;
    249		break;
    250	case CHIP_VEGA12:
    251		fw_name = FIRMWARE_VEGA12;
    252		break;
    253	case CHIP_VEGAM:
    254		fw_name = FIRMWARE_VEGAM;
    255		break;
    256	case CHIP_VEGA20:
    257		fw_name = FIRMWARE_VEGA20;
    258		break;
    259	default:
    260		return -EINVAL;
    261	}
    262
    263	r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
    264	if (r) {
    265		dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
    266			fw_name);
    267		return r;
    268	}
    269
    270	r = amdgpu_ucode_validate(adev->uvd.fw);
    271	if (r) {
    272		dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
    273			fw_name);
    274		release_firmware(adev->uvd.fw);
    275		adev->uvd.fw = NULL;
    276		return r;
    277	}
    278
    279	/* Set the default UVD handles that the firmware can handle */
    280	adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES;
    281
    282	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
    283	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
    284
    285	if (adev->asic_type < CHIP_VEGA20) {
    286		unsigned version_major, version_minor;
    287
    288		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
    289		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
    290		DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
    291			version_major, version_minor, family_id);
    292
    293		/*
    294		 * Limit the number of UVD handles depending on microcode major
     295		 * and minor versions. The first firmware version with support for
     296		 * 40 UVD handles is 1.80, so all subsequent versions should also
     297		 * have the same support.
    298		 */
    299		if ((version_major > 0x01) ||
    300		    ((version_major == 0x01) && (version_minor >= 0x50)))
    301			adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
    302
    303		adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
    304					(family_id << 8));
    305
    306		if ((adev->asic_type == CHIP_POLARIS10 ||
    307		     adev->asic_type == CHIP_POLARIS11) &&
    308		    (adev->uvd.fw_version < FW_1_66_16))
    309			DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n",
    310				  version_major, version_minor);
    311	} else {
    312		unsigned int enc_major, enc_minor, dec_minor;
    313
    314		dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
    315		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
    316		enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
    317		DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",
    318			enc_major, enc_minor, dec_minor, family_id);
    319
    320		adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
    321
    322		adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
    323	}
    324
    325	bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
    326		  +  AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
    327	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
    328		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
    329
    330	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
    331		if (adev->uvd.harvest_config & (1 << j))
    332			continue;
    333		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
    334					    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
    335					    &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
    336		if (r) {
    337			dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
    338			return r;
    339		}
    340	}
    341
    342	for (i = 0; i < adev->uvd.max_handles; ++i) {
    343		atomic_set(&adev->uvd.handles[i], 0);
    344		adev->uvd.filp[i] = NULL;
    345	}
    346
     347	/* from UVD v5.0 the HW addressing capacity increased to 64 bits */
    348	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
    349		adev->uvd.address_64_bit = true;
    350
    351	r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
    352	if (r)
    353		return r;
    354
    355	switch (adev->asic_type) {
    356	case CHIP_TONGA:
    357		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
    358		break;
    359	case CHIP_CARRIZO:
    360		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
    361		break;
    362	case CHIP_FIJI:
    363		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
    364		break;
    365	case CHIP_STONEY:
    366		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
    367		break;
    368	default:
    369		adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
    370	}
    371
    372	return 0;
    373}
    374
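/**
 * amdgpu_uvd_sw_fini - UVD software teardown
 *
 * @adev: amdgpu_device pointer
 *
 * Destroy the scheduler entity and the rings, free the per instance VCPU
 * BOs, the shared message BO and the firmware.
 */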
    375int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
    376{
    377	void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo);
    378	int i, j;
    379
    380	drm_sched_entity_destroy(&adev->uvd.entity);
    381
    382	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
    383		if (adev->uvd.harvest_config & (1 << j))
    384			continue;
    385		kvfree(adev->uvd.inst[j].saved_bo);
    386
    387		amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
    388				      &adev->uvd.inst[j].gpu_addr,
    389				      (void **)&adev->uvd.inst[j].cpu_addr);
    390
    391		amdgpu_ring_fini(&adev->uvd.inst[j].ring);
    392
    393		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
    394			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
    395	}
    396	amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr);
    397	release_firmware(adev->uvd.fw);
    398
    399	return 0;
    400}
    401
    402/**
     403 * amdgpu_uvd_entity_init - init the UVD scheduler entity
    404 *
    405 * @adev: amdgpu_device pointer
    406 *
    407 */
    408int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
    409{
    410	struct amdgpu_ring *ring;
    411	struct drm_gpu_scheduler *sched;
    412	int r;
    413
    414	ring = &adev->uvd.inst[0].ring;
    415	sched = &ring->sched;
    416	r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
    417				  &sched, 1, NULL);
    418	if (r) {
    419		DRM_ERROR("Failed setting up UVD kernel entity.\n");
    420		return r;
    421	}
    422
    423	return 0;
    424}
    425
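/**
 * amdgpu_uvd_suspend - save UVD VCPU state
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and copy the contents of each VCPU BO to system
 * memory so that it can be restored on resume.
 */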
    426int amdgpu_uvd_suspend(struct amdgpu_device *adev)
    427{
    428	unsigned size;
    429	void *ptr;
    430	int i, j, idx;
    431	bool in_ras_intr = amdgpu_ras_intr_triggered();
    432
    433	cancel_delayed_work_sync(&adev->uvd.idle_work);
    434
    435	/* only valid for physical mode */
    436	if (adev->asic_type < CHIP_POLARIS10) {
    437		for (i = 0; i < adev->uvd.max_handles; ++i)
    438			if (atomic_read(&adev->uvd.handles[i]))
    439				break;
    440
    441		if (i == adev->uvd.max_handles)
    442			return 0;
    443	}
    444
    445	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
    446		if (adev->uvd.harvest_config & (1 << j))
    447			continue;
    448		if (adev->uvd.inst[j].vcpu_bo == NULL)
    449			continue;
    450
    451		size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
    452		ptr = adev->uvd.inst[j].cpu_addr;
    453
    454		adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
    455		if (!adev->uvd.inst[j].saved_bo)
    456			return -ENOMEM;
    457
    458		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
    459			/* re-write 0 since err_event_athub will corrupt VCPU buffer */
    460			if (in_ras_intr)
    461				memset(adev->uvd.inst[j].saved_bo, 0, size);
    462			else
    463				memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
    464
    465			drm_dev_exit(idx);
    466		}
    467	}
    468
    469	if (in_ras_intr)
     470		DRM_WARN("UVD VCPU state may be lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
    471
    472	return 0;
    473}
    474
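/**
 * amdgpu_uvd_resume - restore UVD VCPU state
 *
 * @adev: amdgpu_device pointer
 *
 * Copy the saved VCPU BO contents back into place, or reinitialize the
 * BO when no saved state exists.
 */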
    475int amdgpu_uvd_resume(struct amdgpu_device *adev)
    476{
    477	unsigned size;
    478	void *ptr;
    479	int i, idx;
    480
    481	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
    482		if (adev->uvd.harvest_config & (1 << i))
    483			continue;
    484		if (adev->uvd.inst[i].vcpu_bo == NULL)
    485			return -EINVAL;
    486
    487		size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
    488		ptr = adev->uvd.inst[i].cpu_addr;
    489
    490		if (adev->uvd.inst[i].saved_bo != NULL) {
    491			if (drm_dev_enter(adev_to_drm(adev), &idx)) {
    492				memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
    493				drm_dev_exit(idx);
    494			}
    495			kvfree(adev->uvd.inst[i].saved_bo);
    496			adev->uvd.inst[i].saved_bo = NULL;
    497		} else {
    498			const struct common_firmware_header *hdr;
    499			unsigned offset;
    500
    501			hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
    502			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
    503				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
    504				if (drm_dev_enter(adev_to_drm(adev), &idx)) {
    505					memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
    506						    le32_to_cpu(hdr->ucode_size_bytes));
    507					drm_dev_exit(idx);
    508				}
    509				size -= le32_to_cpu(hdr->ucode_size_bytes);
    510				ptr += le32_to_cpu(hdr->ucode_size_bytes);
    511			}
    512			memset_io(ptr, 0, size);
    513			/* to restore uvd fence seq */
    514			amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
    515		}
    516	}
    517	return 0;
    518}
    519
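/**
 * amdgpu_uvd_free_handles - free all handles owned by a DRM file
 *
 * @adev: amdgpu_device pointer
 * @filp: DRM file the handles belong to
 *
 * Send a destroy message for every handle still owned by @filp and wait
 * for the resulting fences before releasing the handles.
 */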
    520void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
    521{
    522	struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
    523	int i, r;
    524
    525	for (i = 0; i < adev->uvd.max_handles; ++i) {
    526		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
    527
    528		if (handle != 0 && adev->uvd.filp[i] == filp) {
    529			struct dma_fence *fence;
    530
    531			r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
    532						       &fence);
    533			if (r) {
     534				DRM_ERROR("Error (%d) destroying UVD handle!\n", r);
    535				continue;
    536			}
    537
    538			dma_fence_wait(fence, false);
    539			dma_fence_put(fence);
    540
    541			adev->uvd.filp[i] = NULL;
    542			atomic_set(&adev->uvd.handles[i], 0);
    543		}
    544	}
    545}
    546
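/**
 * amdgpu_uvd_force_into_uvd_segment - restrict BO placement to the UVD segment
 *
 * @abo: buffer object to restrict
 *
 * Limit all placements of the BO to the first 256MB, the segment older
 * UVD firmware is able to address.
 */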
    547static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
    548{
    549	int i;
    550	for (i = 0; i < abo->placement.num_placement; ++i) {
    551		abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
    552		abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
    553	}
    554}
    555
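/**
 * amdgpu_uvd_get_addr_from_ctx - read the 64bit address of the current command
 *
 * @ctx: UVD parser context
 *
 * Combine the DATA0 and DATA1 dwords from the IB into a single 64bit
 * GPU address.
 */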
    556static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
    557{
    558	uint32_t lo, hi;
    559	uint64_t addr;
    560
    561	lo = amdgpu_ib_get_value(ctx->ib, ctx->data0);
    562	hi = amdgpu_ib_get_value(ctx->ib, ctx->data1);
    563	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
    564
    565	return addr;
    566}
    567
    568/**
    569 * amdgpu_uvd_cs_pass1 - first parsing round
    570 *
    571 * @ctx: UVD parser context
    572 *
    573 * Make sure UVD message and feedback buffers are in VRAM and
     574 * nobody is violating a 256MB boundary.
    575 */
    576static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
    577{
    578	struct ttm_operation_ctx tctx = { false, false };
    579	struct amdgpu_bo_va_mapping *mapping;
    580	struct amdgpu_bo *bo;
    581	uint32_t cmd;
    582	uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
    583	int r = 0;
    584
    585	r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
    586	if (r) {
    587		DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
    588		return r;
    589	}
    590
    591	if (!ctx->parser->adev->uvd.address_64_bit) {
    592		/* check if it's a message or feedback command */
    593		cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
    594		if (cmd == 0x0 || cmd == 0x3) {
    595			/* yes, force it into VRAM */
    596			uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
    597			amdgpu_bo_placement_from_domain(bo, domain);
    598		}
    599		amdgpu_uvd_force_into_uvd_segment(bo);
    600
    601		r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx);
    602	}
    603
    604	return r;
    605}
    606
    607/**
    608 * amdgpu_uvd_cs_msg_decode - handle UVD decode message
    609 *
    610 * @adev: amdgpu_device pointer
    611 * @msg: pointer to message structure
    612 * @buf_sizes: placeholder to put the different buffer lengths
    613 *
    614 * Peek into the decode message and calculate the necessary buffer sizes.
    615 */
    616static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
    617	unsigned buf_sizes[])
    618{
    619	unsigned stream_type = msg[4];
    620	unsigned width = msg[6];
    621	unsigned height = msg[7];
    622	unsigned dpb_size = msg[9];
    623	unsigned pitch = msg[28];
    624	unsigned level = msg[57];
    625
    626	unsigned width_in_mb = width / 16;
    627	unsigned height_in_mb = ALIGN(height / 16, 2);
    628	unsigned fs_in_mb = width_in_mb * height_in_mb;
    629
    630	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
    631	unsigned min_ctx_size = ~0;
    632
    633	image_size = width * height;
    634	image_size += image_size / 2;
    635	image_size = ALIGN(image_size, 1024);
    636
    637	switch (stream_type) {
    638	case 0: /* H264 */
     639		switch (level) {
    640		case 30:
    641			num_dpb_buffer = 8100 / fs_in_mb;
    642			break;
    643		case 31:
    644			num_dpb_buffer = 18000 / fs_in_mb;
    645			break;
    646		case 32:
    647			num_dpb_buffer = 20480 / fs_in_mb;
    648			break;
    649		case 41:
    650			num_dpb_buffer = 32768 / fs_in_mb;
    651			break;
    652		case 42:
    653			num_dpb_buffer = 34816 / fs_in_mb;
    654			break;
    655		case 50:
    656			num_dpb_buffer = 110400 / fs_in_mb;
    657			break;
    658		case 51:
    659			num_dpb_buffer = 184320 / fs_in_mb;
    660			break;
    661		default:
    662			num_dpb_buffer = 184320 / fs_in_mb;
    663			break;
    664		}
    665		num_dpb_buffer++;
    666		if (num_dpb_buffer > 17)
    667			num_dpb_buffer = 17;
    668
    669		/* reference picture buffer */
    670		min_dpb_size = image_size * num_dpb_buffer;
    671
    672		/* macroblock context buffer */
    673		min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;
    674
    675		/* IT surface buffer */
    676		min_dpb_size += width_in_mb * height_in_mb * 32;
    677		break;
    678
    679	case 1: /* VC1 */
    680
    681		/* reference picture buffer */
    682		min_dpb_size = image_size * 3;
    683
    684		/* CONTEXT_BUFFER */
    685		min_dpb_size += width_in_mb * height_in_mb * 128;
    686
    687		/* IT surface buffer */
    688		min_dpb_size += width_in_mb * 64;
    689
    690		/* DB surface buffer */
    691		min_dpb_size += width_in_mb * 128;
    692
    693		/* BP */
    694		tmp = max(width_in_mb, height_in_mb);
    695		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
    696		break;
    697
    698	case 3: /* MPEG2 */
    699
    700		/* reference picture buffer */
    701		min_dpb_size = image_size * 3;
    702		break;
    703
    704	case 4: /* MPEG4 */
    705
    706		/* reference picture buffer */
    707		min_dpb_size = image_size * 3;
    708
    709		/* CM */
    710		min_dpb_size += width_in_mb * height_in_mb * 64;
    711
    712		/* IT surface buffer */
    713		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
    714		break;
    715
    716	case 7: /* H264 Perf */
     717		switch (level) {
    718		case 30:
    719			num_dpb_buffer = 8100 / fs_in_mb;
    720			break;
    721		case 31:
    722			num_dpb_buffer = 18000 / fs_in_mb;
    723			break;
    724		case 32:
    725			num_dpb_buffer = 20480 / fs_in_mb;
    726			break;
    727		case 41:
    728			num_dpb_buffer = 32768 / fs_in_mb;
    729			break;
    730		case 42:
    731			num_dpb_buffer = 34816 / fs_in_mb;
    732			break;
    733		case 50:
    734			num_dpb_buffer = 110400 / fs_in_mb;
    735			break;
    736		case 51:
    737			num_dpb_buffer = 184320 / fs_in_mb;
    738			break;
    739		default:
    740			num_dpb_buffer = 184320 / fs_in_mb;
    741			break;
    742		}
    743		num_dpb_buffer++;
    744		if (num_dpb_buffer > 17)
    745			num_dpb_buffer = 17;
    746
    747		/* reference picture buffer */
    748		min_dpb_size = image_size * num_dpb_buffer;
    749
     750		if (!adev->uvd.use_ctx_buf) {
    751			/* macroblock context buffer */
    752			min_dpb_size +=
    753				width_in_mb * height_in_mb * num_dpb_buffer * 192;
    754
    755			/* IT surface buffer */
    756			min_dpb_size += width_in_mb * height_in_mb * 32;
    757		} else {
    758			/* macroblock context buffer */
    759			min_ctx_size =
    760				width_in_mb * height_in_mb * num_dpb_buffer * 192;
    761		}
    762		break;
    763
    764	case 8: /* MJPEG */
    765		min_dpb_size = 0;
    766		break;
    767
    768	case 16: /* H265 */
    769		image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
    770		image_size = ALIGN(image_size, 256);
    771
    772		num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
    773		min_dpb_size = image_size * num_dpb_buffer;
    774		min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
    775					   * 16 * num_dpb_buffer + 52 * 1024;
    776		break;
    777
    778	default:
    779		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
    780		return -EINVAL;
    781	}
    782
    783	if (width > pitch) {
    784		DRM_ERROR("Invalid UVD decoding target pitch!\n");
    785		return -EINVAL;
    786	}
    787
    788	if (dpb_size < min_dpb_size) {
    789		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
    790			  dpb_size, min_dpb_size);
    791		return -EINVAL;
    792	}
    793
    794	buf_sizes[0x1] = dpb_size;
    795	buf_sizes[0x2] = image_size;
    796	buf_sizes[0x4] = min_ctx_size;
    797	/* store image width to adjust nb memory pstate */
    798	adev->uvd.decode_image_width = width;
    799	return 0;
    800}
    801
    802/**
    803 * amdgpu_uvd_cs_msg - handle UVD message
    804 *
    805 * @ctx: UVD parser context
    806 * @bo: buffer object containing the message
    807 * @offset: offset into the buffer object
    808 *
    809 * Peek into the UVD message and extract the session id.
     810 * Make sure that we don't open up too many sessions.
    811 */
    812static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
    813			     struct amdgpu_bo *bo, unsigned offset)
    814{
    815	struct amdgpu_device *adev = ctx->parser->adev;
    816	int32_t *msg, msg_type, handle;
    817	void *ptr;
    818	long r;
    819	int i;
    820
    821	if (offset & 0x3F) {
    822		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
    823		return -EINVAL;
    824	}
    825
    826	r = amdgpu_bo_kmap(bo, &ptr);
    827	if (r) {
     828		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
    829		return r;
    830	}
    831
    832	msg = ptr + offset;
    833
    834	msg_type = msg[1];
    835	handle = msg[2];
    836
    837	if (handle == 0) {
    838		amdgpu_bo_kunmap(bo);
    839		DRM_ERROR("Invalid UVD handle!\n");
    840		return -EINVAL;
    841	}
    842
    843	switch (msg_type) {
    844	case 0:
    845		/* it's a create msg, calc image size (width * height) */
    846		amdgpu_bo_kunmap(bo);
    847
    848		/* try to alloc a new handle */
    849		for (i = 0; i < adev->uvd.max_handles; ++i) {
    850			if (atomic_read(&adev->uvd.handles[i]) == handle) {
     851				DRM_ERROR("Handle 0x%x already in use!\n",
    852					  handle);
    853				return -EINVAL;
    854			}
    855
    856			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
    857				adev->uvd.filp[i] = ctx->parser->filp;
    858				return 0;
    859			}
    860		}
    861
    862		DRM_ERROR("No more free UVD handles!\n");
    863		return -ENOSPC;
    864
    865	case 1:
    866		/* it's a decode msg, calc buffer sizes */
    867		r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes);
    868		amdgpu_bo_kunmap(bo);
    869		if (r)
    870			return r;
    871
    872		/* validate the handle */
    873		for (i = 0; i < adev->uvd.max_handles; ++i) {
    874			if (atomic_read(&adev->uvd.handles[i]) == handle) {
    875				if (adev->uvd.filp[i] != ctx->parser->filp) {
    876					DRM_ERROR("UVD handle collision detected!\n");
    877					return -EINVAL;
    878				}
    879				return 0;
    880			}
    881		}
    882
    883		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
    884		return -ENOENT;
    885
    886	case 2:
    887		/* it's a destroy msg, free the handle */
    888		for (i = 0; i < adev->uvd.max_handles; ++i)
    889			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
    890		amdgpu_bo_kunmap(bo);
    891		return 0;
    892
    893	default:
    894		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
    895	}
    896
    897	amdgpu_bo_kunmap(bo);
    898	return -EINVAL;
    899}
    900
    901/**
    902 * amdgpu_uvd_cs_pass2 - second parsing round
    903 *
    904 * @ctx: UVD parser context
    905 *
    906 * Patch buffer addresses, make sure buffer sizes are correct.
    907 */
    908static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
    909{
    910	struct amdgpu_bo_va_mapping *mapping;
    911	struct amdgpu_bo *bo;
    912	uint32_t cmd;
    913	uint64_t start, end;
    914	uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
    915	int r;
    916
    917	r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
    918	if (r) {
    919		DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
    920		return r;
    921	}
    922
    923	start = amdgpu_bo_gpu_offset(bo);
    924
    925	end = (mapping->last + 1 - mapping->start);
    926	end = end * AMDGPU_GPU_PAGE_SIZE + start;
    927
    928	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
    929	start += addr;
    930
    931	amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start));
    932	amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start));
    933
    934	cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
    935	if (cmd < 0x4) {
    936		if ((end - start) < ctx->buf_sizes[cmd]) {
     937			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
    938				  (unsigned)(end - start),
    939				  ctx->buf_sizes[cmd]);
    940			return -EINVAL;
    941		}
    942
    943	} else if (cmd == 0x206) {
    944		if ((end - start) < ctx->buf_sizes[4]) {
     945			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
    946					  (unsigned)(end - start),
    947					  ctx->buf_sizes[4]);
    948			return -EINVAL;
    949		}
    950	} else if ((cmd != 0x100) && (cmd != 0x204)) {
    951		DRM_ERROR("invalid UVD command %X!\n", cmd);
    952		return -EINVAL;
    953	}
    954
    955	if (!ctx->parser->adev->uvd.address_64_bit) {
    956		if ((start >> 28) != ((end - 1) >> 28)) {
    957			DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
    958				  start, end);
    959			return -EINVAL;
    960		}
    961
    962		if ((cmd == 0 || cmd == 0x3) &&
    963		    (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
    964			DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
    965				  start, end);
    966			return -EINVAL;
    967		}
    968	}
    969
    970	if (cmd == 0) {
    971		ctx->has_msg_cmd = true;
    972		r = amdgpu_uvd_cs_msg(ctx, bo, addr);
    973		if (r)
    974			return r;
    975	} else if (!ctx->has_msg_cmd) {
     976		DRM_ERROR("Message needed before other commands are sent!\n");
    977		return -EINVAL;
    978	}
    979
    980	return 0;
    981}
    982
    983/**
    984 * amdgpu_uvd_cs_reg - parse register writes
    985 *
    986 * @ctx: UVD parser context
    987 * @cb: callback function
    988 *
    989 * Parse the register writes, call cb on each complete command.
    990 */
    991static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
    992			     int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
    993{
    994	int i, r;
    995
    996	ctx->idx++;
    997	for (i = 0; i <= ctx->count; ++i) {
    998		unsigned reg = ctx->reg + i;
    999
   1000		if (ctx->idx >= ctx->ib->length_dw) {
   1001			DRM_ERROR("Register command after end of CS!\n");
   1002			return -EINVAL;
   1003		}
   1004
   1005		switch (reg) {
   1006		case mmUVD_GPCOM_VCPU_DATA0:
   1007			ctx->data0 = ctx->idx;
   1008			break;
   1009		case mmUVD_GPCOM_VCPU_DATA1:
   1010			ctx->data1 = ctx->idx;
   1011			break;
   1012		case mmUVD_GPCOM_VCPU_CMD:
   1013			r = cb(ctx);
   1014			if (r)
   1015				return r;
   1016			break;
   1017		case mmUVD_ENGINE_CNTL:
   1018		case mmUVD_NO_OP:
   1019			break;
   1020		default:
   1021			DRM_ERROR("Invalid reg 0x%X!\n", reg);
   1022			return -EINVAL;
   1023		}
   1024		ctx->idx++;
   1025	}
   1026	return 0;
   1027}
   1028
   1029/**
   1030 * amdgpu_uvd_cs_packets - parse UVD packets
   1031 *
   1032 * @ctx: UVD parser context
   1033 * @cb: callback function
   1034 *
   1035 * Parse the command stream packets.
   1036 */
   1037static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
   1038				 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
   1039{
   1040	int r;
   1041
   1042	for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
   1043		uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
   1044		unsigned type = CP_PACKET_GET_TYPE(cmd);
   1045		switch (type) {
   1046		case PACKET_TYPE0:
   1047			ctx->reg = CP_PACKET0_GET_REG(cmd);
   1048			ctx->count = CP_PACKET_GET_COUNT(cmd);
   1049			r = amdgpu_uvd_cs_reg(ctx, cb);
   1050			if (r)
   1051				return r;
   1052			break;
   1053		case PACKET_TYPE2:
   1054			++ctx->idx;
   1055			break;
   1056		default:
    1057			DRM_ERROR("Unknown packet type %d!\n", type);
   1058			return -EINVAL;
   1059		}
   1060	}
   1061	return 0;
   1062}
   1063
   1064/**
   1065 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
   1066 *
   1067 * @parser: Command submission parser context
   1068 * @job: the job to parse
   1069 * @ib: the IB to patch
   1070 *
   1071 * Parse the command stream, patch in addresses as necessary.
   1072 */
   1073int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
   1074			     struct amdgpu_job *job,
   1075			     struct amdgpu_ib *ib)
   1076{
   1077	struct amdgpu_uvd_cs_ctx ctx = {};
   1078	unsigned buf_sizes[] = {
   1079		[0x00000000]	=	2048,
   1080		[0x00000001]	=	0xFFFFFFFF,
   1081		[0x00000002]	=	0xFFFFFFFF,
   1082		[0x00000003]	=	2048,
   1083		[0x00000004]	=	0xFFFFFFFF,
   1084	};
   1085	int r;
   1086
   1087	job->vm = NULL;
   1088	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
   1089
   1090	if (ib->length_dw % 16) {
   1091		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
   1092			  ib->length_dw);
   1093		return -EINVAL;
   1094	}
   1095
   1096	ctx.parser = parser;
   1097	ctx.buf_sizes = buf_sizes;
   1098	ctx.ib = ib;
   1099
   1100	/* first round only required on chips without UVD 64 bit address support */
   1101	if (!parser->adev->uvd.address_64_bit) {
   1102		/* first round, make sure the buffers are actually in the UVD segment */
   1103		r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
   1104		if (r)
   1105			return r;
   1106	}
   1107
   1108	/* second round, patch buffer addresses into the command stream */
   1109	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
   1110	if (r)
   1111		return r;
   1112
   1113	if (!ctx.has_msg_cmd) {
   1114		DRM_ERROR("UVD-IBs need a msg command!\n");
   1115		return -EINVAL;
   1116	}
   1117
   1118	return 0;
   1119}
   1120
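/**
 * amdgpu_uvd_send_msg - submit a UVD message buffer to the VCPU
 *
 * @ring: UVD ring to use
 * @bo: buffer object containing the message
 * @direct: whether to submit directly to the ring or via the scheduler
 * @fence: optional fence returned for the submission
 *
 * Build a small IB that passes the message BO address to the VCPU and
 * submit it, either directly or through the UVD scheduler entity.
 */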
   1121static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
   1122			       bool direct, struct dma_fence **fence)
   1123{
   1124	struct amdgpu_device *adev = ring->adev;
   1125	struct dma_fence *f = NULL;
   1126	struct amdgpu_job *job;
   1127	struct amdgpu_ib *ib;
   1128	uint32_t data[4];
   1129	uint64_t addr;
   1130	long r;
   1131	int i;
   1132	unsigned offset_idx = 0;
   1133	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
   1134
   1135	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
   1136				     AMDGPU_IB_POOL_DELAYED, &job);
   1137	if (r)
   1138		return r;
   1139
   1140	if (adev->asic_type >= CHIP_VEGA10) {
   1141		offset_idx = 1 + ring->me;
   1142		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
   1143		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
   1144	}
   1145
   1146	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
   1147	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
   1148	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
   1149	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
   1150
   1151	ib = &job->ibs[0];
   1152	addr = amdgpu_bo_gpu_offset(bo);
   1153	ib->ptr[0] = data[0];
   1154	ib->ptr[1] = addr;
   1155	ib->ptr[2] = data[1];
   1156	ib->ptr[3] = addr >> 32;
   1157	ib->ptr[4] = data[2];
   1158	ib->ptr[5] = 0;
   1159	for (i = 6; i < 16; i += 2) {
   1160		ib->ptr[i] = data[3];
   1161		ib->ptr[i+1] = 0;
   1162	}
   1163	ib->length_dw = 16;
   1164
   1165	if (direct) {
   1166		r = dma_resv_wait_timeout(bo->tbo.base.resv,
   1167					  DMA_RESV_USAGE_KERNEL, false,
   1168					  msecs_to_jiffies(10));
   1169		if (r == 0)
   1170			r = -ETIMEDOUT;
   1171		if (r < 0)
   1172			goto err_free;
   1173
   1174		r = amdgpu_job_submit_direct(job, ring, &f);
   1175		if (r)
   1176			goto err_free;
   1177	} else {
   1178		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
   1179				     AMDGPU_SYNC_ALWAYS,
   1180				     AMDGPU_FENCE_OWNER_UNDEFINED);
   1181		if (r)
   1182			goto err_free;
   1183
   1184		r = amdgpu_job_submit(job, &adev->uvd.entity,
   1185				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
   1186		if (r)
   1187			goto err_free;
   1188	}
   1189
   1190	amdgpu_bo_reserve(bo, true);
   1191	amdgpu_bo_fence(bo, f, false);
   1192	amdgpu_bo_unreserve(bo);
   1193
   1194	if (fence)
   1195		*fence = dma_fence_get(f);
   1196	dma_fence_put(f);
   1197
   1198	return 0;
   1199
   1200err_free:
   1201	amdgpu_job_free(job);
   1202	return r;
   1203}
   1204
   1205/* multiple fence commands without any stream commands in between can
    1206   crash the vcpu so just try to emit a dummy create/destroy msg to
   1207   avoid this */
   1208int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
   1209			      struct dma_fence **fence)
   1210{
   1211	struct amdgpu_device *adev = ring->adev;
   1212	struct amdgpu_bo *bo = adev->uvd.ib_bo;
   1213	uint32_t *msg;
   1214	int i;
   1215
   1216	msg = amdgpu_bo_kptr(bo);
    1217	/* stitch together a UVD create msg */
   1218	msg[0] = cpu_to_le32(0x00000de4);
   1219	msg[1] = cpu_to_le32(0x00000000);
   1220	msg[2] = cpu_to_le32(handle);
   1221	msg[3] = cpu_to_le32(0x00000000);
   1222	msg[4] = cpu_to_le32(0x00000000);
   1223	msg[5] = cpu_to_le32(0x00000000);
   1224	msg[6] = cpu_to_le32(0x00000000);
   1225	msg[7] = cpu_to_le32(0x00000780);
   1226	msg[8] = cpu_to_le32(0x00000440);
   1227	msg[9] = cpu_to_le32(0x00000000);
   1228	msg[10] = cpu_to_le32(0x01b37000);
   1229	for (i = 11; i < 1024; ++i)
   1230		msg[i] = cpu_to_le32(0x0);
   1231
   1232	return amdgpu_uvd_send_msg(ring, bo, true, fence);
   1233
   1234}
   1235
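/**
 * amdgpu_uvd_get_destroy_msg - emit a dummy UVD destroy message
 *
 * @ring: UVD ring to use
 * @handle: session handle to destroy
 * @direct: whether to submit directly to the ring or via the scheduler
 * @fence: optional fence returned for the submission
 */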
   1236int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
   1237			       bool direct, struct dma_fence **fence)
   1238{
   1239	struct amdgpu_device *adev = ring->adev;
   1240	struct amdgpu_bo *bo = NULL;
   1241	uint32_t *msg;
   1242	int r, i;
   1243
   1244	if (direct) {
   1245		bo = adev->uvd.ib_bo;
   1246	} else {
   1247		r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo);
   1248		if (r)
   1249			return r;
   1250	}
   1251
   1252	msg = amdgpu_bo_kptr(bo);
    1253	/* stitch together a UVD destroy msg */
   1254	msg[0] = cpu_to_le32(0x00000de4);
   1255	msg[1] = cpu_to_le32(0x00000002);
   1256	msg[2] = cpu_to_le32(handle);
   1257	msg[3] = cpu_to_le32(0x00000000);
   1258	for (i = 4; i < 1024; ++i)
   1259		msg[i] = cpu_to_le32(0x0);
   1260
   1261	r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
   1262
   1263	if (!direct)
   1264		amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
   1265
   1266	return r;
   1267}
   1268
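/**
 * amdgpu_uvd_idle_work_handler - power off UVD when it is idle
 *
 * @work: delayed work item
 *
 * If no fences are pending on any UVD ring, gate the clocks and power
 * down the block, otherwise re-schedule the idle work.
 */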
   1269static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
   1270{
   1271	struct amdgpu_device *adev =
   1272		container_of(work, struct amdgpu_device, uvd.idle_work.work);
   1273	unsigned fences = 0, i, j;
   1274
   1275	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
   1276		if (adev->uvd.harvest_config & (1 << i))
   1277			continue;
   1278		fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
   1279		for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
   1280			fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
   1281		}
   1282	}
   1283
   1284	if (fences == 0) {
   1285		if (adev->pm.dpm_enabled) {
   1286			amdgpu_dpm_enable_uvd(adev, false);
   1287		} else {
   1288			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
   1289			/* shutdown the UVD block */
   1290			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
   1291							       AMD_PG_STATE_GATE);
   1292			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
   1293							       AMD_CG_STATE_GATE);
   1294		}
   1295	} else {
   1296		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
   1297	}
   1298}
   1299
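/**
 * amdgpu_uvd_ring_begin_use - power up UVD before ring use
 *
 * @ring: UVD ring about to be used
 *
 * Cancel the pending idle work and, if none was pending, bring the UVD
 * clocks and power back up.
 */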
   1300void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
   1301{
   1302	struct amdgpu_device *adev = ring->adev;
   1303	bool set_clocks;
   1304
   1305	if (amdgpu_sriov_vf(adev))
   1306		return;
   1307
   1308	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
   1309	if (set_clocks) {
   1310		if (adev->pm.dpm_enabled) {
   1311			amdgpu_dpm_enable_uvd(adev, true);
   1312		} else {
   1313			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
   1314			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
   1315							       AMD_CG_STATE_UNGATE);
   1316			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
   1317							       AMD_PG_STATE_UNGATE);
   1318		}
   1319	}
   1320}
   1321
   1322void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
   1323{
   1324	if (!amdgpu_sriov_vf(ring->adev))
   1325		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
   1326}
   1327
   1328/**
   1329 * amdgpu_uvd_ring_test_ib - test ib execution
   1330 *
   1331 * @ring: amdgpu_ring pointer
   1332 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
   1333 *
   1334 * Test if we can successfully execute an IB
   1335 */
   1336int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
   1337{
   1338	struct dma_fence *fence;
   1339	long r;
   1340
   1341	r = amdgpu_uvd_get_create_msg(ring, 1, &fence);
   1342	if (r)
   1343		goto error;
   1344
   1345	r = dma_fence_wait_timeout(fence, false, timeout);
   1346	dma_fence_put(fence);
   1347	if (r == 0)
   1348		r = -ETIMEDOUT;
   1349	if (r < 0)
   1350		goto error;
   1351
   1352	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
   1353	if (r)
   1354		goto error;
   1355
   1356	r = dma_fence_wait_timeout(fence, false, timeout);
   1357	if (r == 0)
   1358		r = -ETIMEDOUT;
   1359	else if (r > 0)
   1360		r = 0;
   1361
   1362	dma_fence_put(fence);
   1363
   1364error:
   1365	return r;
   1366}
   1367
   1368/**
   1369 * amdgpu_uvd_used_handles - returns used UVD handles
   1370 *
   1371 * @adev: amdgpu_device pointer
   1372 *
   1373 * Returns the number of UVD handles in use
   1374 */
   1375uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
   1376{
   1377	unsigned i;
   1378	uint32_t used_handles = 0;
   1379
   1380	for (i = 0; i < adev->uvd.max_handles; ++i) {
   1381		/*
   1382		 * Handles can be freed in any order, and not
   1383		 * necessarily linear. So we need to count
   1384		 * all non-zero handles.
   1385		 */
   1386		if (atomic_read(&adev->uvd.handles[i]))
   1387			used_handles++;
   1388	}
   1389
   1390	return used_handles;
   1391}