cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

radeon_uvd.c (25831B)


/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeon/R600_uvd.bin"
#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE_LEGACY	"radeon/BONAIRE_uvd.bin"
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

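/**
 * radeon_uvd_init - load the UVD microcode and allocate the vcpu bo
 *
 * @rdev: radeon_device pointer
 *
 * Picks the firmware image for the chip family, requests it from
 * userspace, then creates, pins and maps the VRAM buffer object that
 * holds the firmware, stack, heap and per-handle session areas.
 */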
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name = NULL, *legacy_fw_name = NULL;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		legacy_fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		legacy_fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		legacy_fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		legacy_fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		legacy_fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		legacy_fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		legacy_fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	rdev->uvd.fw_header_present = false;
	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
	if (fw_name) {
		/* Let's try to load the newer firmware first */
		r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				fw_name);
		} else {
			struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data;
			unsigned version_major, version_minor, family_id;

			r = radeon_ucode_validate(rdev->uvd_fw);
			if (r)
				return r;

			rdev->uvd.fw_header_present = true;

			family_id = (__force u32)(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version))
							 >> 24) & 0xff;
			version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version))
							 >> 8) & 0xff;
			DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
				 version_major, version_minor, family_id);

			/*
			 * Limit the number of UVD handles depending on
			 * microcode major and minor versions.
			 */
			if ((version_major >= 0x01) && (version_minor >= 0x37))
				rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
		}
	}

	/*
	 * In case there is only legacy firmware, or we encounter an error
	 * while loading the new firmware, we fall back to loading the legacy
	 * firmware now.
	 */
	if (!fw_name || r) {
		r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				legacy_fw_name);
			return r;
		}
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

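/**
 * radeon_uvd_fini - free the UVD buffer object and firmware
 *
 * @rdev: radeon_device pointer
 *
 * Unmaps, unpins and frees the vcpu bo, tears down the UVD ring and
 * releases the firmware image.
 */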
void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

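/**
 * radeon_uvd_suspend - close all open UVD streams
 *
 * @rdev: radeon_device pointer
 *
 * Sends a destroy message for every handle that is still open and
 * waits for the resulting fence before clearing the handle slot.
 */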
int radeon_uvd_suspend(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);

		if (handle != 0) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}

	return 0;
}

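/**
 * radeon_uvd_resume - reupload the firmware image
 *
 * @rdev: radeon_device pointer
 *
 * Copies the firmware back into the vcpu bo and zeroes the stack, heap
 * and session space behind it.
 */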
int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	memset_io((void __iomem *)ptr, 0, size);

	return 0;
}

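/**
 * radeon_uvd_force_into_uvd_segment - limit bo placements for UVD
 *
 * @rbo: buffer object to place
 * @allowed_domains: domains the caller allows for this bo
 *
 * Clamps all placements to the first 256MB and, unless the bo must stay
 * in VRAM, adds the second 256MB segment as an alternative placement.
 */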
void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

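/**
 * radeon_uvd_free_handles - destroy all streams owned by a DRM file
 *
 * @rdev: radeon_device pointer
 * @filp: DRM file the handles belong to
 *
 * Sends a destroy message for every handle still owned by @filp, used
 * when the file is closed without cleaning up its streams.
 */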
void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);

		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}

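/*
 * Validate a decode message: check that the target pitch covers the
 * width and that the decoded picture buffer is large enough for the
 * codec, then record the dpb and image sizes for the reloc checks.
 */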
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

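/*
 * Check whether the hardware can decode the given stream type;
 * MPEG2/MPEG4 support requires UVD 3 (CHIP_PALM and newer).
 */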
static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
				     unsigned stream_type)
{
	switch (stream_type) {
	case 0: /* H264 */
	case 1: /* VC1 */
		/* always supported */
		return 0;

	case 3: /* MPEG2 */
	case 4: /* MPEG4 */
		/* only since UVD 3 */
		if (p->rdev->family >= CHIP_PALM)
			return 0;

		fallthrough;
	default:
		DRM_ERROR("UVD codec not supported by hardware %d!\n",
			  stream_type);
		return -EINVAL;
	}
}

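/*
 * Parse a UVD message: create messages claim a free handle slot,
 * decode messages are checked against the codec, buffer sizes and
 * owning file, and destroy messages release the handle again.
 */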
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	void *ptr;
	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		radeon_bo_kunmap(bo);
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];

		r = radeon_uvd_validate_codec(p, msg[4]);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* try to alloc a new handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
				p->rdev->uvd.filp[i] = p->filp;
				p->rdev->uvd.img_size[i] = img_size;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, validate codec and calc buffer sizes */
		r = radeon_uvd_validate_codec(p, msg[4]);
		if (!r)
			r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				if (p->rdev->uvd.filp[i] != p->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
	}

	radeon_bo_kunmap(bo);
	return -EINVAL;
}

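/*
 * Patch a buffer address into the IB and check that the buffer is
 * large enough, does not cross a 256MB boundary and, for msg/fb
 * buffers, lies in the same 256MB segment as the firmware.
 */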
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_bo_list *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = p->chunk_relocs;
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = &p->relocs[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

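/*
 * Walk the registers written by a type-0 packet, remembering where the
 * GPCOM data words live and kicking off the reloc check when the
 * command register is written.
 */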
static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
		case UVD_NO_OP:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

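/**
 * radeon_uvd_cs_parse - parse and validate a UVD command stream
 *
 * @p: parser structure holding the IB and relocation chunk
 *
 * Walks all packets in the IB and rejects anything but well-formed
 * register writes; exactly one message command is required per IB.
 */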
int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command? */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000]	=	2048,
		[0x00000001]	=	32 * 1024 * 1024,
		[0x00000002]	=	2048 * 1152 * 3,
		[0x00000003]	=	2048,
	};

	if (p->chunk_ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunk_ib->length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk!\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunk_ib->length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

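/*
 * Build a minimal 16-dword IB that hands the message address to the
 * VCPU and schedule it, optionally returning the submission fence.
 */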
static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib.ptr[i] = PACKET0(UVD_NO_OP, 0);
		ib.ptr[i+1] = 0;
	}
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * multiple fence commands without any stream commands in between can
 * crash the vcpu so just try to emit a dummy create/destroy msg to
 * avoid this
 */
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel(0x0, (void __iomem *)&msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	writel(0x0, &msg[4]);
	writel(0x0, &msg[5]);
	writel(0x0, &msg[6]);
	writel((__force u32)cpu_to_le32(0x00000780), &msg[7]);
	writel((__force u32)cpu_to_le32(0x00000440), &msg[8]);
	writel(0x0, &msg[9]);
	writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]);
	for (i = 11; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel((__force u32)cpu_to_le32(0x00000002), &msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	for (i = 4; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

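/*
 * Delayed work handler: once no UVD fences are left outstanding, drop
 * the UVD clocks (or let DPM power the block down); otherwise re-arm
 * the idle timer.
 */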
static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

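/*
 * Called on every UVD submission: pushes the idle timeout back and, if
 * the idle work was not pending anymore, brings the clocks back up.
 */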
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);

	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;

		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

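/*
 * Pick a post divider that brings the VCO frequency down to at most
 * target_freq, respecting the minimum value and the rule that dividers
 * above pd_even must be even. Worked example (numbers purely
 * illustrative, not from a chip table): vco_freq=122400,
 * target_freq=53300, pd_min=2, pd_even=2 gives 122400/53300 = 2,
 * bumped to 3 since 122400/2 > 53300, then to 4 since 3 is odd and
 * above pd_even.
 */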
static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

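/**
 * radeon_uvd_send_upll_ctlreq - strobe UPLL_CTLREQ and wait for the ACKs
 *
 * @rdev: radeon_device pointer
 * @cg_upll_func_cntl: offset of the CG_UPLL_FUNC_CNTL register
 *
 * Asserts the control request bit and polls for up to a second until
 * both CTLACK bits are set. Returns 0 on success, -ETIMEDOUT otherwise.
 */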
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}