cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

kfd_packet_manager.c (11598B)


      1// SPDX-License-Identifier: GPL-2.0 OR MIT
      2/*
      3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the "Software"),
      7 * to deal in the Software without restriction, including without limitation
      8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9 * and/or sell copies of the Software, and to permit persons to whom the
     10 * Software is furnished to do so, subject to the following conditions:
     11 *
     12 * The above copyright notice and this permission notice shall be included in
     13 * all copies or substantial portions of the Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21 * OTHER DEALINGS IN THE SOFTWARE.
     22 *
     23 */
     24
     25#include <linux/slab.h>
     26#include <linux/mutex.h>
     27#include "kfd_device_queue_manager.h"
     28#include "kfd_kernel_queue.h"
     29#include "kfd_priv.h"
     30
     31static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
     32				unsigned int buffer_size_bytes)
     33{
     34	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
     35
     36	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
     37	     "Runlist IB overflow");
     38	*wptr = temp;
     39}
     40
     41static void pm_calc_rlib_size(struct packet_manager *pm,
     42				unsigned int *rlib_size,
     43				bool *over_subscription)
     44{
     45	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
     46	unsigned int map_queue_size;
     47	unsigned int max_proc_per_quantum = 1;
     48	struct kfd_dev *dev = pm->dqm->dev;
     49
     50	process_count = pm->dqm->processes_count;
     51	queue_count = pm->dqm->active_queue_count;
     52	compute_queue_count = pm->dqm->active_cp_queue_count;
     53	gws_queue_count = pm->dqm->gws_queue_count;
     54
     55	/* check if there is over subscription
     56	 * Note: the arbitration between the number of VMIDs and
     57	 * hws_max_conc_proc has been done in
     58	 * kgd2kfd_device_init().
     59	 */
     60	*over_subscription = false;
     61
     62	if (dev->max_proc_per_quantum > 1)
     63		max_proc_per_quantum = dev->max_proc_per_quantum;
     64
     65	if ((process_count > max_proc_per_quantum) ||
     66	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
     67	    gws_queue_count > 1) {
     68		*over_subscription = true;
     69		pr_debug("Over subscribed runlist\n");
     70	}
     71
     72	map_queue_size = pm->pmf->map_queues_size;
     73	/* calculate run list ib allocation size */
     74	*rlib_size = process_count * pm->pmf->map_process_size +
     75		     queue_count * map_queue_size;
     76
     77	/*
     78	 * Increase the allocation size in case we need a chained run list
     79	 * when over subscription
     80	 */
     81	if (*over_subscription)
     82		*rlib_size += pm->pmf->runlist_size;
     83
     84	pr_debug("runlist ib size %d\n", *rlib_size);
     85}
     86
     87static int pm_allocate_runlist_ib(struct packet_manager *pm,
     88				unsigned int **rl_buffer,
     89				uint64_t *rl_gpu_buffer,
     90				unsigned int *rl_buffer_size,
     91				bool *is_over_subscription)
     92{
     93	int retval;
     94
     95	if (WARN_ON(pm->allocated))
     96		return -EINVAL;
     97
     98	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
     99
    100	mutex_lock(&pm->lock);
    101
    102	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
    103					&pm->ib_buffer_obj);
    104
    105	if (retval) {
    106		pr_err("Failed to allocate runlist IB\n");
    107		goto out;
    108	}
    109
    110	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
    111	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;
    112
    113	memset(*rl_buffer, 0, *rl_buffer_size);
    114	pm->allocated = true;
    115
    116out:
    117	mutex_unlock(&pm->lock);
    118	return retval;
    119}
    120
    121static int pm_create_runlist_ib(struct packet_manager *pm,
    122				struct list_head *queues,
    123				uint64_t *rl_gpu_addr,
    124				size_t *rl_size_bytes)
    125{
    126	unsigned int alloc_size_bytes;
    127	unsigned int *rl_buffer, rl_wptr, i;
    128	int retval, processes_mapped;
    129	struct device_process_node *cur;
    130	struct qcm_process_device *qpd;
    131	struct queue *q;
    132	struct kernel_queue *kq;
    133	bool is_over_subscription;
    134
    135	rl_wptr = retval = processes_mapped = 0;
    136
    137	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
    138				&alloc_size_bytes, &is_over_subscription);
    139	if (retval)
    140		return retval;
    141
    142	*rl_size_bytes = alloc_size_bytes;
    143	pm->ib_size_bytes = alloc_size_bytes;
    144
    145	pr_debug("Building runlist ib process count: %d queues count %d\n",
    146		pm->dqm->processes_count, pm->dqm->active_queue_count);
    147
    148	/* build the run list ib packet */
    149	list_for_each_entry(cur, queues, list) {
    150		qpd = cur->qpd;
    151		/* build map process packet */
    152		if (processes_mapped >= pm->dqm->processes_count) {
    153			pr_debug("Not enough space left in runlist IB\n");
    154			pm_release_ib(pm);
    155			return -ENOMEM;
    156		}
    157
    158		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
    159		if (retval)
    160			return retval;
    161
    162		processes_mapped++;
    163		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
    164				alloc_size_bytes);
    165
    166		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
    167			if (!kq->queue->properties.is_active)
    168				continue;
    169
    170			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
    171				kq->queue->queue, qpd->is_debug);
    172
    173			retval = pm->pmf->map_queues(pm,
    174						&rl_buffer[rl_wptr],
    175						kq->queue,
    176						qpd->is_debug);
    177			if (retval)
    178				return retval;
    179
    180			inc_wptr(&rl_wptr,
    181				pm->pmf->map_queues_size,
    182				alloc_size_bytes);
    183		}
    184
    185		list_for_each_entry(q, &qpd->queues_list, list) {
    186			if (!q->properties.is_active)
    187				continue;
    188
    189			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
    190				q->queue, qpd->is_debug);
    191
    192			retval = pm->pmf->map_queues(pm,
    193						&rl_buffer[rl_wptr],
    194						q,
    195						qpd->is_debug);
    196
    197			if (retval)
    198				return retval;
    199
    200			inc_wptr(&rl_wptr,
    201				pm->pmf->map_queues_size,
    202				alloc_size_bytes);
    203		}
    204	}
    205
    206	pr_debug("Finished map process and queues to runlist\n");
    207
    208	if (is_over_subscription) {
    209		if (!pm->is_over_subscription)
    210			pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
    211		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
    212					*rl_gpu_addr,
    213					alloc_size_bytes / sizeof(uint32_t),
    214					true);
    215	}
    216	pm->is_over_subscription = is_over_subscription;
    217
    218	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
    219		pr_debug("0x%2X ", rl_buffer[i]);
    220	pr_debug("\n");
    221
    222	return retval;
    223}
    224
    225int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
    226{
    227	switch (dqm->dev->adev->asic_type) {
    228	case CHIP_KAVERI:
    229	case CHIP_HAWAII:
    230		/* PM4 packet structures on CIK are the same as on VI */
    231	case CHIP_CARRIZO:
    232	case CHIP_TONGA:
    233	case CHIP_FIJI:
    234	case CHIP_POLARIS10:
    235	case CHIP_POLARIS11:
    236	case CHIP_POLARIS12:
    237	case CHIP_VEGAM:
    238		pm->pmf = &kfd_vi_pm_funcs;
    239		break;
    240	default:
    241		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2))
    242			pm->pmf = &kfd_aldebaran_pm_funcs;
    243		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
    244			pm->pmf = &kfd_v9_pm_funcs;
    245		else {
    246			WARN(1, "Unexpected ASIC family %u",
    247			     dqm->dev->adev->asic_type);
    248			return -EINVAL;
    249		}
    250	}
    251
    252	pm->dqm = dqm;
    253	mutex_init(&pm->lock);
    254	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
    255	if (!pm->priv_queue) {
    256		mutex_destroy(&pm->lock);
    257		return -ENOMEM;
    258	}
    259	pm->allocated = false;
    260
    261	return 0;
    262}
    263
    264void pm_uninit(struct packet_manager *pm, bool hanging)
    265{
    266	mutex_destroy(&pm->lock);
    267	kernel_queue_uninit(pm->priv_queue, hanging);
    268	pm->priv_queue = NULL;
    269}
    270
    271int pm_send_set_resources(struct packet_manager *pm,
    272				struct scheduling_resources *res)
    273{
    274	uint32_t *buffer, size;
    275	int retval = 0;
    276
    277	size = pm->pmf->set_resources_size;
    278	mutex_lock(&pm->lock);
    279	kq_acquire_packet_buffer(pm->priv_queue,
    280					size / sizeof(uint32_t),
    281					(unsigned int **)&buffer);
    282	if (!buffer) {
    283		pr_err("Failed to allocate buffer on kernel queue\n");
    284		retval = -ENOMEM;
    285		goto out;
    286	}
    287
    288	retval = pm->pmf->set_resources(pm, buffer, res);
    289	if (!retval)
    290		kq_submit_packet(pm->priv_queue);
    291	else
    292		kq_rollback_packet(pm->priv_queue);
    293
    294out:
    295	mutex_unlock(&pm->lock);
    296
    297	return retval;
    298}
    299
    300int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
    301{
    302	uint64_t rl_gpu_ib_addr;
    303	uint32_t *rl_buffer;
    304	size_t rl_ib_size, packet_size_dwords;
    305	int retval;
    306
    307	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
    308					&rl_ib_size);
    309	if (retval)
    310		goto fail_create_runlist_ib;
    311
    312	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
    313
    314	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
    315	mutex_lock(&pm->lock);
    316
    317	retval = kq_acquire_packet_buffer(pm->priv_queue,
    318					packet_size_dwords, &rl_buffer);
    319	if (retval)
    320		goto fail_acquire_packet_buffer;
    321
    322	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
    323					rl_ib_size / sizeof(uint32_t), false);
    324	if (retval)
    325		goto fail_create_runlist;
    326
    327	kq_submit_packet(pm->priv_queue);
    328
    329	mutex_unlock(&pm->lock);
    330
    331	return retval;
    332
    333fail_create_runlist:
    334	kq_rollback_packet(pm->priv_queue);
    335fail_acquire_packet_buffer:
    336	mutex_unlock(&pm->lock);
    337fail_create_runlist_ib:
    338	pm_release_ib(pm);
    339	return retval;
    340}
    341
    342int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
    343			uint64_t fence_value)
    344{
    345	uint32_t *buffer, size;
    346	int retval = 0;
    347
    348	if (WARN_ON(!fence_address))
    349		return -EFAULT;
    350
    351	size = pm->pmf->query_status_size;
    352	mutex_lock(&pm->lock);
    353	kq_acquire_packet_buffer(pm->priv_queue,
    354			size / sizeof(uint32_t), (unsigned int **)&buffer);
    355	if (!buffer) {
    356		pr_err("Failed to allocate buffer on kernel queue\n");
    357		retval = -ENOMEM;
    358		goto out;
    359	}
    360
    361	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
    362	if (!retval)
    363		kq_submit_packet(pm->priv_queue);
    364	else
    365		kq_rollback_packet(pm->priv_queue);
    366
    367out:
    368	mutex_unlock(&pm->lock);
    369	return retval;
    370}
    371
    372int pm_send_unmap_queue(struct packet_manager *pm,
    373			enum kfd_unmap_queues_filter filter,
    374			uint32_t filter_param, bool reset)
    375{
    376	uint32_t *buffer, size;
    377	int retval = 0;
    378
    379	size = pm->pmf->unmap_queues_size;
    380	mutex_lock(&pm->lock);
    381	kq_acquire_packet_buffer(pm->priv_queue,
    382			size / sizeof(uint32_t), (unsigned int **)&buffer);
    383	if (!buffer) {
    384		pr_err("Failed to allocate buffer on kernel queue\n");
    385		retval = -ENOMEM;
    386		goto out;
    387	}
    388
    389	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
    390	if (!retval)
    391		kq_submit_packet(pm->priv_queue);
    392	else
    393		kq_rollback_packet(pm->priv_queue);
    394
    395out:
    396	mutex_unlock(&pm->lock);
    397	return retval;
    398}
    399
    400void pm_release_ib(struct packet_manager *pm)
    401{
    402	mutex_lock(&pm->lock);
    403	if (pm->allocated) {
    404		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
    405		pm->allocated = false;
    406	}
    407	mutex_unlock(&pm->lock);
    408}
    409
    410#if defined(CONFIG_DEBUG_FS)
    411
    412int pm_debugfs_runlist(struct seq_file *m, void *data)
    413{
    414	struct packet_manager *pm = data;
    415
    416	mutex_lock(&pm->lock);
    417
    418	if (!pm->allocated) {
    419		seq_puts(m, "  No active runlist\n");
    420		goto out;
    421	}
    422
    423	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
    424		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
    425
    426out:
    427	mutex_unlock(&pm->lock);
    428	return 0;
    429}
    430
    431int pm_debugfs_hang_hws(struct packet_manager *pm)
    432{
    433	uint32_t *buffer, size;
    434	int r = 0;
    435
    436	if (!pm->priv_queue)
    437		return -EAGAIN;
    438
    439	size = pm->pmf->query_status_size;
    440	mutex_lock(&pm->lock);
    441	kq_acquire_packet_buffer(pm->priv_queue,
    442			size / sizeof(uint32_t), (unsigned int **)&buffer);
    443	if (!buffer) {
    444		pr_err("Failed to allocate buffer on kernel queue\n");
    445		r = -ENOMEM;
    446		goto out;
    447	}
    448	memset(buffer, 0x55, size);
    449	kq_submit_packet(pm->priv_queue);
    450
    451	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
    452		buffer[0], buffer[1], buffer[2], buffer[3],
    453		buffer[4], buffer[5], buffer[6]);
    454out:
    455	mutex_unlock(&pm->lock);
    456	return r;
    457}
    458
    459
    460#endif