v3d_sched.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
v3d_sched.c (11898B)
      1// SPDX-License-Identifier: GPL-2.0+
      2/* Copyright (C) 2018 Broadcom */
      3
      4/**
      5 * DOC: Broadcom V3D scheduling
      6 *
      7 * The shared DRM GPU scheduler is used to coordinate submitting jobs
      8 * to the hardware.  Each DRM fd (roughly a client process) gets its
      9 * own scheduler entity, which will process jobs in order.  The GPU
     10 * scheduler will round-robin between clients to submit the next job.
     11 *
     12 * For simplicity, and in order to keep latency low for interactive
     13 * jobs when bulk background jobs are queued up, we submit a new job
     14 * to the HW only when it has completed the last one, instead of
     15 * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
     16 * drm_sched_job_add_dependency() to manage the dependency between bin and
     17 * render, instead of having the clients submit jobs using the HW's
     18 * semaphores to interlock between them.
     19 */
     20
     21#include <linux/kthread.h>
     22
     23#include "v3d_drv.h"
     24#include "v3d_regs.h"
     25#include "v3d_trace.h"
     26
     27static struct v3d_job *
     28to_v3d_job(struct drm_sched_job *sched_job)
     29{
     30	return container_of(sched_job, struct v3d_job, base);
     31}
     32
     33static struct v3d_bin_job *
     34to_bin_job(struct drm_sched_job *sched_job)
     35{
     36	return container_of(sched_job, struct v3d_bin_job, base.base);
     37}
     38
     39static struct v3d_render_job *
     40to_render_job(struct drm_sched_job *sched_job)
     41{
     42	return container_of(sched_job, struct v3d_render_job, base.base);
     43}
     44
     45static struct v3d_tfu_job *
     46to_tfu_job(struct drm_sched_job *sched_job)
     47{
     48	return container_of(sched_job, struct v3d_tfu_job, base.base);
     49}
     50
     51static struct v3d_csd_job *
     52to_csd_job(struct drm_sched_job *sched_job)
     53{
     54	return container_of(sched_job, struct v3d_csd_job, base.base);
     55}
     56
     57static void
     58v3d_sched_job_free(struct drm_sched_job *sched_job)
     59{
     60	struct v3d_job *job = to_v3d_job(sched_job);
     61
     62	v3d_job_cleanup(job);
     63}
     64
     65static void
     66v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
     67{
     68	if (job->perfmon != v3d->active_perfmon)
     69		v3d_perfmon_stop(v3d, v3d->active_perfmon, true);
     70
     71	if (job->perfmon && v3d->active_perfmon != job->perfmon)
     72		v3d_perfmon_start(v3d, job->perfmon);
     73}
     74
     75static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
     76{
     77	struct v3d_bin_job *job = to_bin_job(sched_job);
     78	struct v3d_dev *v3d = job->base.v3d;
     79	struct drm_device *dev = &v3d->drm;
     80	struct dma_fence *fence;
     81	unsigned long irqflags;
     82
     83	if (unlikely(job->base.base.s_fence->finished.error))
     84		return NULL;
     85
     86	/* Lock required around bin_job update vs
     87	 * v3d_overflow_mem_work().
     88	 */
     89	spin_lock_irqsave(&v3d->job_lock, irqflags);
     90	v3d->bin_job = job;
     91	/* Clear out the overflow allocation, so we don't
     92	 * reuse the overflow attached to a previous job.
     93	 */
     94	V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
     95	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
     96
     97	v3d_invalidate_caches(v3d);
     98
     99	fence = v3d_fence_create(v3d, V3D_BIN);
    100	if (IS_ERR(fence))
    101		return NULL;
    102
    103	if (job->base.irq_fence)
    104		dma_fence_put(job->base.irq_fence);
    105	job->base.irq_fence = dma_fence_get(fence);
    106
    107	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
    108			    job->start, job->end);
    109
    110	v3d_switch_perfmon(v3d, &job->base);
    111
    112	/* Set the current and end address of the control list.
    113	 * Writing the end register is what starts the job.
    114	 */
    115	if (job->qma) {
    116		V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
    117		V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
    118	}
    119	if (job->qts) {
    120		V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
    121			       V3D_CLE_CT0QTS_ENABLE |
    122			       job->qts);
    123	}
    124	V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
    125	V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
    126
    127	return fence;
    128}
    129
    130static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
    131{
    132	struct v3d_render_job *job = to_render_job(sched_job);
    133	struct v3d_dev *v3d = job->base.v3d;
    134	struct drm_device *dev = &v3d->drm;
    135	struct dma_fence *fence;
    136
    137	if (unlikely(job->base.base.s_fence->finished.error))
    138		return NULL;
    139
    140	v3d->render_job = job;
    141
    142	/* Can we avoid this flush?  We need to be careful of
    143	 * scheduling, though -- imagine job0 rendering to texture and
    144	 * job1 reading, and them being executed as bin0, bin1,
    145	 * render0, render1, so that render1's flush at bin time
    146	 * wasn't enough.
    147	 */
    148	v3d_invalidate_caches(v3d);
    149
    150	fence = v3d_fence_create(v3d, V3D_RENDER);
    151	if (IS_ERR(fence))
    152		return NULL;
    153
    154	if (job->base.irq_fence)
    155		dma_fence_put(job->base.irq_fence);
    156	job->base.irq_fence = dma_fence_get(fence);
    157
    158	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
    159			    job->start, job->end);
    160
    161	v3d_switch_perfmon(v3d, &job->base);
    162
    163	/* XXX: Set the QCFG */
    164
    165	/* Set the current and end address of the control list.
    166	 * Writing the end register is what starts the job.
    167	 */
    168	V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
    169	V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
    170
    171	return fence;
    172}
    173
    174static struct dma_fence *
    175v3d_tfu_job_run(struct drm_sched_job *sched_job)
    176{
    177	struct v3d_tfu_job *job = to_tfu_job(sched_job);
    178	struct v3d_dev *v3d = job->base.v3d;
    179	struct drm_device *dev = &v3d->drm;
    180	struct dma_fence *fence;
    181
    182	fence = v3d_fence_create(v3d, V3D_TFU);
    183	if (IS_ERR(fence))
    184		return NULL;
    185
    186	v3d->tfu_job = job;
    187	if (job->base.irq_fence)
    188		dma_fence_put(job->base.irq_fence);
    189	job->base.irq_fence = dma_fence_get(fence);
    190
    191	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
    192
    193	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
    194	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
    195	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
    196	V3D_WRITE(V3D_TFU_IUA, job->args.iua);
    197	V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
    198	V3D_WRITE(V3D_TFU_IOS, job->args.ios);
    199	V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
    200	if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
    201		V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
    202		V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
    203		V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
    204	}
    205	/* ICFG kicks off the job. */
    206	V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
    207
    208	return fence;
    209}
    210
    211static struct dma_fence *
    212v3d_csd_job_run(struct drm_sched_job *sched_job)
    213{
    214	struct v3d_csd_job *job = to_csd_job(sched_job);
    215	struct v3d_dev *v3d = job->base.v3d;
    216	struct drm_device *dev = &v3d->drm;
    217	struct dma_fence *fence;
    218	int i;
    219
    220	v3d->csd_job = job;
    221
    222	v3d_invalidate_caches(v3d);
    223
    224	fence = v3d_fence_create(v3d, V3D_CSD);
    225	if (IS_ERR(fence))
    226		return NULL;
    227
    228	if (job->base.irq_fence)
    229		dma_fence_put(job->base.irq_fence);
    230	job->base.irq_fence = dma_fence_get(fence);
    231
    232	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
    233
    234	v3d_switch_perfmon(v3d, &job->base);
    235
    236	for (i = 1; i <= 6; i++)
    237		V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
    238	/* CFG0 write kicks off the job. */
    239	V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);
    240
    241	return fence;
    242}
    243
    244static struct dma_fence *
    245v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
    246{
    247	struct v3d_job *job = to_v3d_job(sched_job);
    248	struct v3d_dev *v3d = job->v3d;
    249
    250	v3d_clean_caches(v3d);
    251
    252	return NULL;
    253}
    254
    255static enum drm_gpu_sched_stat
    256v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
    257{
    258	enum v3d_queue q;
    259
    260	mutex_lock(&v3d->reset_lock);
    261
    262	/* block scheduler */
    263	for (q = 0; q < V3D_MAX_QUEUES; q++)
    264		drm_sched_stop(&v3d->queue[q].sched, sched_job);
    265
    266	if (sched_job)
    267		drm_sched_increase_karma(sched_job);
    268
    269	/* get the GPU back into the init state */
    270	v3d_reset(v3d);
    271
    272	for (q = 0; q < V3D_MAX_QUEUES; q++)
    273		drm_sched_resubmit_jobs(&v3d->queue[q].sched);
    274
    275	/* Unblock schedulers and restart their jobs. */
    276	for (q = 0; q < V3D_MAX_QUEUES; q++) {
    277		drm_sched_start(&v3d->queue[q].sched, true);
    278	}
    279
    280	mutex_unlock(&v3d->reset_lock);
    281
    282	return DRM_GPU_SCHED_STAT_NOMINAL;
    283}
    284
    285/* If the current address or return address have changed, then the GPU
    286 * has probably made progress and we should delay the reset.  This
    287 * could fail if the GPU got in an infinite loop in the CL, but that
    288 * is pretty unlikely outside of an i-g-t testcase.
    289 */
    290static enum drm_gpu_sched_stat
    291v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
    292		    u32 *timedout_ctca, u32 *timedout_ctra)
    293{
    294	struct v3d_job *job = to_v3d_job(sched_job);
    295	struct v3d_dev *v3d = job->v3d;
    296	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
    297	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
    298
    299	if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
    300		*timedout_ctca = ctca;
    301		*timedout_ctra = ctra;
    302		return DRM_GPU_SCHED_STAT_NOMINAL;
    303	}
    304
    305	return v3d_gpu_reset_for_timeout(v3d, sched_job);
    306}
    307
    308static enum drm_gpu_sched_stat
    309v3d_bin_job_timedout(struct drm_sched_job *sched_job)
    310{
    311	struct v3d_bin_job *job = to_bin_job(sched_job);
    312
    313	return v3d_cl_job_timedout(sched_job, V3D_BIN,
    314				   &job->timedout_ctca, &job->timedout_ctra);
    315}
    316
    317static enum drm_gpu_sched_stat
    318v3d_render_job_timedout(struct drm_sched_job *sched_job)
    319{
    320	struct v3d_render_job *job = to_render_job(sched_job);
    321
    322	return v3d_cl_job_timedout(sched_job, V3D_RENDER,
    323				   &job->timedout_ctca, &job->timedout_ctra);
    324}
    325
    326static enum drm_gpu_sched_stat
    327v3d_generic_job_timedout(struct drm_sched_job *sched_job)
    328{
    329	struct v3d_job *job = to_v3d_job(sched_job);
    330
    331	return v3d_gpu_reset_for_timeout(job->v3d, sched_job);
    332}
    333
    334static enum drm_gpu_sched_stat
    335v3d_csd_job_timedout(struct drm_sched_job *sched_job)
    336{
    337	struct v3d_csd_job *job = to_csd_job(sched_job);
    338	struct v3d_dev *v3d = job->base.v3d;
    339	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);
    340
    341	/* If we've made progress, skip reset and let the timer get
    342	 * rearmed.
    343	 */
    344	if (job->timedout_batches != batches) {
    345		job->timedout_batches = batches;
    346		return DRM_GPU_SCHED_STAT_NOMINAL;
    347	}
    348
    349	return v3d_gpu_reset_for_timeout(v3d, sched_job);
    350}
    351
    352static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
    353	.run_job = v3d_bin_job_run,
    354	.timedout_job = v3d_bin_job_timedout,
    355	.free_job = v3d_sched_job_free,
    356};
    357
    358static const struct drm_sched_backend_ops v3d_render_sched_ops = {
    359	.run_job = v3d_render_job_run,
    360	.timedout_job = v3d_render_job_timedout,
    361	.free_job = v3d_sched_job_free,
    362};
    363
    364static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
    365	.run_job = v3d_tfu_job_run,
    366	.timedout_job = v3d_generic_job_timedout,
    367	.free_job = v3d_sched_job_free,
    368};
    369
    370static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
    371	.run_job = v3d_csd_job_run,
    372	.timedout_job = v3d_csd_job_timedout,
    373	.free_job = v3d_sched_job_free
    374};
    375
    376static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
    377	.run_job = v3d_cache_clean_job_run,
    378	.timedout_job = v3d_generic_job_timedout,
    379	.free_job = v3d_sched_job_free
    380};
    381
    382int
    383v3d_sched_init(struct v3d_dev *v3d)
    384{
    385	int hw_jobs_limit = 1;
    386	int job_hang_limit = 0;
    387	int hang_limit_ms = 500;
    388	int ret;
    389
    390	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
    391			     &v3d_bin_sched_ops,
    392			     hw_jobs_limit, job_hang_limit,
    393			     msecs_to_jiffies(hang_limit_ms), NULL,
    394			     NULL, "v3d_bin", v3d->drm.dev);
    395	if (ret)
    396		return ret;
    397
    398	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
    399			     &v3d_render_sched_ops,
    400			     hw_jobs_limit, job_hang_limit,
    401			     msecs_to_jiffies(hang_limit_ms), NULL,
    402			     NULL, "v3d_render", v3d->drm.dev);
    403	if (ret)
    404		goto fail;
    405
    406	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
    407			     &v3d_tfu_sched_ops,
    408			     hw_jobs_limit, job_hang_limit,
    409			     msecs_to_jiffies(hang_limit_ms), NULL,
    410			     NULL, "v3d_tfu", v3d->drm.dev);
    411	if (ret)
    412		goto fail;
    413
    414	if (v3d_has_csd(v3d)) {
    415		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
    416				     &v3d_csd_sched_ops,
    417				     hw_jobs_limit, job_hang_limit,
    418				     msecs_to_jiffies(hang_limit_ms), NULL,
    419				     NULL, "v3d_csd", v3d->drm.dev);
    420		if (ret)
    421			goto fail;
    422
    423		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
    424				     &v3d_cache_clean_sched_ops,
    425				     hw_jobs_limit, job_hang_limit,
    426				     msecs_to_jiffies(hang_limit_ms), NULL,
    427				     NULL, "v3d_cache_clean", v3d->drm.dev);
    428		if (ret)
    429			goto fail;
    430	}
    431
    432	return 0;
    433
    434fail:
    435	v3d_sched_fini(v3d);
    436	return ret;
    437}
    438
    439void
    440v3d_sched_fini(struct v3d_dev *v3d)
    441{
    442	enum v3d_queue q;
    443
    444	for (q = 0; q < V3D_MAX_QUEUES; q++) {
    445		if (v3d->queue[q].sched.ready)
    446			drm_sched_fini(&v3d->queue[q].sched);
    447	}
    448}