gpu_scheduler.h - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
gpu_scheduler.h (18209B)
      1/*
      2 * Copyright 2015 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 */
     23
     24#ifndef _DRM_GPU_SCHEDULER_H_
     25#define _DRM_GPU_SCHEDULER_H_
     26
     27#include <drm/spsc_queue.h>
     28#include <linux/dma-fence.h>
     29#include <linux/completion.h>
     30#include <linux/xarray.h>
     31#include <linux/irq_work.h>
     32
/*
 * MAX_WAIT_SCHED_ENTITY_Q_EMPTY - 1000 milliseconds converted to jiffies.
 *
 * NOTE(review): going by the name this bounds how long callers wait for an
 * entity's job queue to become empty; the users are not visible in this
 * header, so confirm against the implementation.
 */
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
     34
/* Only used through a pointer in this header; not defined here. */
struct drm_gem_object;

/*
 * Both are defined further down in this header; they are declared early
 * because struct drm_sched_entity holds pointers to them.
 */
struct drm_gpu_scheduler;
struct drm_sched_rq;
     39
/**
 * enum drm_sched_priority - priority levels of a scheduler entity or job.
 *
 * These are often used as an (initial) index to an array, and as such
 * should start at 0.
 *
 * @DRM_SCHED_PRIORITY_MIN: first level, implicit value 0.
 * @DRM_SCHED_PRIORITY_NORMAL: implicit value 1.
 * @DRM_SCHED_PRIORITY_HIGH: implicit value 2.
 * @DRM_SCHED_PRIORITY_KERNEL: implicit value 3.
 * @DRM_SCHED_PRIORITY_COUNT: number of levels listed above it (4); used as
 *	the size of &drm_gpu_scheduler.sched_rq.
 * @DRM_SCHED_PRIORITY_UNSET: explicit value -2, which is not a valid array
 *	index.
 *
 * NOTE(review): presumably a larger value means a higher priority; confirm
 * against the run queue selection code.
 */
enum drm_sched_priority {
	DRM_SCHED_PRIORITY_MIN,
	DRM_SCHED_PRIORITY_NORMAL,
	DRM_SCHED_PRIORITY_HIGH,
	DRM_SCHED_PRIORITY_KERNEL,

	DRM_SCHED_PRIORITY_COUNT,
	DRM_SCHED_PRIORITY_UNSET = -2
};
     52
/**
 * struct drm_sched_entity - A wrapper around a job queue (typically
 * attached to the DRM file_priv).
 *
 * Entities will emit jobs in order to their corresponding hardware
 * ring, and the scheduler will alternate between entities based on
 * scheduling policy.
 */
struct drm_sched_entity {
	/**
	 * @list:
	 *
	 * Used to append this struct to the list of entities in the runqueue
	 * @rq under &drm_sched_rq.entities.
	 *
	 * Protected by &drm_sched_rq.lock of @rq.
	 */
	struct list_head		list;

	/**
	 * @rq:
	 *
	 * Runqueue on which this entity is currently scheduled.
	 *
	 * FIXME: Locking is very unclear for this. Writers are protected by
	 * @rq_lock, but readers are generally lockless and seem to just race
	 * with not even a READ_ONCE.
	 */
	struct drm_sched_rq		*rq;

	/**
	 * @sched_list:
	 *
	 * A list of schedulers (struct drm_gpu_scheduler).  Jobs from this
	 * entity can be scheduled on any scheduler on this list.
	 *
	 * This can be modified by calling drm_sched_entity_modify_sched().
	 * Locking is entirely up to the driver, see the above function for more
	 * details.
	 *
	 * This will be set to NULL if @num_sched_list equals 1 and @rq has been
	 * set already.
	 *
	 * FIXME: This means priority changes through
	 * drm_sched_entity_set_priority() will be lost henceforth in this case.
	 */
	struct drm_gpu_scheduler        **sched_list;

	/**
	 * @num_sched_list:
	 *
	 * Number of drm_gpu_schedulers in the @sched_list.
	 */
	unsigned int                    num_sched_list;

	/**
	 * @priority:
	 *
	 * Priority of the entity. This can be modified by calling
	 * drm_sched_entity_set_priority(). Protected by @rq_lock.
	 */
	enum drm_sched_priority         priority;

	/**
	 * @rq_lock:
	 *
	 * Lock to modify the runqueue to which this entity belongs.
	 */
	spinlock_t			rq_lock;

	/**
	 * @job_queue: the list of jobs of this entity.
	 */
	struct spsc_queue		job_queue;

	/**
	 * @fence_seq:
	 *
	 * A linearly increasing seqno incremented with each new
	 * &drm_sched_fence which is part of the entity.
	 *
	 * FIXME: Callers of drm_sched_job_arm() need to ensure correct locking,
	 * this doesn't need to be atomic.
	 */
	atomic_t			fence_seq;

	/**
	 * @fence_context:
	 *
	 * A unique context for all the fences which belong to this entity.  The
	 * &drm_sched_fence.scheduled uses the fence_context but
	 * &drm_sched_fence.finished uses fence_context + 1.
	 */
	uint64_t			fence_context;

	/**
	 * @dependency:
	 *
	 * The dependency fence of the job which is on the top of the job queue.
	 */
	struct dma_fence		*dependency;

	/**
	 * @cb:
	 *
	 * Callback for the dependency fence above.
	 */
	struct dma_fence_cb		cb;

	/**
	 * @guilty:
	 *
	 * Points to entities' guilty.
	 *
	 * NOTE(review): this is the pointer handed to drm_sched_entity_init()
	 * as its @guilty argument, presumably shared between several
	 * entities; confirm against the implementation.
	 */
	atomic_t			*guilty;

	/**
	 * @last_scheduled:
	 *
	 * Points to the finished fence of the last scheduled job. Only written
	 * by the scheduler thread, can be accessed locklessly from
	 * drm_sched_job_arm() iff the queue is empty.
	 */
	struct dma_fence                *last_scheduled;

	/**
	 * @last_user: last group leader pushing a job into the entity.
	 */
	struct task_struct		*last_user;

	/**
	 * @stopped:
	 *
	 * Marks the entity as removed from rq and destined for
	 * termination. This is set by calling drm_sched_entity_flush() and by
	 * drm_sched_fini().
	 */
	bool 				stopped;

	/**
	 * @entity_idle:
	 *
	 * Signals when entity is not in use, used to sequence entity cleanup in
	 * drm_sched_entity_fini().
	 */
	struct completion		entity_idle;
};
    200
/**
 * struct drm_sched_rq - queue of entities to be scheduled.
 *
 * @lock: protects modifications of the @entities list.
 * @sched: the scheduler to which this rq belongs.
 * @entities: list of the entities to be scheduled; entities are linked in
 *            through &drm_sched_entity.list.
 * @current_entity: the entity which is to be scheduled.
 *
 * Run queue is a set of entities scheduling command submissions for
 * one specific ring. It implements the scheduling policy that selects
 * the next entity to emit commands from.
 */
struct drm_sched_rq {
	spinlock_t			lock;
	struct drm_gpu_scheduler	*sched;
	struct list_head		entities;
	struct drm_sched_entity		*current_entity;
};
    219
/**
 * struct drm_sched_fence - fences corresponding to the scheduling of a job.
 */
struct drm_sched_fence {
	/**
	 * @scheduled: this fence is what will be signaled by the scheduler
	 * when the job is scheduled.
	 */
	struct dma_fence		scheduled;

	/**
	 * @finished: this fence is what will be signaled by the scheduler
	 * when the job is completed.
	 *
	 * When setting up an out fence for the job, you should use
	 * this, since it's available immediately upon
	 * drm_sched_job_init(), and the fence returned by the driver
	 * from run_job() won't be created until the dependencies have
	 * resolved.
	 */
	struct dma_fence		finished;

	/**
	 * @parent: the fence returned by &drm_sched_backend_ops.run_job
	 * when scheduling the job on hardware. We signal the
	 * &drm_sched_fence.finished fence once parent is signalled.
	 */
	struct dma_fence		*parent;

	/**
	 * @sched: the scheduler instance to which the job owning this
	 * struct belongs.
	 */
	struct drm_gpu_scheduler	*sched;

	/**
	 * @lock: the lock used by the scheduled and the finished fences.
	 */
	spinlock_t			lock;

	/**
	 * @owner: job owner for debugging
	 */
	void				*owner;
};
    262
/*
 * NOTE(review): presumably maps a dma_fence that is embedded in a
 * struct drm_sched_fence (its @scheduled or @finished member) back to the
 * containing structure. The definition is not visible in this header, so
 * confirm what it returns for a fence of any other kind.
 */
struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
    264
/**
 * struct drm_sched_job - A job to be run by an entity.
 *
 * @queue_node: used to append this struct to the queue of jobs in an entity.
 * @list: a job participates in a "pending" and "done" lists.
 * @sched: the scheduler instance on which this job is scheduled.
 * @s_fence: contains the fences for the scheduling of job.
 * @finish_cb: the callback for the finished fence.
 * @work: Helper to reschedule job kill to different context.
 * @id: a unique id assigned to each job scheduled on the scheduler.
 * @karma: increment on every hang caused by this job. If this exceeds the hang
 *         limit of the scheduler then the job is marked guilty and will not
 *         be scheduled further.
 * @s_priority: the priority of the job.
 * @entity: the entity to which this job belongs.
 * @cb: the callback for the parent fence in s_fence.
 *
 * A job is created by the driver using drm_sched_job_init(), and
 * should call drm_sched_entity_push_job() once it wants the scheduler
 * to schedule the job.
 */
struct drm_sched_job {
	struct spsc_node		queue_node;
	struct list_head		list;
	struct drm_gpu_scheduler	*sched;
	struct drm_sched_fence		*s_fence;

	/*
	 * The two members share storage: work is used only after finish_cb
	 * has been used and will not be accessed anymore.
	 */
	union {
		struct dma_fence_cb		finish_cb;
		struct irq_work 		work;
	};

	uint64_t			id;
	atomic_t			karma;
	enum drm_sched_priority		s_priority;
	struct drm_sched_entity         *entity;
	struct dma_fence_cb		cb;
	/**
	 * @dependencies:
	 *
	 * Contains the dependencies as struct dma_fence for this job, see
	 * drm_sched_job_add_dependency() and
	 * drm_sched_job_add_implicit_dependencies().
	 */
	struct xarray			dependencies;

	/** @last_dependency: tracks @dependencies as they signal */
	unsigned long			last_dependency;
};
    318
    319static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
    320					    int threshold)
    321{
    322	return s_job && atomic_inc_return(&s_job->karma) > threshold;
    323}
    324
/**
 * enum drm_gpu_sched_stat - status returned by
 * &drm_sched_backend_ops.timedout_job.
 *
 * @DRM_GPU_SCHED_STAT_NONE: reserved, value 0.
 * @DRM_GPU_SCHED_STAT_NOMINAL: all is normal, the driver has started or
 *	completed recovery.
 * @DRM_GPU_SCHED_STAT_ENODEV: the device is no longer available, i.e. has
 *	been unplugged.
 */
enum drm_gpu_sched_stat {
	DRM_GPU_SCHED_STAT_NONE, /* Reserve 0 */
	DRM_GPU_SCHED_STAT_NOMINAL,
	DRM_GPU_SCHED_STAT_ENODEV,
};
    330
/**
 * struct drm_sched_backend_ops
 *
 * Define the backend operations called by the scheduler,
 * these functions should be implemented in driver side.
 */
struct drm_sched_backend_ops {
	/**
	 * @dependency:
	 *
	 * Called when the scheduler is considering scheduling this job next, to
	 * get another struct dma_fence for this job to block on.  Once it
	 * returns NULL, run_job() may be called.
	 *
	 * If a driver exclusively uses drm_sched_job_add_dependency() and
	 * drm_sched_job_add_implicit_dependencies() this can be omitted and
	 * left as NULL.
	 */
	struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
					struct drm_sched_entity *s_entity);

	/**
	 * @run_job: Called to execute the job once all of the dependencies
	 * have been resolved.  This may be called multiple times, if
	 * timedout_job() has happened and drm_sched_job_recovery()
	 * decides to try it again.
	 *
	 * NOTE(review): drm_sched_job_recovery() is not declared in this
	 * header; the resubmission helpers declared here are
	 * drm_sched_resubmit_jobs() and drm_sched_resubmit_jobs_ext().
	 */
	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);

	/**
	 * @timedout_job: Called when a job has taken too long to execute,
	 * to trigger GPU recovery.
	 *
	 * This method is called in a workqueue context.
	 *
	 * Drivers typically issue a reset to recover from GPU hangs, and this
	 * procedure usually follows the following workflow:
	 *
	 * 1. Stop the scheduler using drm_sched_stop(). This will park the
	 *    scheduler thread and cancel the timeout work, guaranteeing that
	 *    nothing is queued while we reset the hardware queue
	 * 2. Try to gracefully stop non-faulty jobs (optional)
	 * 3. Issue a GPU reset (driver-specific)
	 * 4. Re-submit jobs using drm_sched_resubmit_jobs()
	 * 5. Restart the scheduler using drm_sched_start(). At that point, new
	 *    jobs can be queued, and the scheduler thread is unblocked
	 *
	 * Note that some GPUs have distinct hardware queues but need to reset
	 * the GPU globally, which requires extra synchronization between the
	 * timeout handler of the different &drm_gpu_scheduler. One way to
	 * achieve this synchronization is to create an ordered workqueue
	 * (using alloc_ordered_workqueue()) at the driver level, and pass this
	 * queue to drm_sched_init(), to guarantee that timeout handlers are
	 * executed sequentially. The above workflow needs to be slightly
	 * adjusted in that case:
	 *
	 * 1. Stop all schedulers impacted by the reset using drm_sched_stop()
	 * 2. Try to gracefully stop non-faulty jobs on all queues impacted by
	 *    the reset (optional)
	 * 3. Issue a GPU reset on all faulty queues (driver-specific)
	 * 4. Re-submit jobs on all schedulers impacted by the reset using
	 *    drm_sched_resubmit_jobs()
	 * 5. Restart all schedulers that were stopped in step #1 using
	 *    drm_sched_start()
	 *
	 * Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal,
	 * and the underlying driver has started or completed recovery.
	 *
	 * Return DRM_GPU_SCHED_STAT_ENODEV, if the device is no longer
	 * available, i.e. has been unplugged.
	 */
	enum drm_gpu_sched_stat (*timedout_job)(struct drm_sched_job *sched_job);

	/**
	 * @free_job: Called once the job's finished fence has been signaled
	 * and it's time to clean it up.
	 */
	void (*free_job)(struct drm_sched_job *sched_job);
};
    410
/**
 * struct drm_gpu_scheduler
 *
 * @ops: backend operations provided by the driver.
 * @hw_submission_limit: the max size of the hardware queue.
 * @timeout: the time after which a job is removed from the scheduler.
 * @name: name of the ring for which this scheduler is being used.
 * @sched_rq: priority wise array of run queues, one per level below
 *            DRM_SCHED_PRIORITY_COUNT.
 * @wake_up_worker: the wait queue on which the scheduler sleeps until a job
 *                  is ready to be scheduled.
 * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
 *                 waits on this wait queue until all the scheduled jobs are
 *                 finished.
 * @hw_rq_count: the number of jobs currently in the hardware queue.
 * @job_id_count: used to assign a unique id to each job.
 * @timeout_wq: workqueue used to queue @work_tdr
 * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
 *            timeout interval is over.
 * @thread: the kthread on which the scheduler runs.
 * @pending_list: the list of jobs which are currently in the job queue.
 * @job_list_lock: lock to protect the pending_list.
 * @hang_limit: once the hangs by a job crosses this limit then it is marked
 *              guilty and it will no longer be considered for scheduling.
 * @score: score to help loadbalancer pick a idle sched
 * @_score: score used when the driver doesn't provide one
 * @ready: marks if the underlying HW is ready to work
 * @free_guilty: A hint to time out handler to free the guilty job.
 * @dev: NOTE(review): presumably the struct device handed to
 *       drm_sched_init() as its @dev argument; confirm how it is used in
 *       the implementation.
 *
 * One scheduler is implemented for each hardware ring.
 */
struct drm_gpu_scheduler {
	const struct drm_sched_backend_ops	*ops;
	uint32_t			hw_submission_limit;
	long				timeout;
	const char			*name;
	struct drm_sched_rq		sched_rq[DRM_SCHED_PRIORITY_COUNT];
	wait_queue_head_t		wake_up_worker;
	wait_queue_head_t		job_scheduled;
	atomic_t			hw_rq_count;
	atomic64_t			job_id_count;
	struct workqueue_struct		*timeout_wq;
	struct delayed_work		work_tdr;
	struct task_struct		*thread;
	struct list_head		pending_list;
	spinlock_t			job_list_lock;
	int				hang_limit;
	atomic_t                        *score;
	atomic_t                        _score;
	bool				ready;
	bool				free_guilty;
	struct device			*dev;
};
    463
/*
 * Scheduler set-up and job construction. Only prototypes are visible in
 * this header; the definitions live elsewhere.
 *
 * NOTE(review): the arguments of drm_sched_init() mirror members of
 * struct drm_gpu_scheduler (ops, hw_submission_limit, hang_limit, timeout,
 * timeout_wq, score, name, dev) and are presumably stored there; confirm.
 * @hang_limit is unsigned here while the struct member is int.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   uint32_t hw_submission, unsigned hang_limit,
		   long timeout, struct workqueue_struct *timeout_wq,
		   atomic_t *score, const char *name, struct device *dev);

void drm_sched_fini(struct drm_gpu_scheduler *sched);

/*
 * Per the struct drm_sched_job documentation, a driver creates a job with
 * drm_sched_job_init() and later calls drm_sched_entity_push_job() once it
 * wants the scheduler to schedule it.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner);
void drm_sched_job_arm(struct drm_sched_job *job);

/* The dependencies of a job are kept in &drm_sched_job.dependencies. */
int drm_sched_job_add_dependency(struct drm_sched_job *job,
				 struct dma_fence *fence);
int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
					    struct drm_gem_object *obj,
					    bool write);


/*
 * Modifies &drm_sched_entity.sched_list; as documented on that member,
 * locking is entirely up to the driver.
 */
void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
				    struct drm_gpu_scheduler **sched_list,
                                   unsigned int num_sched_list);
    485
/*
 * Job teardown, wake-up and hang recovery helpers (prototypes only).
 *
 * drm_sched_stop(), drm_sched_resubmit_jobs() and drm_sched_start() are the
 * steps of the reset workflow described at
 * &drm_sched_backend_ops.timedout_job.
 */
void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max);
/* NOTE(review): presumably operate on &drm_sched_job.karma of @bad; confirm. */
void drm_sched_increase_karma(struct drm_sched_job *bad);
void drm_sched_reset_karma(struct drm_sched_job *bad);
void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type);
bool drm_sched_dependency_optimized(struct dma_fence* fence,
				    struct drm_sched_entity *entity);
void drm_sched_fault(struct drm_gpu_scheduler *sched);
void drm_sched_job_kickout(struct drm_sched_job *s_job);

/*
 * Run queue membership. Entities are linked into &drm_sched_rq.entities
 * through &drm_sched_entity.list, protected by &drm_sched_rq.lock.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity);
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity);
    504
/*
 * Entity API (prototypes only). Cross-references from the
 * struct drm_sched_entity documentation in this header:
 *
 * - drm_sched_entity_flush() sets &drm_sched_entity.stopped;
 * - drm_sched_entity_fini() uses &drm_sched_entity.entity_idle to sequence
 *   cleanup;
 * - drm_sched_entity_set_priority() modifies &drm_sched_entity.priority;
 * - drm_sched_entity_push_job() is what a driver calls once it wants the
 *   scheduler to schedule a job.
 */
int drm_sched_entity_init(struct drm_sched_entity *entity,
			  enum drm_sched_priority priority,
			  struct drm_gpu_scheduler **sched_list,
			  unsigned int num_sched_list,
			  atomic_t *guilty);
long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout);
void drm_sched_entity_fini(struct drm_sched_entity *entity);
void drm_sched_entity_destroy(struct drm_sched_entity *entity);
void drm_sched_entity_select_rq(struct drm_sched_entity *entity);
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity);
void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
				   enum drm_sched_priority priority);
bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
    519
/* struct drm_sched_fence allocation, initialisation and release. */
struct drm_sched_fence *drm_sched_fence_alloc(
	struct drm_sched_entity *s_entity, void *owner);
void drm_sched_fence_init(struct drm_sched_fence *fence,
			  struct drm_sched_entity *entity);
void drm_sched_fence_free(struct drm_sched_fence *fence);

/*
 * NOTE(review): presumably signal &drm_sched_fence.scheduled and
 * &drm_sched_fence.finished respectively; the definitions are not visible
 * in this header.
 */
void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
void drm_sched_fence_finished(struct drm_sched_fence *fence);

/*
 * NOTE(review): the value returned by drm_sched_suspend_timeout() looks
 * like it is meant to be passed back as @remaining to
 * drm_sched_resume_timeout(); confirm units and semantics in the
 * implementation.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
		                unsigned long remaining);
/*
 * Picks one scheduler out of @sched_list. &drm_gpu_scheduler.score is
 * documented as the score that helps the load balancer pick an idle
 * scheduler.
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		     unsigned int num_sched_list);
    535
    536#endif