cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cpufreq_schedutil.c (23980B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * CPUFreq governor based on scheduler-provided CPU utilization data.
      4 *
      5 * Copyright (C) 2016, Intel Corporation
      6 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
      7 */
      8
      9#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)
     10
     11struct sugov_tunables {
     12	struct gov_attr_set	attr_set;
     13	unsigned int		rate_limit_us;
     14};
     15
     16struct sugov_policy {
     17	struct cpufreq_policy	*policy;
     18
     19	struct sugov_tunables	*tunables;
     20	struct list_head	tunables_hook;
     21
     22	raw_spinlock_t		update_lock;
     23	u64			last_freq_update_time;
     24	s64			freq_update_delay_ns;
     25	unsigned int		next_freq;
     26	unsigned int		cached_raw_freq;
     27
     28	/* The next fields are only needed if fast switch cannot be used: */
     29	struct			irq_work irq_work;
     30	struct			kthread_work work;
     31	struct			mutex work_lock;
     32	struct			kthread_worker worker;
     33	struct task_struct	*thread;
     34	bool			work_in_progress;
     35
     36	bool			limits_changed;
     37	bool			need_freq_update;
     38};
     39
     40struct sugov_cpu {
     41	struct update_util_data	update_util;
     42	struct sugov_policy	*sg_policy;
     43	unsigned int		cpu;
     44
     45	bool			iowait_boost_pending;
     46	unsigned int		iowait_boost;
     47	u64			last_update;
     48
     49	unsigned long		util;
     50	unsigned long		bw_dl;
     51	unsigned long		max;
     52
     53	/* The field below is for single-CPU policies only: */
     54#ifdef CONFIG_NO_HZ_COMMON
     55	unsigned long		saved_idle_calls;
     56#endif
     57};
     58
     59static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
     60
     61/************************ Governor internals ***********************/
     62
     63static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
     64{
     65	s64 delta_ns;
     66
     67	/*
     68	 * Since cpufreq_update_util() is called with rq->lock held for
     69	 * the @target_cpu, our per-CPU data is fully serialized.
     70	 *
     71	 * However, drivers cannot in general deal with cross-CPU
     72	 * requests, so while get_next_freq() will work, actually
     73	 * committing the frequency may not on the fast switching platforms.
     74	 *
     75	 * Hence stop here for remote requests if they aren't supported
     76	 * by the hardware, as calculating the frequency is pointless if
     77	 * we cannot in fact act on it.
     78	 *
     79	 * This is needed on the slow switching platforms too to prevent CPUs
     80	 * going offline from leaving stale IRQ work items behind.
     81	 */
     82	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
     83		return false;
     84
     85	if (unlikely(sg_policy->limits_changed)) {
     86		sg_policy->limits_changed = false;
     87		sg_policy->need_freq_update = true;
     88		return true;
     89	}
     90
     91	delta_ns = time - sg_policy->last_freq_update_time;
     92
     93	return delta_ns >= sg_policy->freq_update_delay_ns;
     94}
     95
     96static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
     97				   unsigned int next_freq)
     98{
     99	if (sg_policy->need_freq_update)
    100		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
    101	else if (sg_policy->next_freq == next_freq)
    102		return false;
    103
    104	sg_policy->next_freq = next_freq;
    105	sg_policy->last_freq_update_time = time;
    106
    107	return true;
    108}
    109
    110static void sugov_deferred_update(struct sugov_policy *sg_policy)
    111{
    112	if (!sg_policy->work_in_progress) {
    113		sg_policy->work_in_progress = true;
    114		irq_work_queue(&sg_policy->irq_work);
    115	}
    116}
    117
    118/**
    119 * get_next_freq - Compute a new frequency for a given cpufreq policy.
    120 * @sg_policy: schedutil policy object to compute the new frequency for.
    121 * @util: Current CPU utilization.
    122 * @max: CPU capacity.
    123 *
    124 * If the utilization is frequency-invariant, choose the new frequency to be
    125 * proportional to it, that is
    126 *
    127 * next_freq = C * max_freq * util / max
    128 *
    129 * Otherwise, approximate the would-be frequency-invariant utilization by
    130 * util_raw * (curr_freq / max_freq) which leads to
    131 *
    132 * next_freq = C * curr_freq * util_raw / max
    133 *
    134 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
    135 *
    136 * The lowest driver-supported frequency which is equal or greater than the raw
    137 * next_freq (as calculated above) is returned, subject to policy min/max and
    138 * cpufreq driver limitations.
    139 */
    140static unsigned int get_next_freq(struct sugov_policy *sg_policy,
    141				  unsigned long util, unsigned long max)
    142{
    143	struct cpufreq_policy *policy = sg_policy->policy;
    144	unsigned int freq = arch_scale_freq_invariant() ?
    145				policy->cpuinfo.max_freq : policy->cur;
    146
    147	util = map_util_perf(util);
    148	freq = map_util_freq(util, freq, max);
    149
    150	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
    151		return sg_policy->next_freq;
    152
    153	sg_policy->cached_raw_freq = freq;
    154	return cpufreq_driver_resolve_freq(policy, freq);
    155}
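
/*
 * For illustration, assume frequency-invariant utilization and a hypothetical
 * policy whose cpuinfo.max_freq is 2000000 kHz. With util = 512 out of
 * max = SCHED_CAPACITY_SCALE = 1024, map_util_perf() first adds the 25%
 * headroom (512 -> 640), and map_util_freq() then yields
 *
 *	2000000 * 640 / 1024 = 1250000 kHz,
 *
 * i.e. next_freq = 1.25 * max_freq * util / max as described above; the
 * driver finally resolves this to the lowest supported frequency at or
 * above that value.
 */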
    156
    157static void sugov_get_util(struct sugov_cpu *sg_cpu)
    158{
    159	struct rq *rq = cpu_rq(sg_cpu->cpu);
    160	unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
    161
    162	sg_cpu->max = max;
    163	sg_cpu->bw_dl = cpu_bw_dl(rq);
    164	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max,
    165					  FREQUENCY_UTIL, NULL);
    166}
    167
    168/**
    169 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
    170 * @sg_cpu: the sugov data for the CPU to boost
    171 * @time: the update time from the caller
    172 * @set_iowait_boost: true if an IO boost has been requested
    173 *
    174 * The IO wait boost of a task is disabled after a tick since the last update
    175 * of a CPU. If a new IO wait boost is requested after more than a tick, then
    176 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
    177 * efficiency by ignoring sporadic wakeups from IO.
    178 */
    179static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
    180			       bool set_iowait_boost)
    181{
    182	s64 delta_ns = time - sg_cpu->last_update;
    183
    184	/* Reset boost only if a tick has elapsed since last request */
    185	if (delta_ns <= TICK_NSEC)
    186		return false;
    187
    188	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
    189	sg_cpu->iowait_boost_pending = set_iowait_boost;
    190
    191	return true;
    192}
    193
    194/**
    195 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
    196 * @sg_cpu: the sugov data for the CPU to boost
    197 * @time: the update time from the caller
    198 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
    199 *
    200 * Each time a task wakes up after an IO operation, the CPU utilization can be
    201 * boosted to a certain utilization which doubles at each "frequent and
    202 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
    203 * of the maximum OPP.
    204 *
    205 * To keep doubling, an IO boost has to be requested at least once per tick,
    206 * otherwise we restart from the utilization of the minimum OPP.
    207 */
    208static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
    209			       unsigned int flags)
    210{
    211	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
    212
    213	/* Reset boost if the CPU appears to have been idle enough */
    214	if (sg_cpu->iowait_boost &&
    215	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
    216		return;
    217
    218	/* Boost only tasks waking up after IO */
    219	if (!set_iowait_boost)
    220		return;
    221
    222	/* Ensure boost doubles only one time at each request */
    223	if (sg_cpu->iowait_boost_pending)
    224		return;
    225	sg_cpu->iowait_boost_pending = true;
    226
    227	/* Double the boost at each request */
    228	if (sg_cpu->iowait_boost) {
    229		sg_cpu->iowait_boost =
    230			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
    231		return;
    232	}
    233
    234	/* First wakeup after IO: start with minimum boost */
    235	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
    236}
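
/*
 * For illustration: with SCHED_CAPACITY_SCALE = 1024, IOWAIT_BOOST_MIN is
 * 1024 / 8 = 128. A task that keeps waking from IO at least once per tick
 * ramps the boost 128 -> 256 -> 512 -> 1024, i.e. it reaches the utilization
 * of the maximum OPP after three successive doublings. If more than a tick
 * passes between requests, sugov_iowait_reset() restarts the ramp from
 * IOWAIT_BOOST_MIN (or clears it entirely if no new boost was requested).
 */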
    237
    238/**
    239 * sugov_iowait_apply() - Apply the IO boost to a CPU.
    240 * @sg_cpu: the sugov data for the cpu to boost
    241 * @time: the update time from the caller
    242 *
    243 * A CPU running a task which has woken up after an IO operation can have its
    244 * utilization boosted to speed up the completion of those IO operations.
    245 * The IO boost value is increased each time a task wakes up from IO, in
    246 * sugov_iowait_boost(), and it is instead decreased by this function
    247 * each time an increase has not been requested (!iowait_boost_pending).
    248 *
    249 * A CPU which appears to have been idle for at least one tick also has
    250 * its IO boost utilization reset.
    251 *
    252 * This mechanism is designed to boost tasks that frequently wait on IO, while
    253 * being more conservative on tasks that do only sporadic IO operations.
    254 */
    255static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
    256{
    257	unsigned long boost;
    258
    259	/* No boost currently required */
    260	if (!sg_cpu->iowait_boost)
    261		return;
    262
    263	/* Reset boost if the CPU appears to have been idle enough */
    264	if (sugov_iowait_reset(sg_cpu, time, false))
    265		return;
    266
    267	if (!sg_cpu->iowait_boost_pending) {
    268		/*
    269		 * No boost pending; reduce the boost value.
    270		 */
    271		sg_cpu->iowait_boost >>= 1;
    272		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
    273			sg_cpu->iowait_boost = 0;
    274			return;
    275		}
    276	}
    277
    278	sg_cpu->iowait_boost_pending = false;
    279
    280	/*
    281	 * sg_cpu->util is already in capacity scale; convert iowait_boost
    282	 * into the same scale so we can compare.
    283	 */
    284	boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
    285	boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
    286	if (sg_cpu->util < boost)
    287		sg_cpu->util = boost;
    288}
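
/*
 * For illustration of the decay path: a fully ramped boost of 1024 on a CPU
 * with max = 1024 contributes a boost utilization of 1024 here (before uclamp
 * clamping); with no new IO wakeups it then halves on each subsequent update
 * (512, 256, 128) and is cleared once a further halving would drop below
 * IOWAIT_BOOST_MIN (128).
 */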
    289
    290#ifdef CONFIG_NO_HZ_COMMON
    291static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
    292{
    293	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
    294	bool ret = idle_calls == sg_cpu->saved_idle_calls;
    295
    296	sg_cpu->saved_idle_calls = idle_calls;
    297	return ret;
    298}
    299#else
    300static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
    301#endif /* CONFIG_NO_HZ_COMMON */
    302
    303/*
    304 * Make sugov_should_update_freq() ignore the rate limit when DL
    305 * has increased the utilization.
    306 */
    307static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
    308{
    309	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
    310		sg_cpu->sg_policy->limits_changed = true;
    311}
    312
    313static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
    314					      u64 time, unsigned int flags)
    315{
    316	sugov_iowait_boost(sg_cpu, time, flags);
    317	sg_cpu->last_update = time;
    318
    319	ignore_dl_rate_limit(sg_cpu);
    320
    321	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
    322		return false;
    323
    324	sugov_get_util(sg_cpu);
    325	sugov_iowait_apply(sg_cpu, time);
    326
    327	return true;
    328}
    329
    330static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
    331				     unsigned int flags)
    332{
    333	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
    334	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
    335	unsigned int cached_freq = sg_policy->cached_raw_freq;
    336	unsigned int next_f;
    337
    338	if (!sugov_update_single_common(sg_cpu, time, flags))
    339		return;
    340
    341	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
    342	/*
    343	 * Do not reduce the frequency if the CPU has not been idle
    344	 * recently, as the reduction is likely to be premature then.
    345	 *
    346	 * Except when the rq is capped by uclamp_max.
    347	 */
    348	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
    349	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
    350		next_f = sg_policy->next_freq;
    351
    352		/* Restore cached freq as next_freq has changed */
    353		sg_policy->cached_raw_freq = cached_freq;
    354	}
    355
    356	if (!sugov_update_next_freq(sg_policy, time, next_f))
    357		return;
    358
    359	/*
    360	 * This code runs under rq->lock for the target CPU, so it won't run
    361	 * concurrently on two different CPUs for the same target and it is not
    362	 * necessary to acquire the lock in the fast switch case.
    363	 */
    364	if (sg_policy->policy->fast_switch_enabled) {
    365		cpufreq_driver_fast_switch(sg_policy->policy, next_f);
    366	} else {
    367		raw_spin_lock(&sg_policy->update_lock);
    368		sugov_deferred_update(sg_policy);
    369		raw_spin_unlock(&sg_policy->update_lock);
    370	}
    371}
    372
    373static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
    374				     unsigned int flags)
    375{
    376	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
    377	unsigned long prev_util = sg_cpu->util;
    378
    379	/*
    380	 * Fall back to the "frequency" path if frequency invariance is not
    381	 * supported, because the direct mapping between the utilization and
    382	 * the performance levels depends on the frequency invariance.
    383	 */
    384	if (!arch_scale_freq_invariant()) {
    385		sugov_update_single_freq(hook, time, flags);
    386		return;
    387	}
    388
    389	if (!sugov_update_single_common(sg_cpu, time, flags))
    390		return;
    391
    392	/*
    393	 * Do not reduce the target performance level if the CPU has not been
    394	 * idle recently, as the reduction is likely to be premature then.
    395	 *
    396	 * Except when the rq is capped by uclamp_max.
    397	 */
    398	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
    399	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
    400		sg_cpu->util = prev_util;
    401
    402	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
    403				   map_util_perf(sg_cpu->util), sg_cpu->max);
    404
    405	sg_cpu->sg_policy->last_freq_update_time = time;
    406}
    407
    408static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
    409{
    410	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
    411	struct cpufreq_policy *policy = sg_policy->policy;
    412	unsigned long util = 0, max = 1;
    413	unsigned int j;
    414
    415	for_each_cpu(j, policy->cpus) {
    416		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
    417		unsigned long j_util, j_max;
    418
    419		sugov_get_util(j_sg_cpu);
    420		sugov_iowait_apply(j_sg_cpu, time);
    421		j_util = j_sg_cpu->util;
    422		j_max = j_sg_cpu->max;
    423
    424		if (j_util * max > j_max * util) {
    425			util = j_util;
    426			max = j_max;
    427		}
    428	}
    429
    430	return get_next_freq(sg_policy, util, max);
    431}
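
/*
 * Note on the comparison above: "j_util * max > j_max * util" is the
 * cross-multiplied form of "j_util / j_max > util / max", so the loop picks
 * the CPU with the highest utilization relative to its capacity without a
 * per-CPU integer division; the shared policy frequency is then computed
 * from that single (util, max) pair.
 */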
    432
    433static void
    434sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
    435{
    436	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
    437	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
    438	unsigned int next_f;
    439
    440	raw_spin_lock(&sg_policy->update_lock);
    441
    442	sugov_iowait_boost(sg_cpu, time, flags);
    443	sg_cpu->last_update = time;
    444
    445	ignore_dl_rate_limit(sg_cpu);
    446
    447	if (sugov_should_update_freq(sg_policy, time)) {
    448		next_f = sugov_next_freq_shared(sg_cpu, time);
    449
    450		if (!sugov_update_next_freq(sg_policy, time, next_f))
    451			goto unlock;
    452
    453		if (sg_policy->policy->fast_switch_enabled)
    454			cpufreq_driver_fast_switch(sg_policy->policy, next_f);
    455		else
    456			sugov_deferred_update(sg_policy);
    457	}
    458unlock:
    459	raw_spin_unlock(&sg_policy->update_lock);
    460}
    461
    462static void sugov_work(struct kthread_work *work)
    463{
    464	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
    465	unsigned int freq;
    466	unsigned long flags;
    467
    468	/*
    469	 * Hold sg_policy->update_lock briefly to handle the case where
    470	 * sg_policy->next_freq is read here and then updated by
    471	 * sugov_deferred_update() just before work_in_progress is set to false
    472	 * here, in which case we may miss queueing the new update.
    473	 *
    474	 * Note: If a work item was queued after the update_lock is released,
    475	 * sugov_work() will just be called again by the kthread_work code; and
    476	 * the request will be processed before the sugov thread sleeps.
    477	 */
    478	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
    479	freq = sg_policy->next_freq;
    480	sg_policy->work_in_progress = false;
    481	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
    482
    483	mutex_lock(&sg_policy->work_lock);
    484	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
    485	mutex_unlock(&sg_policy->work_lock);
    486}
    487
    488static void sugov_irq_work(struct irq_work *irq_work)
    489{
    490	struct sugov_policy *sg_policy;
    491
    492	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
    493
    494	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
    495}
    496
    497/************************** sysfs interface ************************/
    498
    499static struct sugov_tunables *global_tunables;
    500static DEFINE_MUTEX(global_tunables_lock);
    501
    502static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
    503{
    504	return container_of(attr_set, struct sugov_tunables, attr_set);
    505}
    506
    507static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
    508{
    509	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
    510
    511	return sprintf(buf, "%u\n", tunables->rate_limit_us);
    512}
    513
    514static ssize_t
    515rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
    516{
    517	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
    518	struct sugov_policy *sg_policy;
    519	unsigned int rate_limit_us;
    520
    521	if (kstrtouint(buf, 10, &rate_limit_us))
    522		return -EINVAL;
    523
    524	tunables->rate_limit_us = rate_limit_us;
    525
    526	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
    527		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
    528
    529	return count;
    530}
    531
    532static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
    533
    534static struct attribute *sugov_attrs[] = {
    535	&rate_limit_us.attr,
    536	NULL
    537};
    538ATTRIBUTE_GROUPS(sugov);
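
/*
 * The rate_limit_us tunable above is exposed through sysfs; with per-policy
 * governor tunables it typically shows up as (illustrative path)
 *
 *	/sys/devices/system/cpu/cpufreq/policy0/schedutil/rate_limit_us
 *
 * Reading it returns the current limit in microseconds; writing a decimal
 * value updates tunables->rate_limit_us and recomputes freq_update_delay_ns
 * for every policy attached to the same attr_set.
 */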
    539
    540static void sugov_tunables_free(struct kobject *kobj)
    541{
    542	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
    543
    544	kfree(to_sugov_tunables(attr_set));
    545}
    546
    547static struct kobj_type sugov_tunables_ktype = {
    548	.default_groups = sugov_groups,
    549	.sysfs_ops = &governor_sysfs_ops,
    550	.release = &sugov_tunables_free,
    551};
    552
    553/********************** cpufreq governor interface *********************/
    554
    555struct cpufreq_governor schedutil_gov;
    556
    557static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
    558{
    559	struct sugov_policy *sg_policy;
    560
    561	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
    562	if (!sg_policy)
    563		return NULL;
    564
    565	sg_policy->policy = policy;
    566	raw_spin_lock_init(&sg_policy->update_lock);
    567	return sg_policy;
    568}
    569
    570static void sugov_policy_free(struct sugov_policy *sg_policy)
    571{
    572	kfree(sg_policy);
    573}
    574
    575static int sugov_kthread_create(struct sugov_policy *sg_policy)
    576{
    577	struct task_struct *thread;
    578	struct sched_attr attr = {
    579		.size		= sizeof(struct sched_attr),
    580		.sched_policy	= SCHED_DEADLINE,
    581		.sched_flags	= SCHED_FLAG_SUGOV,
    582		.sched_nice	= 0,
    583		.sched_priority	= 0,
    584		/*
    585		 * Fake (unused) bandwidth; workaround to "fix"
    586		 * priority inheritance.
    587		 */
    588		.sched_runtime	=  1000000,
    589		.sched_deadline = 10000000,
    590		.sched_period	= 10000000,
    591	};
    592	struct cpufreq_policy *policy = sg_policy->policy;
    593	int ret;
    594
    595	/* kthread only required for slow path */
    596	if (policy->fast_switch_enabled)
    597		return 0;
    598
    599	kthread_init_work(&sg_policy->work, sugov_work);
    600	kthread_init_worker(&sg_policy->worker);
    601	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
    602				"sugov:%d",
    603				cpumask_first(policy->related_cpus));
    604	if (IS_ERR(thread)) {
    605		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
    606		return PTR_ERR(thread);
    607	}
    608
    609	ret = sched_setattr_nocheck(thread, &attr);
    610	if (ret) {
    611		kthread_stop(thread);
    612		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
    613		return ret;
    614	}
    615
    616	sg_policy->thread = thread;
    617	kthread_bind_mask(thread, policy->related_cpus);
    618	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
    619	mutex_init(&sg_policy->work_lock);
    620
    621	wake_up_process(thread);
    622
    623	return 0;
    624}
    625
    626static void sugov_kthread_stop(struct sugov_policy *sg_policy)
    627{
    628	/* kthread only required for slow path */
    629	if (sg_policy->policy->fast_switch_enabled)
    630		return;
    631
    632	kthread_flush_worker(&sg_policy->worker);
    633	kthread_stop(sg_policy->thread);
    634	mutex_destroy(&sg_policy->work_lock);
    635}
    636
    637static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
    638{
    639	struct sugov_tunables *tunables;
    640
    641	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
    642	if (tunables) {
    643		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
    644		if (!have_governor_per_policy())
    645			global_tunables = tunables;
    646	}
    647	return tunables;
    648}
    649
    650static void sugov_clear_global_tunables(void)
    651{
    652	if (!have_governor_per_policy())
    653		global_tunables = NULL;
    654}
    655
    656static int sugov_init(struct cpufreq_policy *policy)
    657{
    658	struct sugov_policy *sg_policy;
    659	struct sugov_tunables *tunables;
    660	int ret = 0;
    661
    662	/* State should be equivalent to EXIT */
    663	if (policy->governor_data)
    664		return -EBUSY;
    665
    666	cpufreq_enable_fast_switch(policy);
    667
    668	sg_policy = sugov_policy_alloc(policy);
    669	if (!sg_policy) {
    670		ret = -ENOMEM;
    671		goto disable_fast_switch;
    672	}
    673
    674	ret = sugov_kthread_create(sg_policy);
    675	if (ret)
    676		goto free_sg_policy;
    677
    678	mutex_lock(&global_tunables_lock);
    679
    680	if (global_tunables) {
    681		if (WARN_ON(have_governor_per_policy())) {
    682			ret = -EINVAL;
    683			goto stop_kthread;
    684		}
    685		policy->governor_data = sg_policy;
    686		sg_policy->tunables = global_tunables;
    687
    688		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
    689		goto out;
    690	}
    691
    692	tunables = sugov_tunables_alloc(sg_policy);
    693	if (!tunables) {
    694		ret = -ENOMEM;
    695		goto stop_kthread;
    696	}
    697
    698	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
    699
    700	policy->governor_data = sg_policy;
    701	sg_policy->tunables = tunables;
    702
    703	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
    704				   get_governor_parent_kobj(policy), "%s",
    705				   schedutil_gov.name);
    706	if (ret)
    707		goto fail;
    708
    709out:
    710	mutex_unlock(&global_tunables_lock);
    711	return 0;
    712
    713fail:
    714	kobject_put(&tunables->attr_set.kobj);
    715	policy->governor_data = NULL;
    716	sugov_clear_global_tunables();
    717
    718stop_kthread:
    719	sugov_kthread_stop(sg_policy);
    720	mutex_unlock(&global_tunables_lock);
    721
    722free_sg_policy:
    723	sugov_policy_free(sg_policy);
    724
    725disable_fast_switch:
    726	cpufreq_disable_fast_switch(policy);
    727
    728	pr_err("initialization failed (error %d)\n", ret);
    729	return ret;
    730}
    731
    732static void sugov_exit(struct cpufreq_policy *policy)
    733{
    734	struct sugov_policy *sg_policy = policy->governor_data;
    735	struct sugov_tunables *tunables = sg_policy->tunables;
    736	unsigned int count;
    737
    738	mutex_lock(&global_tunables_lock);
    739
    740	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
    741	policy->governor_data = NULL;
    742	if (!count)
    743		sugov_clear_global_tunables();
    744
    745	mutex_unlock(&global_tunables_lock);
    746
    747	sugov_kthread_stop(sg_policy);
    748	sugov_policy_free(sg_policy);
    749	cpufreq_disable_fast_switch(policy);
    750}
    751
    752static int sugov_start(struct cpufreq_policy *policy)
    753{
    754	struct sugov_policy *sg_policy = policy->governor_data;
    755	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
    756	unsigned int cpu;
    757
    758	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
    759	sg_policy->last_freq_update_time	= 0;
    760	sg_policy->next_freq			= 0;
    761	sg_policy->work_in_progress		= false;
    762	sg_policy->limits_changed		= false;
    763	sg_policy->cached_raw_freq		= 0;
    764
    765	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
    766
    767	for_each_cpu(cpu, policy->cpus) {
    768		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
    769
    770		memset(sg_cpu, 0, sizeof(*sg_cpu));
    771		sg_cpu->cpu			= cpu;
    772		sg_cpu->sg_policy		= sg_policy;
    773	}
    774
    775	if (policy_is_shared(policy))
    776		uu = sugov_update_shared;
    777	else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
    778		uu = sugov_update_single_perf;
    779	else
    780		uu = sugov_update_single_freq;
    781
    782	for_each_cpu(cpu, policy->cpus) {
    783		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
    784
    785		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
    786	}
    787	return 0;
    788}
    789
    790static void sugov_stop(struct cpufreq_policy *policy)
    791{
    792	struct sugov_policy *sg_policy = policy->governor_data;
    793	unsigned int cpu;
    794
    795	for_each_cpu(cpu, policy->cpus)
    796		cpufreq_remove_update_util_hook(cpu);
    797
    798	synchronize_rcu();
    799
    800	if (!policy->fast_switch_enabled) {
    801		irq_work_sync(&sg_policy->irq_work);
    802		kthread_cancel_work_sync(&sg_policy->work);
    803	}
    804}
    805
    806static void sugov_limits(struct cpufreq_policy *policy)
    807{
    808	struct sugov_policy *sg_policy = policy->governor_data;
    809
    810	if (!policy->fast_switch_enabled) {
    811		mutex_lock(&sg_policy->work_lock);
    812		cpufreq_policy_apply_limits(policy);
    813		mutex_unlock(&sg_policy->work_lock);
    814	}
    815
    816	sg_policy->limits_changed = true;
    817}
    818
    819struct cpufreq_governor schedutil_gov = {
    820	.name			= "schedutil",
    821	.owner			= THIS_MODULE,
    822	.flags			= CPUFREQ_GOV_DYNAMIC_SWITCHING,
    823	.init			= sugov_init,
    824	.exit			= sugov_exit,
    825	.start			= sugov_start,
    826	.stop			= sugov_stop,
    827	.limits			= sugov_limits,
    828};
    829
    830#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
    831struct cpufreq_governor *cpufreq_default_governor(void)
    832{
    833	return &schedutil_gov;
    834}
    835#endif
    836
    837cpufreq_governor_init(schedutil_gov);
    838
    839#ifdef CONFIG_ENERGY_MODEL
    840static void rebuild_sd_workfn(struct work_struct *work)
    841{
    842	rebuild_sched_domains_energy();
    843}
    844static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
    845
    846/*
    847 * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
    848 * on governor changes to make sure the scheduler knows about it.
    849 */
    850void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
    851				  struct cpufreq_governor *old_gov)
    852{
    853	if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
    854		/*
    855		 * When called from the cpufreq_register_driver() path, the
    856		 * cpu_hotplug_lock is already held, so use a work item to
    857		 * avoid nested locking in rebuild_sched_domains().
    858		 */
    859		schedule_work(&rebuild_sd_work);
    860	}
    861
    862}
    863#endif