cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

bfq-cgroup.c (40834B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * cgroups support for the BFQ I/O scheduler.
      4 */
      5#include <linux/module.h>
      6#include <linux/slab.h>
      7#include <linux/blkdev.h>
      8#include <linux/cgroup.h>
      9#include <linux/ktime.h>
     10#include <linux/rbtree.h>
     11#include <linux/ioprio.h>
     12#include <linux/sbitmap.h>
     13#include <linux/delay.h>
     14
     15#include "elevator.h"
     16#include "bfq-iosched.h"
     17
     18#ifdef CONFIG_BFQ_CGROUP_DEBUG
     19static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp)
     20{
     21	int ret;
     22
     23	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
     24	if (ret)
     25		return ret;
     26
     27	atomic64_set(&stat->aux_cnt, 0);
     28	return 0;
     29}
     30
     31static void bfq_stat_exit(struct bfq_stat *stat)
     32{
     33	percpu_counter_destroy(&stat->cpu_cnt);
     34}
     35
     36/**
     37 * bfq_stat_add - add a value to a bfq_stat
     38 * @stat: target bfq_stat
     39 * @val: value to add
     40 *
     41 * Add @val to @stat.  The caller must ensure that IRQs on the same CPU
     42 * don't re-enter this function for the same counter.
     43 */
     44static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val)
     45{
     46	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
     47}
     48
     49/**
     50 * bfq_stat_read - read the current value of a bfq_stat
     51 * @stat: bfq_stat to read
     52 */
     53static inline uint64_t bfq_stat_read(struct bfq_stat *stat)
     54{
     55	return percpu_counter_sum_positive(&stat->cpu_cnt);
     56}
     57
     58/**
     59 * bfq_stat_reset - reset a bfq_stat
     60 * @stat: bfq_stat to reset
     61 */
     62static inline void bfq_stat_reset(struct bfq_stat *stat)
     63{
     64	percpu_counter_set(&stat->cpu_cnt, 0);
     65	atomic64_set(&stat->aux_cnt, 0);
     66}
     67
     68/**
     69 * bfq_stat_add_aux - add a bfq_stat into another's aux count
     70 * @to: the destination bfq_stat
     71 * @from: the source
     72 *
     73 * Add @from's count including the aux one to @to's aux count.
     74 */
     75static inline void bfq_stat_add_aux(struct bfq_stat *to,
     76				     struct bfq_stat *from)
     77{
     78	atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt),
     79		     &to->aux_cnt);
     80}
     81
     82/**
     83 * blkg_prfill_stat - prfill callback for bfq_stat
     84 * @sf: seq_file to print to
     85 * @pd: policy private data of interest
     86 * @off: offset to the bfq_stat in @pd
     87 *
     88 * prfill callback for printing a bfq_stat.
     89 */
     90static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
     91		int off)
     92{
     93	return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off));
     94}
     95
     96/* bfqg stats flags */
     97enum bfqg_stats_flags {
     98	BFQG_stats_waiting = 0,
     99	BFQG_stats_idling,
    100	BFQG_stats_empty,
    101};
    102
    103#define BFQG_FLAG_FNS(name)						\
    104static void bfqg_stats_mark_##name(struct bfqg_stats *stats)	\
    105{									\
    106	stats->flags |= (1 << BFQG_stats_##name);			\
    107}									\
    108static void bfqg_stats_clear_##name(struct bfqg_stats *stats)	\
    109{									\
    110	stats->flags &= ~(1 << BFQG_stats_##name);			\
    111}									\
    112static int bfqg_stats_##name(struct bfqg_stats *stats)		\
    113{									\
    114	return (stats->flags & (1 << BFQG_stats_##name)) != 0;		\
    115}									\
    116
    117BFQG_FLAG_FNS(waiting)
    118BFQG_FLAG_FNS(idling)
    119BFQG_FLAG_FNS(empty)
    120#undef BFQG_FLAG_FNS
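/*
 * For reference, each BFQG_FLAG_FNS() invocation above generates a
 * mark/clear/test helper trio for one bfqg_stats flag. For example,
 * BFQG_FLAG_FNS(waiting) expands (modulo whitespace) to:
 *
 *	static void bfqg_stats_mark_waiting(struct bfqg_stats *stats)
 *	{
 *		stats->flags |= (1 << BFQG_stats_waiting);
 *	}
 *	static void bfqg_stats_clear_waiting(struct bfqg_stats *stats)
 *	{
 *		stats->flags &= ~(1 << BFQG_stats_waiting);
 *	}
 *	static int bfqg_stats_waiting(struct bfqg_stats *stats)
 *	{
 *		return (stats->flags & (1 << BFQG_stats_waiting)) != 0;
 *	}
 */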
    121
    122/* This should be called with the scheduler lock held. */
    123static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
    124{
    125	u64 now;
    126
    127	if (!bfqg_stats_waiting(stats))
    128		return;
    129
    130	now = ktime_get_ns();
    131	if (now > stats->start_group_wait_time)
    132		bfq_stat_add(&stats->group_wait_time,
    133			      now - stats->start_group_wait_time);
    134	bfqg_stats_clear_waiting(stats);
    135}
    136
    137/* This should be called with the scheduler lock held. */
    138static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
    139						 struct bfq_group *curr_bfqg)
    140{
    141	struct bfqg_stats *stats = &bfqg->stats;
    142
    143	if (bfqg_stats_waiting(stats))
    144		return;
    145	if (bfqg == curr_bfqg)
    146		return;
    147	stats->start_group_wait_time = ktime_get_ns();
    148	bfqg_stats_mark_waiting(stats);
    149}
    150
    151/* This should be called with the scheduler lock held. */
    152static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
    153{
    154	u64 now;
    155
    156	if (!bfqg_stats_empty(stats))
    157		return;
    158
    159	now = ktime_get_ns();
    160	if (now > stats->start_empty_time)
    161		bfq_stat_add(&stats->empty_time,
    162			      now - stats->start_empty_time);
    163	bfqg_stats_clear_empty(stats);
    164}
    165
    166void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
    167{
    168	bfq_stat_add(&bfqg->stats.dequeue, 1);
    169}
    170
    171void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
    172{
    173	struct bfqg_stats *stats = &bfqg->stats;
    174
    175	if (blkg_rwstat_total(&stats->queued))
    176		return;
    177
    178	/*
    179	 * The group is already marked empty. This can happen if bfqq got a new
    180	 * request in the parent group and moved to this group while being added
    181	 * to the service tree. Just ignore the event and move on.
    182	 */
    183	if (bfqg_stats_empty(stats))
    184		return;
    185
    186	stats->start_empty_time = ktime_get_ns();
    187	bfqg_stats_mark_empty(stats);
    188}
    189
    190void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
    191{
    192	struct bfqg_stats *stats = &bfqg->stats;
    193
    194	if (bfqg_stats_idling(stats)) {
    195		u64 now = ktime_get_ns();
    196
    197		if (now > stats->start_idle_time)
    198			bfq_stat_add(&stats->idle_time,
    199				      now - stats->start_idle_time);
    200		bfqg_stats_clear_idling(stats);
    201	}
    202}
    203
    204void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
    205{
    206	struct bfqg_stats *stats = &bfqg->stats;
    207
    208	stats->start_idle_time = ktime_get_ns();
    209	bfqg_stats_mark_idling(stats);
    210}
    211
    212void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
    213{
    214	struct bfqg_stats *stats = &bfqg->stats;
    215
    216	bfq_stat_add(&stats->avg_queue_size_sum,
    217		      blkg_rwstat_total(&stats->queued));
    218	bfq_stat_add(&stats->avg_queue_size_samples, 1);
    219	bfqg_stats_update_group_wait_time(stats);
    220}
    221
    222void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
    223			      unsigned int op)
    224{
    225	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
    226	bfqg_stats_end_empty_time(&bfqg->stats);
    227	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
    228		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
    229}
    230
    231void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
    232{
    233	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
    234}
    235
    236void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
    237{
    238	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
    239}
    240
    241void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
    242				  u64 io_start_time_ns, unsigned int op)
    243{
    244	struct bfqg_stats *stats = &bfqg->stats;
    245	u64 now = ktime_get_ns();
    246
    247	if (now > io_start_time_ns)
    248		blkg_rwstat_add(&stats->service_time, op,
    249				now - io_start_time_ns);
    250	if (io_start_time_ns > start_time_ns)
    251		blkg_rwstat_add(&stats->wait_time, op,
    252				io_start_time_ns - start_time_ns);
    253}
    254
    255#else /* CONFIG_BFQ_CGROUP_DEBUG */
    256
    257void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
    258			      unsigned int op) { }
    259void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
    260void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
    261void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
    262				  u64 io_start_time_ns, unsigned int op) { }
    263void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
    264void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
    265void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
    266void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
    267void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }
    268
    269#endif /* CONFIG_BFQ_CGROUP_DEBUG */
    270
    271#ifdef CONFIG_BFQ_GROUP_IOSCHED
    272
    273/*
    274 * blk-cgroup policy-related handlers
    275 * The following functions help in converting between blk-cgroup
    276 * internal structures and BFQ-specific structures.
    277 */
    278
    279static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
    280{
    281	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
    282}
    283
    284struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
    285{
    286	return pd_to_blkg(&bfqg->pd);
    287}
    288
    289static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
    290{
    291	return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
    292}
    293
    294/*
    295 * bfq_group handlers
    296 * The following functions help in navigating the bfq_group hierarchy
    297 * by allowing to find the parent of a bfq_group or the bfq_group
    298 * associated to a bfq_queue.
    299 */
    300
    301static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
    302{
    303	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
    304
    305	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
    306}
    307
    308struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
    309{
    310	struct bfq_entity *group_entity = bfqq->entity.parent;
    311
    312	return group_entity ? container_of(group_entity, struct bfq_group,
    313					   entity) :
    314			      bfqq->bfqd->root_group;
    315}
    316
    317/*
    318 * The following two functions handle get and put of a bfq_group by
    319 * wrapping the related blk-cgroup hooks.
    320 */
    321
    322static void bfqg_get(struct bfq_group *bfqg)
    323{
    324	bfqg->ref++;
    325}
    326
    327static void bfqg_put(struct bfq_group *bfqg)
    328{
    329	bfqg->ref--;
    330
    331	if (bfqg->ref == 0)
    332		kfree(bfqg);
    333}
    334
    335static void bfqg_and_blkg_get(struct bfq_group *bfqg)
    336{
    337	/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
    338	bfqg_get(bfqg);
    339
    340	blkg_get(bfqg_to_blkg(bfqg));
    341}
    342
    343void bfqg_and_blkg_put(struct bfq_group *bfqg)
    344{
    345	blkg_put(bfqg_to_blkg(bfqg));
    346
    347	bfqg_put(bfqg);
    348}
    349
    350void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
    351{
    352	struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
    353
    354	if (!bfqg)
    355		return;
    356
    357	blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
    358	blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
    359}
    360
    361/* @stats = 0 */
    362static void bfqg_stats_reset(struct bfqg_stats *stats)
    363{
    364#ifdef CONFIG_BFQ_CGROUP_DEBUG
    365	/* queued stats shouldn't be cleared */
    366	blkg_rwstat_reset(&stats->merged);
    367	blkg_rwstat_reset(&stats->service_time);
    368	blkg_rwstat_reset(&stats->wait_time);
    369	bfq_stat_reset(&stats->time);
    370	bfq_stat_reset(&stats->avg_queue_size_sum);
    371	bfq_stat_reset(&stats->avg_queue_size_samples);
    372	bfq_stat_reset(&stats->dequeue);
    373	bfq_stat_reset(&stats->group_wait_time);
    374	bfq_stat_reset(&stats->idle_time);
    375	bfq_stat_reset(&stats->empty_time);
    376#endif
    377}
    378
    379/* @to += @from */
    380static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
    381{
    382	if (!to || !from)
    383		return;
    384
    385#ifdef CONFIG_BFQ_CGROUP_DEBUG
    386	/* queued stats shouldn't be cleared */
    387	blkg_rwstat_add_aux(&to->merged, &from->merged);
    388	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
    389	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
    390	bfq_stat_add_aux(&to->time, &from->time);
    391	bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
    392	bfq_stat_add_aux(&to->avg_queue_size_samples,
    393			  &from->avg_queue_size_samples);
    394	bfq_stat_add_aux(&to->dequeue, &from->dequeue);
    395	bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
    396	bfq_stat_add_aux(&to->idle_time, &from->idle_time);
    397	bfq_stat_add_aux(&to->empty_time, &from->empty_time);
    398#endif
    399}
    400
    401/*
    402 * Transfer @bfqg's stats to its parent's aux counts so that the ancestors'
    403 * recursive stats can still account for the amount used by this bfqg after
    404 * it's gone.
    405 */
    406static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
    407{
    408	struct bfq_group *parent;
    409
    410	if (!bfqg) /* root_group */
    411		return;
    412
    413	parent = bfqg_parent(bfqg);
    414
    415	lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);
    416
    417	if (unlikely(!parent))
    418		return;
    419
    420	bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
    421	bfqg_stats_reset(&bfqg->stats);
    422}
    423
    424void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
    425{
    426	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
    427
    428	entity->weight = entity->new_weight;
    429	entity->orig_weight = entity->new_weight;
    430	if (bfqq) {
    431		bfqq->ioprio = bfqq->new_ioprio;
    432		bfqq->ioprio_class = bfqq->new_ioprio_class;
    433		/*
    434		 * Make sure that bfqg and its associated blkg do not
    435		 * disappear before entity.
    436		 */
    437		bfqg_and_blkg_get(bfqg);
    438	}
    439	entity->parent = bfqg->my_entity; /* NULL for root group */
    440	entity->sched_data = &bfqg->sched_data;
    441}
    442
    443static void bfqg_stats_exit(struct bfqg_stats *stats)
    444{
    445	blkg_rwstat_exit(&stats->bytes);
    446	blkg_rwstat_exit(&stats->ios);
    447#ifdef CONFIG_BFQ_CGROUP_DEBUG
    448	blkg_rwstat_exit(&stats->merged);
    449	blkg_rwstat_exit(&stats->service_time);
    450	blkg_rwstat_exit(&stats->wait_time);
    451	blkg_rwstat_exit(&stats->queued);
    452	bfq_stat_exit(&stats->time);
    453	bfq_stat_exit(&stats->avg_queue_size_sum);
    454	bfq_stat_exit(&stats->avg_queue_size_samples);
    455	bfq_stat_exit(&stats->dequeue);
    456	bfq_stat_exit(&stats->group_wait_time);
    457	bfq_stat_exit(&stats->idle_time);
    458	bfq_stat_exit(&stats->empty_time);
    459#endif
    460}
    461
    462static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
    463{
    464	if (blkg_rwstat_init(&stats->bytes, gfp) ||
    465	    blkg_rwstat_init(&stats->ios, gfp))
    466		goto error;
    467
    468#ifdef CONFIG_BFQ_CGROUP_DEBUG
    469	if (blkg_rwstat_init(&stats->merged, gfp) ||
    470	    blkg_rwstat_init(&stats->service_time, gfp) ||
    471	    blkg_rwstat_init(&stats->wait_time, gfp) ||
    472	    blkg_rwstat_init(&stats->queued, gfp) ||
    473	    bfq_stat_init(&stats->time, gfp) ||
    474	    bfq_stat_init(&stats->avg_queue_size_sum, gfp) ||
    475	    bfq_stat_init(&stats->avg_queue_size_samples, gfp) ||
    476	    bfq_stat_init(&stats->dequeue, gfp) ||
    477	    bfq_stat_init(&stats->group_wait_time, gfp) ||
    478	    bfq_stat_init(&stats->idle_time, gfp) ||
    479	    bfq_stat_init(&stats->empty_time, gfp))
    480		goto error;
    481#endif
    482
    483	return 0;
    484
    485error:
    486	bfqg_stats_exit(stats);
    487	return -ENOMEM;
    488}
    489
    490static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
    491{
    492	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
    493}
    494
    495static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
    496{
    497	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
    498}
    499
    500static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
    501{
    502	struct bfq_group_data *bgd;
    503
    504	bgd = kzalloc(sizeof(*bgd), gfp);
    505	if (!bgd)
    506		return NULL;
    507	return &bgd->pd;
    508}
    509
    510static void bfq_cpd_init(struct blkcg_policy_data *cpd)
    511{
    512	struct bfq_group_data *d = cpd_to_bfqgd(cpd);
    513
    514	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
    515		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
    516}
    517
    518static void bfq_cpd_free(struct blkcg_policy_data *cpd)
    519{
    520	kfree(cpd_to_bfqgd(cpd));
    521}
    522
    523static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
    524					     struct blkcg *blkcg)
    525{
    526	struct bfq_group *bfqg;
    527
    528	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
    529	if (!bfqg)
    530		return NULL;
    531
    532	if (bfqg_stats_init(&bfqg->stats, gfp)) {
    533		kfree(bfqg);
    534		return NULL;
    535	}
    536
    537	/* see comments in bfq_bic_update_cgroup for why refcounting */
    538	bfqg_get(bfqg);
    539	return &bfqg->pd;
    540}
    541
    542static void bfq_pd_init(struct blkg_policy_data *pd)
    543{
    544	struct blkcg_gq *blkg = pd_to_blkg(pd);
    545	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
    546	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
    547	struct bfq_entity *entity = &bfqg->entity;
    548	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
    549
    550	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
    551	entity->my_sched_data = &bfqg->sched_data;
    552	entity->last_bfqq_created = NULL;
    553
    554	bfqg->my_entity = entity; /*
    555				   * the root_group's will be set to NULL
    556				   * in bfq_init_queue()
    557				   */
    558	bfqg->bfqd = bfqd;
    559	bfqg->active_entities = 0;
    560	bfqg->online = true;
    561	bfqg->rq_pos_tree = RB_ROOT;
    562}
    563
    564static void bfq_pd_free(struct blkg_policy_data *pd)
    565{
    566	struct bfq_group *bfqg = pd_to_bfqg(pd);
    567
    568	bfqg_stats_exit(&bfqg->stats);
    569	bfqg_put(bfqg);
    570}
    571
    572static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
    573{
    574	struct bfq_group *bfqg = pd_to_bfqg(pd);
    575
    576	bfqg_stats_reset(&bfqg->stats);
    577}
    578
    579static void bfq_group_set_parent(struct bfq_group *bfqg,
    580					struct bfq_group *parent)
    581{
    582	struct bfq_entity *entity;
    583
    584	entity = &bfqg->entity;
    585	entity->parent = parent->my_entity;
    586	entity->sched_data = &parent->sched_data;
    587}
    588
    589static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
    590{
    591	struct bfq_group *parent;
    592	struct bfq_entity *entity;
    593
    594	/*
    595	 * Update chain of bfq_groups as we might be handling a leaf group
    596	 * which, along with some of its relatives, has not been hooked yet
    597	 * to the private hierarchy of BFQ.
    598	 */
    599	entity = &bfqg->entity;
    600	for_each_entity(entity) {
    601		struct bfq_group *curr_bfqg = container_of(entity,
    602						struct bfq_group, entity);
    603		if (curr_bfqg != bfqd->root_group) {
    604			parent = bfqg_parent(curr_bfqg);
    605			if (!parent)
    606				parent = bfqd->root_group;
    607			bfq_group_set_parent(curr_bfqg, parent);
    608		}
    609	}
    610}
    611
    612struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
    613{
    614	struct blkcg_gq *blkg = bio->bi_blkg;
    615	struct bfq_group *bfqg;
    616
    617	while (blkg) {
    618		bfqg = blkg_to_bfqg(blkg);
    619		if (bfqg->online) {
    620			bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
    621			return bfqg;
    622		}
    623		blkg = blkg->parent;
    624	}
    625	bio_associate_blkg_from_css(bio,
    626				&bfqg_to_blkg(bfqd->root_group)->blkcg->css);
    627	return bfqd->root_group;
    628}
    629
    630/**
    631 * bfq_bfqq_move - migrate @bfqq to @bfqg.
    632 * @bfqd: queue descriptor.
    633 * @bfqq: the queue to move.
    634 * @bfqg: the group to move to.
    635 *
    636 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
    637 * it on the new one.  Avoid putting the entity on the old group idle tree.
    638 *
    639 * Must be called under the scheduler lock, to make sure that the blkg
    640 * owning @bfqg does not disappear (see comments in
    641 * bfq_bic_update_cgroup on guaranteeing the consistency of blkg
    642 * objects).
    643 */
    644void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
    645		   struct bfq_group *bfqg)
    646{
    647	struct bfq_entity *entity = &bfqq->entity;
    648	struct bfq_group *old_parent = bfqq_group(bfqq);
    649
    650	/*
    651	 * No point in moving bfqq to the same group, which can happen when
    652	 * the root group is offlined
    653	 */
    654	if (old_parent == bfqg)
    655		return;
    656
    657	/*
    658	 * oom_bfqq is not allowed to move; it will hold a ref to root_group
    659	 * until elevator exit.
    660	 */
    661	if (bfqq == &bfqd->oom_bfqq)
    662		return;
    663	/*
    664	 * Get extra reference to prevent bfqq from being freed in
    665	 * next possible expire or deactivate.
    666	 */
    667	bfqq->ref++;
    668
    669	/* If bfqq is empty, then bfq_bfqq_expire also invokes
    670	 * bfq_del_bfqq_busy, thereby removing bfqq and its entity
    671	 * from data structures related to current group. Otherwise we
    672	 * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
    673	 * we do below.
    674	 */
    675	if (bfqq == bfqd->in_service_queue)
    676		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
    677				false, BFQQE_PREEMPTED);
    678
    679	if (bfq_bfqq_busy(bfqq))
    680		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
    681	else if (entity->on_st_or_in_serv)
    682		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
    683	bfqg_and_blkg_put(old_parent);
    684
    685	if (entity->parent &&
    686	    entity->parent->last_bfqq_created == bfqq)
    687		entity->parent->last_bfqq_created = NULL;
    688	else if (bfqd->last_bfqq_created == bfqq)
    689		bfqd->last_bfqq_created = NULL;
    690
    691	entity->parent = bfqg->my_entity;
    692	entity->sched_data = &bfqg->sched_data;
    693	/* pin down bfqg and its associated blkg  */
    694	bfqg_and_blkg_get(bfqg);
    695
    696	if (bfq_bfqq_busy(bfqq)) {
    697		if (unlikely(!bfqd->nonrot_with_queueing))
    698			bfq_pos_tree_add_move(bfqd, bfqq);
    699		bfq_activate_bfqq(bfqd, bfqq);
    700	}
    701
    702	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
    703		bfq_schedule_dispatch(bfqd);
    704	/* release extra ref taken above, bfqq may happen to be freed now */
    705	bfq_put_queue(bfqq);
    706}
    707
    708/**
    709 * __bfq_bic_change_cgroup - move @bic to @bfqg.
    710 * @bfqd: the queue descriptor.
    711 * @bic: the bic to move.
    712 * @bfqg: the bfq_group to move to.
    713 *
    714 * Move bic to bfqg, assuming that bfqd->lock is held, which makes
    715 * sure that the reference to the cgroup is valid across the call (see
    716 * comments in bfq_bic_update_cgroup on this issue).
    717 */
    718static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
    719				     struct bfq_io_cq *bic,
    720				     struct bfq_group *bfqg)
    721{
    722	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
    723	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
    724	struct bfq_entity *entity;
    725
    726	if (async_bfqq) {
    727		entity = &async_bfqq->entity;
    728
    729		if (entity->sched_data != &bfqg->sched_data) {
    730			bic_set_bfqq(bic, NULL, 0);
    731			bfq_release_process_ref(bfqd, async_bfqq);
    732		}
    733	}
    734
    735	if (sync_bfqq) {
    736		if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
    737			/* We are the only user of this bfqq, just move it */
    738			if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
    739				bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
    740		} else {
    741			struct bfq_queue *bfqq;
    742
    743			/*
    744			 * The queue was merged to a different queue. Check
    745			 * that the merge chain still belongs to the same
    746			 * cgroup.
    747			 */
    748			for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
    749				if (bfqq->entity.sched_data !=
    750				    &bfqg->sched_data)
    751					break;
    752			if (bfqq) {
    753				/*
    754				 * Some queue changed cgroup so the merge is
    755				 * not valid anymore. We cannot easily just
    756				 * cancel the merge (by clearing new_bfqq) as
    757				 * there may be other processes using this
    758				 * queue and holding refs to all queues below
    759				 * sync_bfqq->new_bfqq. Similarly if the merge
    760				 * already happened, we need to detach from
    761				 * bfqq now so that we cannot merge bio to a
    762				 * request from the old cgroup.
    763				 */
    764				bfq_put_cooperator(sync_bfqq);
    765				bfq_release_process_ref(bfqd, sync_bfqq);
    766				bic_set_bfqq(bic, NULL, 1);
    767			}
    768		}
    769	}
    770
    771	return bfqg;
    772}
    773
    774void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
    775{
    776	struct bfq_data *bfqd = bic_to_bfqd(bic);
    777	struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
    778	uint64_t serial_nr;
    779
    780	serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
    781
    782	/*
    783	 * Check whether blkcg has changed.  The condition may trigger
    784	 * spuriously on a newly created bic but there's no harm.
    785	 */
    786	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
    787		return;
    788
    789	/*
    790	 * New cgroup for this process. Make sure it is linked to bfq internal
    791	 * cgroup hierarchy.
    792	 */
    793	bfq_link_bfqg(bfqd, bfqg);
    794	__bfq_bic_change_cgroup(bfqd, bic, bfqg);
    795	/*
    796	 * Update blkg_path for bfq_log_* functions. We cache this
    797	 * path, and update it here, for the following
    798	 * reasons. Operations on blkg objects in blk-cgroup are
    799	 * protected with the request_queue lock, and not with the
    800	 * lock that protects the instances of this scheduler
    801	 * (bfqd->lock). This exposes BFQ to the following sort of
    802	 * race.
    803	 *
    804	 * The blkg_lookup performed in bfq_get_queue, protected
    805	 * through rcu, may happen to return the address of a copy of
    806	 * the original blkg. If this is the case, then the
    807	 * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
    808	 * the blkg, is useless: it does not prevent blk-cgroup code
    809	 * from destroying both the original blkg and all objects
    810	 * directly or indirectly referred by the copy of the
    811	 * blkg.
    812	 *
    813	 * On the bright side, destroy operations on a blkg invoke, as
    814	 * a first step, hooks of the scheduler associated with the
    815	 * blkg. And these hooks are executed with bfqd->lock held for
    816	 * BFQ. As a consequence, for any blkg associated with the
    817	 * request queue this instance of the scheduler is attached
    818	 * to, we are guaranteed that such a blkg is not destroyed, and
    819	 * that all the pointers it contains are consistent, while we
    820	 * are holding bfqd->lock. A blkg_lookup performed with
    821	 * bfqd->lock held then returns a fully consistent blkg, which
    822	 * remains consistent as long as this lock is held.
    823	 *
    824	 * Thanks to the last fact, and to the fact that: (1) bfqg has
    825	 * been obtained through a blkg_lookup in the above
    826	 * assignment, and (2) bfqd->lock is being held, here we can
    827	 * safely use the policy data for the involved blkg (i.e., the
    828	 * field bfqg->pd) to get to the blkg associated with bfqg,
    829	 * and then we can safely use any field of blkg. After we
    830	 * release bfqd->lock, even just getting blkg through this
    831	 * bfqg may cause dangling references to be traversed, as
    832	 * bfqg->pd may not exist any more.
    833	 *
    834	 * In view of the above facts, here we cache, in the bfqg, any
    835	 * blkg data we may need for this bic, and for its associated
    836	 * bfq_queue. As of now, we need to cache only the path of the
    837	 * blkg, which is used in the bfq_log_* functions.
    838	 *
    839	 * Finally, note that bfqg itself needs to be protected from
    840	 * destruction on the blkg_free of the original blkg (which
    841	 * invokes bfq_pd_free). We use an additional private
    842	 * refcounter for bfqg, to let it disappear only after no
    843	 * bfq_queue refers to it any longer.
    844	 */
    845	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
    846	bic->blkcg_serial_nr = serial_nr;
    847}
    848
    849/**
    850 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
    851 * @st: the service tree being flushed.
    852 */
    853static void bfq_flush_idle_tree(struct bfq_service_tree *st)
    854{
    855	struct bfq_entity *entity = st->first_idle;
    856
    857	for (; entity ; entity = st->first_idle)
    858		__bfq_deactivate_entity(entity, false);
    859}
    860
    861/**
    862 * bfq_reparent_leaf_entity - move leaf entity to the root_group.
    863 * @bfqd: the device data structure with the root group.
    864 * @entity: the entity to move, if entity is a leaf; or the parent entity
    865 *	    of an active leaf entity to move, if entity is not a leaf.
    866 */
    867static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
    868				     struct bfq_entity *entity,
    869				     int ioprio_class)
    870{
    871	struct bfq_queue *bfqq;
    872	struct bfq_entity *child_entity = entity;
    873
    874	while (child_entity->my_sched_data) { /* leaf not reached yet */
    875		struct bfq_sched_data *child_sd = child_entity->my_sched_data;
    876		struct bfq_service_tree *child_st = child_sd->service_tree +
    877			ioprio_class;
    878		struct rb_root *child_active = &child_st->active;
    879
    880		child_entity = bfq_entity_of(rb_first(child_active));
    881
    882		if (!child_entity)
    883			child_entity = child_sd->in_service_entity;
    884	}
    885
    886	bfqq = bfq_entity_to_bfqq(child_entity);
    887	bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
    888}
    889
    890/**
    891 * bfq_reparent_active_queues - move to the root group all active queues.
    892 * @bfqd: the device data structure with the root group.
    893 * @bfqg: the group to move from.
    894 * @st: the service tree to start the search from.
    895 */
    896static void bfq_reparent_active_queues(struct bfq_data *bfqd,
    897				       struct bfq_group *bfqg,
    898				       struct bfq_service_tree *st,
    899				       int ioprio_class)
    900{
    901	struct rb_root *active = &st->active;
    902	struct bfq_entity *entity;
    903
    904	while ((entity = bfq_entity_of(rb_first(active))))
    905		bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);
    906
    907	if (bfqg->sched_data.in_service_entity)
    908		bfq_reparent_leaf_entity(bfqd,
    909					 bfqg->sched_data.in_service_entity,
    910					 ioprio_class);
    911}
    912
    913/**
    914 * bfq_pd_offline - deactivate the entity associated with @pd,
    915 *		    and reparent its children entities.
    916 * @pd: descriptor of the policy going offline.
    917 *
    918 * blkio already grabs the queue_lock for us, so no need to use
    919 * RCU-based magic
    920 */
    921static void bfq_pd_offline(struct blkg_policy_data *pd)
    922{
    923	struct bfq_service_tree *st;
    924	struct bfq_group *bfqg = pd_to_bfqg(pd);
    925	struct bfq_data *bfqd = bfqg->bfqd;
    926	struct bfq_entity *entity = bfqg->my_entity;
    927	unsigned long flags;
    928	int i;
    929
    930	spin_lock_irqsave(&bfqd->lock, flags);
    931
    932	if (!entity) /* root group */
    933		goto put_async_queues;
    934
    935	/*
    936	 * Empty all service_trees belonging to this group before
    937	 * deactivating the group itself.
    938	 */
    939	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
    940		st = bfqg->sched_data.service_tree + i;
    941
    942		/*
    943		 * It may happen that some queues are still active
    944		 * (busy) upon group destruction (if the corresponding
    945		 * processes have been forced to terminate). We move
    946		 * all the leaf entities corresponding to these queues
    947		 * to the root_group.
    948		 * Also, it may happen that the group has an entity
    949		 * in service, which is disconnected from the active
    950		 * tree: it must be moved, too.
    951		 * There is no need to put the sync queues, as the
    952		 * scheduler has taken no reference.
    953		 */
    954		bfq_reparent_active_queues(bfqd, bfqg, st, i);
    955
    956		/*
    957		 * The idle tree may still contain bfq_queues
    958		 * belonging to exited tasks because they never
    959		 * migrated to a different cgroup from the one being
    960		 * destroyed now. In addition, even
    961		 * bfq_reparent_active_queues() may happen to add some
    962		 * entities to the idle tree. It happens if, in some
    963		 * of the calls to bfq_bfqq_move() performed by
    964		 * bfq_reparent_active_queues(), the queue to move is
    965		 * empty and gets expired.
    966		 */
    967		bfq_flush_idle_tree(st);
    968	}
    969
    970	__bfq_deactivate_entity(entity, false);
    971
    972put_async_queues:
    973	bfq_put_async_queues(bfqd, bfqg);
    974	bfqg->online = false;
    975
    976	spin_unlock_irqrestore(&bfqd->lock, flags);
    977	/*
    978	 * @blkg is going offline and will be ignored by
    979	 * blkg_[rw]stat_recursive_sum().  Transfer stats to the parent so
    980	 * that they don't get lost.  If IOs complete after this point, the
    981	 * stats for them will be lost.  Oh well...
    982	 */
    983	bfqg_stats_xfer_dead(bfqg);
    984}
    985
    986void bfq_end_wr_async(struct bfq_data *bfqd)
    987{
    988	struct blkcg_gq *blkg;
    989
    990	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
    991		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
    992
    993		bfq_end_wr_async_queues(bfqd, bfqg);
    994	}
    995	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
    996}
    997
    998static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
    999{
   1000	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
   1001	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
   1002	unsigned int val = 0;
   1003
   1004	if (bfqgd)
   1005		val = bfqgd->weight;
   1006
   1007	seq_printf(sf, "%u\n", val);
   1008
   1009	return 0;
   1010}
   1011
   1012static u64 bfqg_prfill_weight_device(struct seq_file *sf,
   1013				     struct blkg_policy_data *pd, int off)
   1014{
   1015	struct bfq_group *bfqg = pd_to_bfqg(pd);
   1016
   1017	if (!bfqg->entity.dev_weight)
   1018		return 0;
   1019	return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
   1020}
   1021
   1022static int bfq_io_show_weight(struct seq_file *sf, void *v)
   1023{
   1024	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
   1025	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
   1026
   1027	seq_printf(sf, "default %u\n", bfqgd->weight);
   1028	blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
   1029			  &blkcg_policy_bfq, 0, false);
   1030	return 0;
   1031}
   1032
   1033static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
   1034{
   1035	weight = dev_weight ?: weight;
   1036
   1037	bfqg->entity.dev_weight = dev_weight;
   1038	/*
   1039	 * Setting the prio_changed flag of the entity
   1040	 * to 1 with new_weight == weight would re-set
   1041	 * the value of the weight to its ioprio mapping.
   1042	 * Set the flag only if necessary.
   1043	 */
   1044	if ((unsigned short)weight != bfqg->entity.new_weight) {
   1045		bfqg->entity.new_weight = (unsigned short)weight;
   1046		/*
   1047		 * Make sure that the above new value has been
   1048		 * stored in bfqg->entity.new_weight before
   1049		 * setting the prio_changed flag. In fact,
   1050		 * this flag may be read asynchronously (in
   1051		 * critical sections protected by a different
   1052		 * lock than that held here), and finding this
   1053		 * flag set may cause the execution of the code
   1054		 * for updating parameters whose value may
   1055		 * depend also on bfqg->entity.new_weight (in
   1056		 * __bfq_entity_update_weight_prio).
   1057		 * This barrier makes sure that the new value
   1058		 * of bfqg->entity.new_weight is correctly
   1059		 * seen in that code.
   1060		 */
   1061		smp_wmb();
   1062		bfqg->entity.prio_changed = 1;
   1063	}
   1064}
   1065
   1066static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
   1067				    struct cftype *cftype,
   1068				    u64 val)
   1069{
   1070	struct blkcg *blkcg = css_to_blkcg(css);
   1071	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
   1072	struct blkcg_gq *blkg;
   1073	int ret = -ERANGE;
   1074
   1075	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
   1076		return ret;
   1077
   1078	ret = 0;
   1079	spin_lock_irq(&blkcg->lock);
   1080	bfqgd->weight = (unsigned short)val;
   1081	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
   1082		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
   1083
   1084		if (bfqg)
   1085			bfq_group_set_weight(bfqg, val, 0);
   1086	}
   1087	spin_unlock_irq(&blkcg->lock);
   1088
   1089	return ret;
   1090}
   1091
   1092static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
   1093					char *buf, size_t nbytes,
   1094					loff_t off)
   1095{
   1096	int ret;
   1097	struct blkg_conf_ctx ctx;
   1098	struct blkcg *blkcg = css_to_blkcg(of_css(of));
   1099	struct bfq_group *bfqg;
   1100	u64 v;
   1101
   1102	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
   1103	if (ret)
   1104		return ret;
   1105
   1106	if (sscanf(ctx.body, "%llu", &v) == 1) {
   1107		/* require "default" on dfl */
   1108		ret = -ERANGE;
   1109		if (!v)
   1110			goto out;
   1111	} else if (!strcmp(strim(ctx.body), "default")) {
   1112		v = 0;
   1113	} else {
   1114		ret = -EINVAL;
   1115		goto out;
   1116	}
   1117
   1118	bfqg = blkg_to_bfqg(ctx.blkg);
   1119
   1120	ret = -ERANGE;
   1121	if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
   1122		bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
   1123		ret = 0;
   1124	}
   1125out:
   1126	blkg_conf_finish(&ctx);
   1127	return ret ?: nbytes;
   1128}
   1129
   1130static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
   1131				 char *buf, size_t nbytes,
   1132				 loff_t off)
   1133{
   1134	char *endp;
   1135	int ret;
   1136	u64 v;
   1137
   1138	buf = strim(buf);
   1139
   1140	/* "WEIGHT" or "default WEIGHT" sets the default weight */
   1141	v = simple_strtoull(buf, &endp, 0);
   1142	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
   1143		ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
   1144		return ret ?: nbytes;
   1145	}
   1146
   1147	return bfq_io_set_device_weight(of, buf, nbytes, off);
   1148}
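/*
 * Illustrative usage sketch (the cgroupfs file names are an assumption, not
 * taken from this file): the weight files declared further below are
 * typically exposed as io.bfq.weight on the cgroup-v2 hierarchy and as
 * blkio.bfq.weight / blkio.bfq.weight_device on the legacy one. Given the
 * parsing in bfq_io_set_weight() and bfq_io_set_device_weight(), accepted
 * writes look like:
 *
 *	# set the per-cgroup default weight (BFQ_MIN_WEIGHT..BFQ_MAX_WEIGHT)
 *	echo 300 > io.bfq.weight
 *	echo "default 300" > io.bfq.weight
 *
 *	# set or clear a per-device weight (MAJ:MIN parsed by blkg_conf_prep())
 *	echo "8:0 500" > io.bfq.weight
 *	echo "8:0 default" > io.bfq.weight
 */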
   1149
   1150static int bfqg_print_rwstat(struct seq_file *sf, void *v)
   1151{
   1152	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
   1153			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
   1154	return 0;
   1155}
   1156
   1157static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
   1158					struct blkg_policy_data *pd, int off)
   1159{
   1160	struct blkg_rwstat_sample sum;
   1161
   1162	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
   1163	return __blkg_prfill_rwstat(sf, pd, &sum);
   1164}
   1165
   1166static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
   1167{
   1168	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
   1169			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
   1170			  seq_cft(sf)->private, true);
   1171	return 0;
   1172}
   1173
   1174#ifdef CONFIG_BFQ_CGROUP_DEBUG
   1175static int bfqg_print_stat(struct seq_file *sf, void *v)
   1176{
   1177	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
   1178			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
   1179	return 0;
   1180}
   1181
   1182static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
   1183				      struct blkg_policy_data *pd, int off)
   1184{
   1185	struct blkcg_gq *blkg = pd_to_blkg(pd);
   1186	struct blkcg_gq *pos_blkg;
   1187	struct cgroup_subsys_state *pos_css;
   1188	u64 sum = 0;
   1189
   1190	lockdep_assert_held(&blkg->q->queue_lock);
   1191
   1192	rcu_read_lock();
   1193	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
   1194		struct bfq_stat *stat;
   1195
   1196		if (!pos_blkg->online)
   1197			continue;
   1198
   1199		stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
   1200		sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
   1201	}
   1202	rcu_read_unlock();
   1203
   1204	return __blkg_prfill_u64(sf, pd, sum);
   1205}
   1206
   1207static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
   1208{
   1209	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
   1210			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
   1211			  seq_cft(sf)->private, false);
   1212	return 0;
   1213}
   1214
   1215static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
   1216			       int off)
   1217{
   1218	struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
   1219	u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);
   1220
   1221	return __blkg_prfill_u64(sf, pd, sum >> 9);
   1222}
   1223
   1224static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
   1225{
   1226	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
   1227			  bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
   1228	return 0;
   1229}
   1230
   1231static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
   1232					 struct blkg_policy_data *pd, int off)
   1233{
   1234	struct blkg_rwstat_sample tmp;
   1235
   1236	blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
   1237			offsetof(struct bfq_group, stats.bytes), &tmp);
   1238
   1239	return __blkg_prfill_u64(sf, pd,
   1240		(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
   1241}
   1242
   1243static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
   1244{
   1245	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
   1246			  bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
   1247			  false);
   1248	return 0;
   1249}
   1250
   1251static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
   1252				      struct blkg_policy_data *pd, int off)
   1253{
   1254	struct bfq_group *bfqg = pd_to_bfqg(pd);
   1255	u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples);
   1256	u64 v = 0;
   1257
   1258	if (samples) {
   1259		v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum);
   1260		v = div64_u64(v, samples);
   1261	}
   1262	__blkg_prfill_u64(sf, pd, v);
   1263	return 0;
   1264}
   1265
   1266/* print avg_queue_size */
   1267static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
   1268{
   1269	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
   1270			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
   1271			  0, false);
   1272	return 0;
   1273}
   1274#endif /* CONFIG_BFQ_CGROUP_DEBUG */
   1275
   1276struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
   1277{
   1278	int ret;
   1279
   1280	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
   1281	if (ret)
   1282		return NULL;
   1283
   1284	return blkg_to_bfqg(bfqd->queue->root_blkg);
   1285}
   1286
   1287struct blkcg_policy blkcg_policy_bfq = {
   1288	.dfl_cftypes		= bfq_blkg_files,
   1289	.legacy_cftypes		= bfq_blkcg_legacy_files,
   1290
   1291	.cpd_alloc_fn		= bfq_cpd_alloc,
   1292	.cpd_init_fn		= bfq_cpd_init,
   1293	.cpd_bind_fn	        = bfq_cpd_init,
   1294	.cpd_free_fn		= bfq_cpd_free,
   1295
   1296	.pd_alloc_fn		= bfq_pd_alloc,
   1297	.pd_init_fn		= bfq_pd_init,
   1298	.pd_offline_fn		= bfq_pd_offline,
   1299	.pd_free_fn		= bfq_pd_free,
   1300	.pd_reset_stats_fn	= bfq_pd_reset_stats,
   1301};
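/*
 * Note: registration of this policy with the blk-cgroup core is not done
 * here; bfq-iosched.c registers it (via blkcg_policy_register()) when the
 * scheduler is initialized and unregisters it on exit.
 */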
   1302
   1303struct cftype bfq_blkcg_legacy_files[] = {
   1304	{
   1305		.name = "bfq.weight",
   1306		.flags = CFTYPE_NOT_ON_ROOT,
   1307		.seq_show = bfq_io_show_weight_legacy,
   1308		.write_u64 = bfq_io_set_weight_legacy,
   1309	},
   1310	{
   1311		.name = "bfq.weight_device",
   1312		.flags = CFTYPE_NOT_ON_ROOT,
   1313		.seq_show = bfq_io_show_weight,
   1314		.write = bfq_io_set_weight,
   1315	},
   1316
   1317	/* statistics, covers only the tasks in the bfqg */
   1318	{
   1319		.name = "bfq.io_service_bytes",
   1320		.private = offsetof(struct bfq_group, stats.bytes),
   1321		.seq_show = bfqg_print_rwstat,
   1322	},
   1323	{
   1324		.name = "bfq.io_serviced",
   1325		.private = offsetof(struct bfq_group, stats.ios),
   1326		.seq_show = bfqg_print_rwstat,
   1327	},
   1328#ifdef CONFIG_BFQ_CGROUP_DEBUG
   1329	{
   1330		.name = "bfq.time",
   1331		.private = offsetof(struct bfq_group, stats.time),
   1332		.seq_show = bfqg_print_stat,
   1333	},
   1334	{
   1335		.name = "bfq.sectors",
   1336		.seq_show = bfqg_print_stat_sectors,
   1337	},
   1338	{
   1339		.name = "bfq.io_service_time",
   1340		.private = offsetof(struct bfq_group, stats.service_time),
   1341		.seq_show = bfqg_print_rwstat,
   1342	},
   1343	{
   1344		.name = "bfq.io_wait_time",
   1345		.private = offsetof(struct bfq_group, stats.wait_time),
   1346		.seq_show = bfqg_print_rwstat,
   1347	},
   1348	{
   1349		.name = "bfq.io_merged",
   1350		.private = offsetof(struct bfq_group, stats.merged),
   1351		.seq_show = bfqg_print_rwstat,
   1352	},
   1353	{
   1354		.name = "bfq.io_queued",
   1355		.private = offsetof(struct bfq_group, stats.queued),
   1356		.seq_show = bfqg_print_rwstat,
   1357	},
   1358#endif /* CONFIG_BFQ_CGROUP_DEBUG */
   1359
   1360	/* the same statistics which cover the bfqg and its descendants */
   1361	{
   1362		.name = "bfq.io_service_bytes_recursive",
   1363		.private = offsetof(struct bfq_group, stats.bytes),
   1364		.seq_show = bfqg_print_rwstat_recursive,
   1365	},
   1366	{
   1367		.name = "bfq.io_serviced_recursive",
   1368		.private = offsetof(struct bfq_group, stats.ios),
   1369		.seq_show = bfqg_print_rwstat_recursive,
   1370	},
   1371#ifdef CONFIG_BFQ_CGROUP_DEBUG
   1372	{
   1373		.name = "bfq.time_recursive",
   1374		.private = offsetof(struct bfq_group, stats.time),
   1375		.seq_show = bfqg_print_stat_recursive,
   1376	},
   1377	{
   1378		.name = "bfq.sectors_recursive",
   1379		.seq_show = bfqg_print_stat_sectors_recursive,
   1380	},
   1381	{
   1382		.name = "bfq.io_service_time_recursive",
   1383		.private = offsetof(struct bfq_group, stats.service_time),
   1384		.seq_show = bfqg_print_rwstat_recursive,
   1385	},
   1386	{
   1387		.name = "bfq.io_wait_time_recursive",
   1388		.private = offsetof(struct bfq_group, stats.wait_time),
   1389		.seq_show = bfqg_print_rwstat_recursive,
   1390	},
   1391	{
   1392		.name = "bfq.io_merged_recursive",
   1393		.private = offsetof(struct bfq_group, stats.merged),
   1394		.seq_show = bfqg_print_rwstat_recursive,
   1395	},
   1396	{
   1397		.name = "bfq.io_queued_recursive",
   1398		.private = offsetof(struct bfq_group, stats.queued),
   1399		.seq_show = bfqg_print_rwstat_recursive,
   1400	},
   1401	{
   1402		.name = "bfq.avg_queue_size",
   1403		.seq_show = bfqg_print_avg_queue_size,
   1404	},
   1405	{
   1406		.name = "bfq.group_wait_time",
   1407		.private = offsetof(struct bfq_group, stats.group_wait_time),
   1408		.seq_show = bfqg_print_stat,
   1409	},
   1410	{
   1411		.name = "bfq.idle_time",
   1412		.private = offsetof(struct bfq_group, stats.idle_time),
   1413		.seq_show = bfqg_print_stat,
   1414	},
   1415	{
   1416		.name = "bfq.empty_time",
   1417		.private = offsetof(struct bfq_group, stats.empty_time),
   1418		.seq_show = bfqg_print_stat,
   1419	},
   1420	{
   1421		.name = "bfq.dequeue",
   1422		.private = offsetof(struct bfq_group, stats.dequeue),
   1423		.seq_show = bfqg_print_stat,
   1424	},
   1425#endif	/* CONFIG_BFQ_CGROUP_DEBUG */
   1426	{ }	/* terminate */
   1427};
   1428
   1429struct cftype bfq_blkg_files[] = {
   1430	{
   1431		.name = "bfq.weight",
   1432		.flags = CFTYPE_NOT_ON_ROOT,
   1433		.seq_show = bfq_io_show_weight,
   1434		.write = bfq_io_set_weight,
   1435	},
   1436	{} /* terminate */
   1437};
   1438
   1439#else	/* CONFIG_BFQ_GROUP_IOSCHED */
   1440
   1441void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
   1442		   struct bfq_group *bfqg) {}
   1443
   1444void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
   1445{
   1446	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
   1447
   1448	entity->weight = entity->new_weight;
   1449	entity->orig_weight = entity->new_weight;
   1450	if (bfqq) {
   1451		bfqq->ioprio = bfqq->new_ioprio;
   1452		bfqq->ioprio_class = bfqq->new_ioprio_class;
   1453	}
   1454	entity->sched_data = &bfqg->sched_data;
   1455}
   1456
   1457void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}
   1458
   1459void bfq_end_wr_async(struct bfq_data *bfqd)
   1460{
   1461	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
   1462}
   1463
   1464struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
   1465{
   1466	return bfqd->root_group;
   1467}
   1468
   1469struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
   1470{
   1471	return bfqq->bfqd->root_group;
   1472}
   1473
   1474void bfqg_and_blkg_get(struct bfq_group *bfqg) {}
   1475
   1476void bfqg_and_blkg_put(struct bfq_group *bfqg) {}
   1477
   1478struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
   1479{
   1480	struct bfq_group *bfqg;
   1481	int i;
   1482
   1483	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
   1484	if (!bfqg)
   1485		return NULL;
   1486
   1487	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
   1488		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
   1489
   1490	return bfqg;
   1491}
   1492#endif	/* CONFIG_BFQ_GROUP_IOSCHED */