cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

blk-merge.c (30448B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Functions related to segment and merge handling
      4 */
      5#include <linux/kernel.h>
      6#include <linux/module.h>
      7#include <linux/bio.h>
      8#include <linux/blkdev.h>
      9#include <linux/blk-integrity.h>
     10#include <linux/scatterlist.h>
     11#include <linux/part_stat.h>
     12#include <linux/blk-cgroup.h>
     13
     14#include <trace/events/block.h>
     15
     16#include "blk.h"
     17#include "blk-mq-sched.h"
     18#include "blk-rq-qos.h"
     19#include "blk-throttle.h"
     20
     21static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
     22{
     23	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
     24}
     25
     26static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
     27{
     28	struct bvec_iter iter = bio->bi_iter;
     29	int idx;
     30
     31	bio_get_first_bvec(bio, bv);
     32	if (bv->bv_len == bio->bi_iter.bi_size)
     33		return;		/* this bio only has a single bvec */
     34
     35	bio_advance_iter(bio, &iter, iter.bi_size);
     36
     37	if (!iter.bi_bvec_done)
     38		idx = iter.bi_idx - 1;
     39	else	/* in the middle of bvec */
     40		idx = iter.bi_idx;
     41
     42	*bv = bio->bi_io_vec[idx];
     43
     44	/*
     45	 * iter.bi_bvec_done records actual length of the last bvec
     46	 * if this bio ends in the middle of one io vector
     47	 */
     48	if (iter.bi_bvec_done)
     49		bv->bv_len = iter.bi_bvec_done;
     50}
     51
     52static inline bool bio_will_gap(struct request_queue *q,
     53		struct request *prev_rq, struct bio *prev, struct bio *next)
     54{
     55	struct bio_vec pb, nb;
     56
     57	if (!bio_has_data(prev) || !queue_virt_boundary(q))
     58		return false;
     59
     60	/*
      61	 * Don't merge if the 1st bio starts with a non-zero offset, otherwise it
      62	 * is quite difficult to respect the sg gap limit.  We work hard to
      63	 * merge a huge number of small single bios in the mkfs case.
     64	 */
     65	if (prev_rq)
     66		bio_get_first_bvec(prev_rq->bio, &pb);
     67	else
     68		bio_get_first_bvec(prev, &pb);
     69	if (pb.bv_offset & queue_virt_boundary(q))
     70		return true;
     71
     72	/*
     73	 * We don't need to worry about the situation that the merged segment
     74	 * ends in unaligned virt boundary:
     75	 *
     76	 * - if 'pb' ends aligned, the merged segment ends aligned
     77	 * - if 'pb' ends unaligned, the next bio must include
     78	 *   one single bvec of 'nb', otherwise the 'nb' can't
     79	 *   merge with 'pb'
     80	 */
     81	bio_get_last_bvec(prev, &pb);
     82	bio_get_first_bvec(next, &nb);
     83	if (biovec_phys_mergeable(q, &pb, &nb))
     84		return false;
     85	return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
     86}
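
The virt-boundary check above is easy to replay with assumed numbers: with a 4 KiB virt boundary mask (0xfff), two bvecs can only be merged across bios if the first one ends on a 4 KiB boundary and the second one starts on one. The userspace sketch below mirrors the edge test used by __bvec_gap_to_prev(); all offsets are made up.

	#include <stdio.h>

	int main(void)
	{
		unsigned long mask = 0xfff;          /* assumed 4 KiB virt boundary   */
		unsigned prev_end_offset = 0x1000;   /* first bvec ends 4 KiB-aligned */
		unsigned next_start_offset = 0x200;  /* second bvec starts mid-page   */

		/* a gap exists if either edge falls inside the boundary window */
		int gap = (prev_end_offset & mask) || (next_start_offset & mask);

		printf("gap: %s\n", gap ? "yes (don't merge)" : "no");
		return 0;
	}
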
     87
     88static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
     89{
     90	return bio_will_gap(req->q, req, req->biotail, bio);
     91}
     92
     93static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
     94{
     95	return bio_will_gap(req->q, NULL, bio, req->bio);
     96}
     97
     98static struct bio *blk_bio_discard_split(struct request_queue *q,
     99					 struct bio *bio,
    100					 struct bio_set *bs,
    101					 unsigned *nsegs)
    102{
    103	unsigned int max_discard_sectors, granularity;
    104	int alignment;
    105	sector_t tmp;
    106	unsigned split_sectors;
    107
    108	*nsegs = 1;
    109
    110	/* Zero-sector (unknown) and one-sector granularities are the same.  */
    111	granularity = max(q->limits.discard_granularity >> 9, 1U);
    112
    113	max_discard_sectors = min(q->limits.max_discard_sectors,
    114			bio_allowed_max_sectors(q));
    115	max_discard_sectors -= max_discard_sectors % granularity;
    116
    117	if (unlikely(!max_discard_sectors)) {
    118		/* XXX: warn */
    119		return NULL;
    120	}
    121
    122	if (bio_sectors(bio) <= max_discard_sectors)
    123		return NULL;
    124
    125	split_sectors = max_discard_sectors;
    126
    127	/*
    128	 * If the next starting sector would be misaligned, stop the discard at
    129	 * the previous aligned sector.
    130	 */
    131	alignment = (q->limits.discard_alignment >> 9) % granularity;
    132
    133	tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
    134	tmp = sector_div(tmp, granularity);
    135
    136	if (split_sectors > tmp)
    137		split_sectors -= tmp;
    138
    139	return bio_split(bio, split_sectors, GFP_NOIO, bs);
    140}
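
The alignment step above is easiest to follow with concrete numbers. The sketch below is a hypothetical, userspace-only replay of the same arithmetic (sector_div() replaced by a plain 64-bit modulo); the limits used are invented, not taken from any real device.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* Made-up limits: 1 MiB granularity, bio starting at sector 7. */
		uint64_t bi_sector = 7;          /* bio start, in 512-byte sectors         */
		unsigned granularity = 2048;     /* discard_granularity >> 9               */
		unsigned alignment = 0;          /* (discard_alignment >> 9) % granularity */
		unsigned split_sectors = 8192;   /* max_discard_sectors (granularity multiple) */

		/* Pull the split back so the remainder starts on a granule. */
		uint64_t tmp = (bi_sector + split_sectors - alignment) % granularity;
		if (split_sectors > tmp)
			split_sectors -= tmp;

		/* 8192 - ((7 + 8192) % 2048) = 8185; the next discard starts at sector 8192 */
		printf("split at %u sectors\n", split_sectors);
		return 0;
	}
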
    141
    142static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
    143		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
    144{
    145	*nsegs = 0;
    146
    147	if (!q->limits.max_write_zeroes_sectors)
    148		return NULL;
    149
    150	if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
    151		return NULL;
    152
    153	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
    154}
    155
    156/*
    157 * Return the maximum number of sectors from the start of a bio that may be
    158 * submitted as a single request to a block device. If enough sectors remain,
    159 * align the end to the physical block size. Otherwise align the end to the
    160 * logical block size. This approach minimizes the number of non-aligned
    161 * requests that are submitted to a block device if the start of a bio is not
    162 * aligned to a physical block boundary.
    163 */
    164static inline unsigned get_max_io_size(struct request_queue *q,
    165				       struct bio *bio)
    166{
    167	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
    168	unsigned max_sectors = sectors;
    169	unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
    170	unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
    171	unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1);
    172
    173	max_sectors += start_offset;
    174	max_sectors &= ~(pbs - 1);
    175	if (max_sectors > start_offset)
    176		return max_sectors - start_offset;
    177
    178	return sectors & ~(lbs - 1);
    179}
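
To illustrate the comment above with assumed numbers: take a 4 KiB physical block (8 sectors), a 512-byte logical block (1 sector), and a bio that starts 3 sectors into a physical block with a 256-sector limit from blk_max_size_offset(). The userspace sketch below replays the same mask arithmetic; all values are hypothetical.

	#include <stdio.h>

	int main(void)
	{
		unsigned sectors = 256;      /* assumed limit for this offset   */
		unsigned pbs = 8;            /* physical block size, in sectors */
		unsigned lbs = 1;            /* logical block size, in sectors  */
		unsigned start_offset = 3;   /* bi_sector & (pbs - 1)           */
		unsigned max_sectors = sectors + start_offset;

		max_sectors &= ~(pbs - 1);   /* align the end down to a physical block */
		if (max_sectors > start_offset)
			/* (256 + 3) & ~7 = 256, minus the start offset -> 253 sectors */
			printf("%u sectors\n", max_sectors - start_offset);
		else
			printf("%u sectors\n", sectors & ~(lbs - 1));
		return 0;
	}
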
    180
    181static inline unsigned get_max_segment_size(const struct request_queue *q,
    182					    struct page *start_page,
    183					    unsigned long offset)
    184{
    185	unsigned long mask = queue_segment_boundary(q);
    186
    187	offset = mask & (page_to_phys(start_page) + offset);
    188
    189	/*
     190	 * An overflow may be triggered in the case of a zero page physical address
     191	 * on a 32-bit arch; use the queue's max segment size when that happens.
    192	 */
    193	return min_not_zero(mask - offset + 1,
    194			(unsigned long)queue_max_segment_size(q));
    195}
    196
    197/**
    198 * bvec_split_segs - verify whether or not a bvec should be split in the middle
    199 * @q:        [in] request queue associated with the bio associated with @bv
    200 * @bv:       [in] bvec to examine
    201 * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
    202 *            by the number of segments from @bv that may be appended to that
    203 *            bio without exceeding @max_segs
    204 * @sectors:  [in,out] Number of sectors in the bio being built. Incremented
    205 *            by the number of sectors from @bv that may be appended to that
    206 *            bio without exceeding @max_sectors
    207 * @max_segs: [in] upper bound for *@nsegs
    208 * @max_sectors: [in] upper bound for *@sectors
    209 *
    210 * When splitting a bio, it can happen that a bvec is encountered that is too
    211 * big to fit in a single segment and hence that it has to be split in the
    212 * middle. This function verifies whether or not that should happen. The value
    213 * %true is returned if and only if appending the entire @bv to a bio with
    214 * *@nsegs segments and *@sectors sectors would make that bio unacceptable for
    215 * the block driver.
    216 */
    217static bool bvec_split_segs(const struct request_queue *q,
    218			    const struct bio_vec *bv, unsigned *nsegs,
    219			    unsigned *sectors, unsigned max_segs,
    220			    unsigned max_sectors)
    221{
    222	unsigned max_len = (min(max_sectors, UINT_MAX >> 9) - *sectors) << 9;
    223	unsigned len = min(bv->bv_len, max_len);
    224	unsigned total_len = 0;
    225	unsigned seg_size = 0;
    226
    227	while (len && *nsegs < max_segs) {
    228		seg_size = get_max_segment_size(q, bv->bv_page,
    229						bv->bv_offset + total_len);
    230		seg_size = min(seg_size, len);
    231
    232		(*nsegs)++;
    233		total_len += seg_size;
    234		len -= seg_size;
    235
    236		if ((bv->bv_offset + total_len) & queue_virt_boundary(q))
    237			break;
    238	}
    239
    240	*sectors += total_len >> 9;
    241
    242	/* tell the caller to split the bvec if it is too big to fit */
    243	return len > 0 || bv->bv_len > max_len;
    244}
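
As a concrete illustration of the accounting above: with an assumed 64 KiB max segment size, no virt boundary, and generous segment/sector budgets, a single physically contiguous 256 KiB bvec is carved into four segments and does not need a mid-bvec split. The standalone sketch below replays only that counting loop; the limits are invented.

	#include <stdio.h>

	int main(void)
	{
		unsigned bv_len = 256 << 10;        /* one 256 KiB bvec                  */
		unsigned max_seg_size = 64 << 10;   /* assumed queue_max_segment_size()  */
		unsigned max_segs = 128;
		unsigned nsegs = 0, len = bv_len;

		while (len && nsegs < max_segs) {
			unsigned seg = len < max_seg_size ? len : max_seg_size;

			nsegs++;
			len -= seg;
		}

		/* len == 0 within the budget, so the bvec fits as four whole segments */
		printf("segments: %u, leftover bytes: %u\n", nsegs, len);
		return 0;
	}
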
    245
    246/**
    247 * blk_bio_segment_split - split a bio in two bios
    248 * @q:    [in] request queue pointer
    249 * @bio:  [in] bio to be split
    250 * @bs:	  [in] bio set to allocate the clone from
    251 * @segs: [out] number of segments in the bio with the first half of the sectors
    252 *
    253 * Clone @bio, update the bi_iter of the clone to represent the first sectors
    254 * of @bio and update @bio->bi_iter to represent the remaining sectors. The
    255 * following is guaranteed for the cloned bio:
    256 * - That it has at most get_max_io_size(@q, @bio) sectors.
    257 * - That it has at most queue_max_segments(@q) segments.
    258 *
    259 * Except for discard requests the cloned bio will point at the bi_io_vec of
    260 * the original bio. It is the responsibility of the caller to ensure that the
    261 * original bio is not freed before the cloned bio. The caller is also
    262 * responsible for ensuring that @bs is only destroyed after processing of the
    263 * split bio has finished.
    264 */
    265static struct bio *blk_bio_segment_split(struct request_queue *q,
    266					 struct bio *bio,
    267					 struct bio_set *bs,
    268					 unsigned *segs)
    269{
    270	struct bio_vec bv, bvprv, *bvprvp = NULL;
    271	struct bvec_iter iter;
    272	unsigned nsegs = 0, sectors = 0;
    273	const unsigned max_sectors = get_max_io_size(q, bio);
    274	const unsigned max_segs = queue_max_segments(q);
    275
    276	bio_for_each_bvec(bv, bio, iter) {
    277		/*
    278		 * If the queue doesn't support SG gaps and adding this
    279		 * offset would create a gap, disallow it.
    280		 */
    281		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
    282			goto split;
    283
    284		if (nsegs < max_segs &&
    285		    sectors + (bv.bv_len >> 9) <= max_sectors &&
    286		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
    287			nsegs++;
    288			sectors += bv.bv_len >> 9;
    289		} else if (bvec_split_segs(q, &bv, &nsegs, &sectors, max_segs,
    290					 max_sectors)) {
    291			goto split;
    292		}
    293
    294		bvprv = bv;
    295		bvprvp = &bvprv;
    296	}
    297
    298	*segs = nsegs;
    299	return NULL;
    300split:
    301	*segs = nsegs;
    302
    303	/*
     304	 * Bio splitting may cause subtle trouble such as hangs when doing sync
     305	 * iopoll in the direct I/O path. Since the performance gain of iopoll
     306	 * for big I/O is trivial at best, disable iopoll when a split is needed.
    307	 */
    308	bio_clear_polled(bio);
    309	return bio_split(bio, sectors, GFP_NOIO, bs);
    310}
    311
    312/**
    313 * __blk_queue_split - split a bio and submit the second half
    314 * @q:       [in] request_queue new bio is being queued at
    315 * @bio:     [in, out] bio to be split
    316 * @nr_segs: [out] number of segments in the first bio
    317 *
    318 * Split a bio into two bios, chain the two bios, submit the second half and
    319 * store a pointer to the first half in *@bio. If the second bio is still too
    320 * big it will be split by a recursive call to this function. Since this
    321 * function may allocate a new bio from q->bio_split, it is the responsibility
    322 * of the caller to ensure that q->bio_split is only released after processing
    323 * of the split bio has finished.
    324 */
    325void __blk_queue_split(struct request_queue *q, struct bio **bio,
    326		       unsigned int *nr_segs)
    327{
    328	struct bio *split = NULL;
    329
    330	switch (bio_op(*bio)) {
    331	case REQ_OP_DISCARD:
    332	case REQ_OP_SECURE_ERASE:
    333		split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
    334		break;
    335	case REQ_OP_WRITE_ZEROES:
    336		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
    337				nr_segs);
    338		break;
    339	default:
    340		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
    341		break;
    342	}
    343
    344	if (split) {
     345		/* there is no chance to merge the split bio */
    346		split->bi_opf |= REQ_NOMERGE;
    347
    348		bio_chain(split, *bio);
    349		trace_block_split(split, (*bio)->bi_iter.bi_sector);
    350		submit_bio_noacct(*bio);
    351		*bio = split;
    352	}
    353}
    354
    355/**
    356 * blk_queue_split - split a bio and submit the second half
    357 * @bio: [in, out] bio to be split
    358 *
     359 * Split a bio into two bios, chain the two bios, submit the second half and
    360 * store a pointer to the first half in *@bio. Since this function may allocate
    361 * a new bio from q->bio_split, it is the responsibility of the caller to ensure
    362 * that q->bio_split is only released after processing of the split bio has
    363 * finished.
    364 */
    365void blk_queue_split(struct bio **bio)
    366{
    367	struct request_queue *q = bdev_get_queue((*bio)->bi_bdev);
    368	unsigned int nr_segs;
    369
    370	if (blk_may_split(q, *bio))
    371		__blk_queue_split(q, bio, &nr_segs);
    372}
    373EXPORT_SYMBOL(blk_queue_split);
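
For context, the exported blk_queue_split() is what bio-based (stacking) drivers call at the top of their ->submit_bio handler so that every bio they process already fits the queue limits. A minimal sketch of that pattern follows; the driver, its private data, and the queueing helper are purely hypothetical.

	/* Hypothetical bio-based driver: all example_* names are invented. */
	static void example_submit_bio(struct bio *bio)
	{
		struct example_dev *dev = bio->bi_bdev->bd_disk->private_data;

		/*
		 * May split the bio: the tail half is resubmitted via
		 * submit_bio_noacct() and *bio is replaced with the head half.
		 */
		blk_queue_split(&bio);

		example_dev_queue(dev, bio);	/* hypothetical queueing helper */
	}
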
    374
    375unsigned int blk_recalc_rq_segments(struct request *rq)
    376{
    377	unsigned int nr_phys_segs = 0;
    378	unsigned int nr_sectors = 0;
    379	struct req_iterator iter;
    380	struct bio_vec bv;
    381
    382	if (!rq->bio)
    383		return 0;
    384
    385	switch (bio_op(rq->bio)) {
    386	case REQ_OP_DISCARD:
    387	case REQ_OP_SECURE_ERASE:
    388		if (queue_max_discard_segments(rq->q) > 1) {
    389			struct bio *bio = rq->bio;
    390
    391			for_each_bio(bio)
    392				nr_phys_segs++;
    393			return nr_phys_segs;
    394		}
    395		return 1;
    396	case REQ_OP_WRITE_ZEROES:
    397		return 0;
    398	}
    399
    400	rq_for_each_bvec(bv, rq, iter)
    401		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &nr_sectors,
    402				UINT_MAX, UINT_MAX);
    403	return nr_phys_segs;
    404}
    405
    406static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
    407		struct scatterlist *sglist)
    408{
    409	if (!*sg)
    410		return sglist;
    411
    412	/*
    413	 * If the driver previously mapped a shorter list, we could see a
    414	 * termination bit prematurely unless it fully inits the sg table
    415	 * on each mapping. We KNOW that there must be more entries here
    416	 * or the driver would be buggy, so force clear the termination bit
    417	 * to avoid doing a full sg_init_table() in drivers for each command.
    418	 */
    419	sg_unmark_end(*sg);
    420	return sg_next(*sg);
    421}
    422
    423static unsigned blk_bvec_map_sg(struct request_queue *q,
    424		struct bio_vec *bvec, struct scatterlist *sglist,
    425		struct scatterlist **sg)
    426{
    427	unsigned nbytes = bvec->bv_len;
    428	unsigned nsegs = 0, total = 0;
    429
    430	while (nbytes > 0) {
    431		unsigned offset = bvec->bv_offset + total;
    432		unsigned len = min(get_max_segment_size(q, bvec->bv_page,
    433					offset), nbytes);
    434		struct page *page = bvec->bv_page;
    435
    436		/*
    437		 * Unfortunately a fair number of drivers barf on scatterlists
    438		 * that have an offset larger than PAGE_SIZE, despite other
    439		 * subsystems dealing with that invariant just fine.  For now
    440		 * stick to the legacy format where we never present those from
    441		 * the block layer, but the code below should be removed once
    442		 * these offenders (mostly MMC/SD drivers) are fixed.
    443		 */
    444		page += (offset >> PAGE_SHIFT);
    445		offset &= ~PAGE_MASK;
    446
    447		*sg = blk_next_sg(sg, sglist);
    448		sg_set_page(*sg, page, len, offset);
    449
    450		total += len;
    451		nbytes -= len;
    452		nsegs++;
    453	}
    454
    455	return nsegs;
    456}
    457
    458static inline int __blk_bvec_map_sg(struct bio_vec bv,
    459		struct scatterlist *sglist, struct scatterlist **sg)
    460{
    461	*sg = blk_next_sg(sg, sglist);
    462	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
    463	return 1;
    464}
    465
    466/* only try to merge bvecs into one sg if they are from two bios */
    467static inline bool
    468__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
    469			   struct bio_vec *bvprv, struct scatterlist **sg)
    470{
    471
    472	int nbytes = bvec->bv_len;
    473
    474	if (!*sg)
    475		return false;
    476
    477	if ((*sg)->length + nbytes > queue_max_segment_size(q))
    478		return false;
    479
    480	if (!biovec_phys_mergeable(q, bvprv, bvec))
    481		return false;
    482
    483	(*sg)->length += nbytes;
    484
    485	return true;
    486}
    487
    488static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
    489			     struct scatterlist *sglist,
    490			     struct scatterlist **sg)
    491{
    492	struct bio_vec bvec, bvprv = { NULL };
    493	struct bvec_iter iter;
    494	int nsegs = 0;
    495	bool new_bio = false;
    496
    497	for_each_bio(bio) {
    498		bio_for_each_bvec(bvec, bio, iter) {
    499			/*
     500			 * Only try to merge bvecs from two different bios,
     501			 * since bvecs within one bio have already been merged
     502			 * internally when its pages were added
    503			 */
    504			if (new_bio &&
    505			    __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
    506				goto next_bvec;
    507
    508			if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
    509				nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
    510			else
    511				nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
    512 next_bvec:
    513			new_bio = false;
    514		}
    515		if (likely(bio->bi_iter.bi_size)) {
    516			bvprv = bvec;
    517			new_bio = true;
    518		}
    519	}
    520
    521	return nsegs;
    522}
    523
    524/*
     525 * map a request to a scatterlist, return the number of sg entries set up.
     526 * The caller must make sure sg can hold rq->nr_phys_segments entries
    527 */
    528int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
    529		struct scatterlist *sglist, struct scatterlist **last_sg)
    530{
    531	int nsegs = 0;
    532
    533	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
    534		nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
    535	else if (rq->bio)
    536		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
    537
    538	if (*last_sg)
    539		sg_mark_end(*last_sg);
    540
    541	/*
     542	 * Something must be wrong if the computed number of segments
     543	 * is bigger than the number of the request's physical segments
    544	 */
    545	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
    546
    547	return nsegs;
    548}
    549EXPORT_SYMBOL(__blk_rq_map_sg);
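
A typical consumer of this interface is a blk-mq driver that flattens each request into a scatterlist before DMA-mapping it. The fragment below is a hedged sketch: the example_* names and the dma_dev field are assumptions, while __blk_rq_map_sg(), sg_init_table(), rq_dma_dir() and dma_map_sg() are the regular kernel helpers.

	/* Hypothetical blk-mq driver fragment: map one request for DMA. */
	static int example_map_request(struct example_dev *dev, struct request *rq,
				       struct scatterlist *sgl)
	{
		struct scatterlist *last_sg = NULL;
		int nsegs;

		sg_init_table(sgl, blk_rq_nr_phys_segments(rq));
		nsegs = __blk_rq_map_sg(rq->q, rq, sgl, &last_sg);
		if (!nsegs)
			return -EIO;

		/* dev->dma_dev is assumed to be the struct device doing the DMA */
		return dma_map_sg(dev->dma_dev, sgl, nsegs, rq_dma_dir(rq));
	}
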
    550
    551static inline unsigned int blk_rq_get_max_segments(struct request *rq)
    552{
    553	if (req_op(rq) == REQ_OP_DISCARD)
    554		return queue_max_discard_segments(rq->q);
    555	return queue_max_segments(rq->q);
    556}
    557
    558static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
    559						  sector_t offset)
    560{
    561	struct request_queue *q = rq->q;
    562
    563	if (blk_rq_is_passthrough(rq))
    564		return q->limits.max_hw_sectors;
    565
    566	if (!q->limits.chunk_sectors ||
    567	    req_op(rq) == REQ_OP_DISCARD ||
    568	    req_op(rq) == REQ_OP_SECURE_ERASE)
    569		return blk_queue_get_max_sectors(q, req_op(rq));
    570
    571	return min(blk_max_size_offset(q, offset, 0),
    572			blk_queue_get_max_sectors(q, req_op(rq)));
    573}
    574
    575static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
    576		unsigned int nr_phys_segs)
    577{
    578	if (!blk_cgroup_mergeable(req, bio))
    579		goto no_merge;
    580
    581	if (blk_integrity_merge_bio(req->q, req, bio) == false)
    582		goto no_merge;
    583
    584	/* discard request merge won't add new segment */
    585	if (req_op(req) == REQ_OP_DISCARD)
    586		return 1;
    587
    588	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
    589		goto no_merge;
    590
    591	/*
    592	 * This will form the start of a new hw segment.  Bump both
    593	 * counters.
    594	 */
    595	req->nr_phys_segments += nr_phys_segs;
    596	return 1;
    597
    598no_merge:
    599	req_set_nomerge(req->q, req);
    600	return 0;
    601}
    602
    603int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
    604{
    605	if (req_gap_back_merge(req, bio))
    606		return 0;
    607	if (blk_integrity_rq(req) &&
    608	    integrity_req_gap_back_merge(req, bio))
    609		return 0;
    610	if (!bio_crypt_ctx_back_mergeable(req, bio))
    611		return 0;
    612	if (blk_rq_sectors(req) + bio_sectors(bio) >
    613	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
    614		req_set_nomerge(req->q, req);
    615		return 0;
    616	}
    617
    618	return ll_new_hw_segment(req, bio, nr_segs);
    619}
    620
    621static int ll_front_merge_fn(struct request *req, struct bio *bio,
    622		unsigned int nr_segs)
    623{
    624	if (req_gap_front_merge(req, bio))
    625		return 0;
    626	if (blk_integrity_rq(req) &&
    627	    integrity_req_gap_front_merge(req, bio))
    628		return 0;
    629	if (!bio_crypt_ctx_front_mergeable(req, bio))
    630		return 0;
    631	if (blk_rq_sectors(req) + bio_sectors(bio) >
    632	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
    633		req_set_nomerge(req->q, req);
    634		return 0;
    635	}
    636
    637	return ll_new_hw_segment(req, bio, nr_segs);
    638}
    639
    640static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
    641		struct request *next)
    642{
    643	unsigned short segments = blk_rq_nr_discard_segments(req);
    644
    645	if (segments >= queue_max_discard_segments(q))
    646		goto no_merge;
    647	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
    648	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
    649		goto no_merge;
    650
    651	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
    652	return true;
    653no_merge:
    654	req_set_nomerge(q, req);
    655	return false;
    656}
    657
    658static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
    659				struct request *next)
    660{
    661	int total_phys_segments;
    662
    663	if (req_gap_back_merge(req, next->bio))
    664		return 0;
    665
    666	/*
    667	 * Will it become too large?
    668	 */
    669	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
    670	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
    671		return 0;
    672
    673	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
    674	if (total_phys_segments > blk_rq_get_max_segments(req))
    675		return 0;
    676
    677	if (!blk_cgroup_mergeable(req, next->bio))
    678		return 0;
    679
    680	if (blk_integrity_merge_rq(q, req, next) == false)
    681		return 0;
    682
    683	if (!bio_crypt_ctx_merge_rq(req, next))
    684		return 0;
    685
    686	/* Merge is OK... */
    687	req->nr_phys_segments = total_phys_segments;
    688	return 1;
    689}
    690
    691/**
    692 * blk_rq_set_mixed_merge - mark a request as mixed merge
    693 * @rq: request to mark as mixed merge
    694 *
    695 * Description:
    696 *     @rq is about to be mixed merged.  Make sure the attributes
    697 *     which can be mixed are set in each bio and mark @rq as mixed
    698 *     merged.
    699 */
    700void blk_rq_set_mixed_merge(struct request *rq)
    701{
    702	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
    703	struct bio *bio;
    704
    705	if (rq->rq_flags & RQF_MIXED_MERGE)
    706		return;
    707
    708	/*
    709	 * @rq will no longer represent mixable attributes for all the
    710	 * contained bios.  It will just track those of the first one.
     711	 * Distribute the attributes to each bio.
    712	 */
    713	for (bio = rq->bio; bio; bio = bio->bi_next) {
    714		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
    715			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
    716		bio->bi_opf |= ff;
    717	}
    718	rq->rq_flags |= RQF_MIXED_MERGE;
    719}
    720
    721static void blk_account_io_merge_request(struct request *req)
    722{
    723	if (blk_do_io_stat(req)) {
    724		part_stat_lock();
    725		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
    726		part_stat_unlock();
    727	}
    728}
    729
    730static enum elv_merge blk_try_req_merge(struct request *req,
    731					struct request *next)
    732{
    733	if (blk_discard_mergable(req))
    734		return ELEVATOR_DISCARD_MERGE;
    735	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
    736		return ELEVATOR_BACK_MERGE;
    737
    738	return ELEVATOR_NO_MERGE;
    739}
    740
    741/*
    742 * For non-mq, this has to be called with the request spinlock acquired.
    743 * For mq with scheduling, the appropriate queue wide lock should be held.
    744 */
    745static struct request *attempt_merge(struct request_queue *q,
    746				     struct request *req, struct request *next)
    747{
    748	if (!rq_mergeable(req) || !rq_mergeable(next))
    749		return NULL;
    750
    751	if (req_op(req) != req_op(next))
    752		return NULL;
    753
    754	if (rq_data_dir(req) != rq_data_dir(next))
    755		return NULL;
    756
    757	if (req->ioprio != next->ioprio)
    758		return NULL;
    759
    760	/*
    761	 * If we are allowed to merge, then append bio list
    762	 * from next to rq and release next. merge_requests_fn
    763	 * will have updated segment counts, update sector
    764	 * counts here. Handle DISCARDs separately, as they
    765	 * have separate settings.
    766	 */
    767
    768	switch (blk_try_req_merge(req, next)) {
    769	case ELEVATOR_DISCARD_MERGE:
    770		if (!req_attempt_discard_merge(q, req, next))
    771			return NULL;
    772		break;
    773	case ELEVATOR_BACK_MERGE:
    774		if (!ll_merge_requests_fn(q, req, next))
    775			return NULL;
    776		break;
    777	default:
    778		return NULL;
    779	}
    780
    781	/*
    782	 * If failfast settings disagree or any of the two is already
    783	 * a mixed merge, mark both as mixed before proceeding.  This
    784	 * makes sure that all involved bios have mixable attributes
    785	 * set properly.
    786	 */
    787	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
    788	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
    789	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
    790		blk_rq_set_mixed_merge(req);
    791		blk_rq_set_mixed_merge(next);
    792	}
    793
    794	/*
    795	 * At this point we have either done a back merge or front merge. We
    796	 * need the smaller start_time_ns of the merged requests to be the
    797	 * current request for accounting purposes.
    798	 */
    799	if (next->start_time_ns < req->start_time_ns)
    800		req->start_time_ns = next->start_time_ns;
    801
    802	req->biotail->bi_next = next->bio;
    803	req->biotail = next->biotail;
    804
    805	req->__data_len += blk_rq_bytes(next);
    806
    807	if (!blk_discard_mergable(req))
    808		elv_merge_requests(q, req, next);
    809
    810	/*
    811	 * 'next' is going away, so update stats accordingly
    812	 */
    813	blk_account_io_merge_request(next);
    814
    815	trace_block_rq_merge(next);
    816
    817	/*
    818	 * ownership of bio passed from next to req, return 'next' for
    819	 * the caller to free
    820	 */
    821	next->bio = NULL;
    822	return next;
    823}
    824
    825static struct request *attempt_back_merge(struct request_queue *q,
    826		struct request *rq)
    827{
    828	struct request *next = elv_latter_request(q, rq);
    829
    830	if (next)
    831		return attempt_merge(q, rq, next);
    832
    833	return NULL;
    834}
    835
    836static struct request *attempt_front_merge(struct request_queue *q,
    837		struct request *rq)
    838{
    839	struct request *prev = elv_former_request(q, rq);
    840
    841	if (prev)
    842		return attempt_merge(q, prev, rq);
    843
    844	return NULL;
    845}
    846
    847/*
    848 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
    849 * otherwise. The caller is responsible for freeing 'next' if the merge
    850 * happened.
    851 */
    852bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
    853			   struct request *next)
    854{
    855	return attempt_merge(q, rq, next);
    856}
    857
    858bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
    859{
    860	if (!rq_mergeable(rq) || !bio_mergeable(bio))
    861		return false;
    862
    863	if (req_op(rq) != bio_op(bio))
    864		return false;
    865
    866	/* different data direction or already started, don't merge */
    867	if (bio_data_dir(bio) != rq_data_dir(rq))
    868		return false;
    869
    870	/* don't merge across cgroup boundaries */
    871	if (!blk_cgroup_mergeable(rq, bio))
    872		return false;
    873
     874	/* only merge an integrity-protected bio into a likewise-protected rq */
    875	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
    876		return false;
    877
    878	/* Only merge if the crypt contexts are compatible */
    879	if (!bio_crypt_rq_ctx_compatible(rq, bio))
    880		return false;
    881
    882	if (rq->ioprio != bio_prio(bio))
    883		return false;
    884
    885	return true;
    886}
    887
    888enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
    889{
    890	if (blk_discard_mergable(rq))
    891		return ELEVATOR_DISCARD_MERGE;
    892	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
    893		return ELEVATOR_BACK_MERGE;
    894	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
    895		return ELEVATOR_FRONT_MERGE;
    896	return ELEVATOR_NO_MERGE;
    897}
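
The position checks above reduce to simple sector arithmetic: a request covering sectors [pos, pos + sectors) back-merges a bio that starts exactly at its end, and front-merges a bio that ends exactly at its start. The standalone sketch below replays those two comparisons with invented sector numbers.

	#include <stdio.h>

	int main(void)
	{
		unsigned long long rq_pos = 2048, rq_sectors = 8;   /* request: sectors 2048..2055 */
		unsigned long long bio_sector = 2056, bio_sectors = 8;

		if (rq_pos + rq_sectors == bio_sector)
			puts("back merge: bio starts right after the request");
		else if (rq_pos - bio_sectors == bio_sector)
			puts("front merge: bio ends right where the request starts");
		else
			puts("no merge");
		return 0;
	}
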
    898
    899static void blk_account_io_merge_bio(struct request *req)
    900{
    901	if (!blk_do_io_stat(req))
    902		return;
    903
    904	part_stat_lock();
    905	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
    906	part_stat_unlock();
    907}
    908
    909enum bio_merge_status {
    910	BIO_MERGE_OK,
    911	BIO_MERGE_NONE,
    912	BIO_MERGE_FAILED,
    913};
    914
    915static enum bio_merge_status bio_attempt_back_merge(struct request *req,
    916		struct bio *bio, unsigned int nr_segs)
    917{
    918	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
    919
    920	if (!ll_back_merge_fn(req, bio, nr_segs))
    921		return BIO_MERGE_FAILED;
    922
    923	trace_block_bio_backmerge(bio);
    924	rq_qos_merge(req->q, req, bio);
    925
    926	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
    927		blk_rq_set_mixed_merge(req);
    928
    929	req->biotail->bi_next = bio;
    930	req->biotail = bio;
    931	req->__data_len += bio->bi_iter.bi_size;
    932
    933	bio_crypt_free_ctx(bio);
    934
    935	blk_account_io_merge_bio(req);
    936	return BIO_MERGE_OK;
    937}
    938
    939static enum bio_merge_status bio_attempt_front_merge(struct request *req,
    940		struct bio *bio, unsigned int nr_segs)
    941{
    942	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
    943
    944	if (!ll_front_merge_fn(req, bio, nr_segs))
    945		return BIO_MERGE_FAILED;
    946
    947	trace_block_bio_frontmerge(bio);
    948	rq_qos_merge(req->q, req, bio);
    949
    950	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
    951		blk_rq_set_mixed_merge(req);
    952
    953	bio->bi_next = req->bio;
    954	req->bio = bio;
    955
    956	req->__sector = bio->bi_iter.bi_sector;
    957	req->__data_len += bio->bi_iter.bi_size;
    958
    959	bio_crypt_do_front_merge(req, bio);
    960
    961	blk_account_io_merge_bio(req);
    962	return BIO_MERGE_OK;
    963}
    964
    965static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
    966		struct request *req, struct bio *bio)
    967{
    968	unsigned short segments = blk_rq_nr_discard_segments(req);
    969
    970	if (segments >= queue_max_discard_segments(q))
    971		goto no_merge;
    972	if (blk_rq_sectors(req) + bio_sectors(bio) >
    973	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
    974		goto no_merge;
    975
    976	rq_qos_merge(q, req, bio);
    977
    978	req->biotail->bi_next = bio;
    979	req->biotail = bio;
    980	req->__data_len += bio->bi_iter.bi_size;
    981	req->nr_phys_segments = segments + 1;
    982
    983	blk_account_io_merge_bio(req);
    984	return BIO_MERGE_OK;
    985no_merge:
    986	req_set_nomerge(q, req);
    987	return BIO_MERGE_FAILED;
    988}
    989
    990static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
    991						   struct request *rq,
    992						   struct bio *bio,
    993						   unsigned int nr_segs,
    994						   bool sched_allow_merge)
    995{
    996	if (!blk_rq_merge_ok(rq, bio))
    997		return BIO_MERGE_NONE;
    998
    999	switch (blk_try_merge(rq, bio)) {
   1000	case ELEVATOR_BACK_MERGE:
   1001		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
   1002			return bio_attempt_back_merge(rq, bio, nr_segs);
   1003		break;
   1004	case ELEVATOR_FRONT_MERGE:
   1005		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
   1006			return bio_attempt_front_merge(rq, bio, nr_segs);
   1007		break;
   1008	case ELEVATOR_DISCARD_MERGE:
   1009		return bio_attempt_discard_merge(q, rq, bio);
   1010	default:
   1011		return BIO_MERGE_NONE;
   1012	}
   1013
   1014	return BIO_MERGE_FAILED;
   1015}
   1016
   1017/**
   1018 * blk_attempt_plug_merge - try to merge with %current's plugged list
   1019 * @q: request_queue new bio is being queued at
   1020 * @bio: new bio being queued
   1021 * @nr_segs: number of segments in @bio
   1023 *
   1024 * Determine whether @bio being queued on @q can be merged with the previous
   1025 * request on %current's plugged list.  Returns %true if merge was successful,
   1026 * otherwise %false.
   1027 *
   1028 * Plugging coalesces IOs from the same issuer for the same purpose without
   1029 * going through @q->queue_lock.  As such it's more of an issuing mechanism
    1030 * than scheduling, and the request, while it may have elvpriv data, is not
    1031 * added to the elevator at this point.  In addition, we don't have
   1032 * reliable access to the elevator outside queue lock.  Only check basic
   1033 * merging parameters without querying the elevator.
   1034 *
   1035 * Caller must ensure !blk_queue_nomerges(q) beforehand.
   1036 */
   1037bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
   1038		unsigned int nr_segs)
   1039{
   1040	struct blk_plug *plug;
   1041	struct request *rq;
   1042
   1043	plug = blk_mq_plug(q, bio);
   1044	if (!plug || rq_list_empty(plug->mq_list))
   1045		return false;
   1046
   1047	rq_list_for_each(&plug->mq_list, rq) {
   1048		if (rq->q == q) {
   1049			if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
   1050			    BIO_MERGE_OK)
   1051				return true;
   1052			break;
   1053		}
   1054
   1055		/*
   1056		 * Only keep iterating plug list for merges if we have multiple
   1057		 * queues
   1058		 */
   1059		if (!plug->multiple_queues)
   1060			break;
   1061	}
   1062	return false;
   1063}
   1064
   1065/*
   1066 * Iterate list of requests and see if we can merge this bio with any
   1067 * of them.
   1068 */
   1069bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
   1070			struct bio *bio, unsigned int nr_segs)
   1071{
   1072	struct request *rq;
   1073	int checked = 8;
   1074
   1075	list_for_each_entry_reverse(rq, list, queuelist) {
   1076		if (!checked--)
   1077			break;
   1078
   1079		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
   1080		case BIO_MERGE_NONE:
   1081			continue;
   1082		case BIO_MERGE_OK:
   1083			return true;
   1084		case BIO_MERGE_FAILED:
   1085			return false;
   1086		}
   1087
   1088	}
   1089
   1090	return false;
   1091}
   1092EXPORT_SYMBOL_GPL(blk_bio_list_merge);
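
blk_bio_list_merge() is exported so that I/O schedulers can offer bio merging against requests they are still holding internally. A scheduler-side hook might use it roughly as sketched below; the scheduler data structure, its lock, and the 'pending' list are hypothetical.

	/* Hypothetical elevator-side bio merge hook built on blk_bio_list_merge(). */
	static bool example_sched_bio_merge(struct request_queue *q, struct bio *bio,
					    unsigned int nr_segs)
	{
		struct example_sched_data *sd = q->elevator->elevator_data;
		bool merged;

		spin_lock(&sd->lock);
		merged = blk_bio_list_merge(q, &sd->pending, bio, nr_segs);
		spin_unlock(&sd->lock);

		return merged;
	}
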
   1093
   1094bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
   1095		unsigned int nr_segs, struct request **merged_request)
   1096{
   1097	struct request *rq;
   1098
   1099	switch (elv_merge(q, &rq, bio)) {
   1100	case ELEVATOR_BACK_MERGE:
   1101		if (!blk_mq_sched_allow_merge(q, rq, bio))
   1102			return false;
   1103		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
   1104			return false;
   1105		*merged_request = attempt_back_merge(q, rq);
   1106		if (!*merged_request)
   1107			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
   1108		return true;
   1109	case ELEVATOR_FRONT_MERGE:
   1110		if (!blk_mq_sched_allow_merge(q, rq, bio))
   1111			return false;
   1112		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
   1113			return false;
   1114		*merged_request = attempt_front_merge(q, rq);
   1115		if (!*merged_request)
   1116			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
   1117		return true;
   1118	case ELEVATOR_DISCARD_MERGE:
   1119		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
   1120	default:
   1121		return false;
   1122	}
   1123}
   1124EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);