cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

io-cmd-bdev.c (11892B)


// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"

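/*
 * Populate the namespace-specific atomic write and optimal I/O fields of
 * the Identify Namespace data from the backing device's queue limits.
 */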
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
	const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
	/* Number of logical blocks per physical block. */
	const u32 lpp = ql->physical_block_size / ql->logical_block_size;
	/* Logical blocks per physical block, 0's based. */
	const __le16 lpp0b = to0based(lpp);

	/*
	 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
	 * NAWUPF, and NACWU are defined for this namespace and should be
	 * used by the host for this namespace instead of the AWUN, AWUPF,
	 * and ACWU fields in the Identify Controller data structure. If
	 * any of these fields are zero that means that the corresponding
	 * field from the identify controller data structure should be used.
	 */
	id->nsfeat |= 1 << 1;
	id->nawun = lpp0b;
	id->nawupf = lpp0b;
	id->nacwu = lpp0b;

	/*
	 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
	 * NOWS are defined for this namespace and should be used by
	 * the host for I/O optimization.
	 */
	id->nsfeat |= 1 << 4;
	/* NPWG = Namespace Preferred Write Granularity. 0's based */
	id->npwg = lpp0b;
	/* NPWA = Namespace Preferred Write Alignment. 0's based */
	id->npwa = id->npwg;
	/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
	id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
	/* NPDA = Namespace Preferred Deallocate Alignment */
	id->npda = id->npdg;
	/* NOWS = Namespace Optimal Write Size */
	id->nows = to0based(ql->io_opt / ql->logical_block_size);
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (ns->bdev) {
		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
		ns->bdev = NULL;
	}
}

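/*
 * Derive the namespace metadata size and protection information type from
 * the block device integrity profile; only T10-PI Type 1/3 CRC are
 * reported, any other profile leaves metadata disabled.
 */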
static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

	if (bi) {
		ns->metadata_size = bi->tuple_size;
		if (bi->profile == &t10_pi_type1_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE1;
		else if (bi->profile == &t10_pi_type3_crc)
			ns->pi_type = NVME_NS_DPS_PI_TYPE3;
		else
			/* Unsupported metadata type */
			ns->metadata_size = 0;
	}
}

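/*
 * Open the backing block device for a namespace and initialize its size,
 * block size shift, protection information and, for zoned devices, the
 * ZNS command set identifier.
 */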
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	int ret;

	/*
	 * When the buffered_io namespace attribute is enabled, the user wants
	 * this block device to be accessed through the file backend so that
	 * it can take advantage of the page cache.
	 */
	if (ns->buffered_io)
		return -ENOTBLK;

	ns->bdev = blkdev_get_by_path(ns->device_path,
			FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(ns->bdev)) {
		ret = PTR_ERR(ns->bdev);
		if (ret != -ENOTBLK) {
			pr_err("failed to open block device %s: (%ld)\n",
					ns->device_path, PTR_ERR(ns->bdev));
		}
		ns->bdev = NULL;
		return ret;
	}
	ns->size = bdev_nr_bytes(ns->bdev);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ns->pi_type = 0;
	ns->metadata_size = 0;
	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
		nvmet_bdev_ns_enable_integrity(ns);

	if (bdev_is_zoned(ns->bdev)) {
		if (!nvmet_bdev_zns_enable(ns)) {
			nvmet_bdev_ns_disable(ns);
			return -EINVAL;
		}
		ns->csi = NVME_CSI_ZNS;
	}

	return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
	ns->size = bdev_nr_bytes(ns->bdev);
}

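/*
 * Translate a block layer completion status into an NVMe status code and
 * record the command-specific error location and LBA for error reporting.
 */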
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
	u16 status = NVME_SC_SUCCESS;

	if (likely(blk_sts == BLK_STS_OK))
		return status;
	/*
	 * Right now there is an M : 1 mapping from block layer errors to NVMe
	 * status codes (see nvme_error_status()). For consistency, when we
	 * reverse-map we pick the most appropriate NVMe status code from the
	 * group of codes used in nvme_error_status().
	 */
	switch (blk_sts) {
	case BLK_STS_NOSPC:
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, length);
		break;
	case BLK_STS_TARGET:
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		break;
	case BLK_STS_NOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case BLK_STS_MEDIUM:
		status = NVME_SC_ACCESS_DENIED;
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		break;
	case BLK_STS_IOERR:
	default:
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	}

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->error_slba = le64_to_cpu(req->cmd->rw.slba);
		break;
	case nvme_cmd_write_zeroes:
		req->error_slba =
			le64_to_cpu(req->cmd->write_zeroes.slba);
		break;
	default:
		req->error_slba = 0;
	}
	return status;
}

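/* bio end_io handler: complete the request with the translated NVMe status. */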
static void nvmet_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	nvmet_req_bio_put(req, bio);
}

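/*
 * Attach a bio integrity payload to @bio and add the request's metadata SG
 * pages to it so protection information is passed through to the device.
 */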
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	struct blk_integrity *bi;
	struct bio_integrity_payload *bip;
	int rc;
	size_t resid, len;

	bi = bdev_get_integrity(req->ns->bdev);
	if (unlikely(!bi)) {
		pr_err("Unable to locate bio_integrity\n");
		return -ENODEV;
	}

	bip = bio_integrity_alloc(bio, GFP_NOIO,
					bio_max_segs(req->metadata_sg_cnt));
	if (IS_ERR(bip)) {
		pr_err("Unable to allocate bio_integrity_payload\n");
		return PTR_ERR(bip);
	}

	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
	/* virtual start sector must be in integrity interval units */
	bip_set_seed(bip, bio->bi_iter.bi_sector >>
		     (bi->interval_exp - SECTOR_SHIFT));

	resid = bip->bip_iter.bi_size;
	while (resid > 0 && sg_miter_next(miter)) {
		len = min_t(size_t, miter->length, resid);
		rc = bio_integrity_add_page(bio, miter->page, len,
					    offset_in_page(miter->addr));
		if (unlikely(rc != len)) {
			pr_err("bio_integrity_add_page() failed; %d\n", rc);
			sg_miter_stop(miter);
			return -ENOMEM;
		}

		resid -= len;
		if (len < miter->length)
			miter->consumed -= miter->length - len;
	}
	sg_miter_stop(miter);

	return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
				struct sg_mapping_iter *miter)
{
	return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

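/*
 * Execute a read/write command against the backing block device.  The data
 * SG list is mapped into one or more chained bios; if the command carries
 * metadata, an integrity payload is attached to each bio before submission.
 */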
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
	unsigned int sg_cnt = req->sg_cnt;
	struct bio *bio;
	struct scatterlist *sg;
	struct blk_plug plug;
	sector_t sector;
	int op, i, rc;
	struct sg_mapping_iter prot_miter;
	unsigned int iter_flags;
	unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

	if (!nvmet_check_transfer_len(req, total_len))
		return;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			op |= REQ_FUA;
		iter_flags = SG_MITER_TO_SG;
	} else {
		op = REQ_OP_READ;
		iter_flags = SG_MITER_FROM_SG;
	}

	if (is_pci_p2pdma_page(sg_page(req->sg)))
		op |= REQ_NOMERGE;

	sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

	if (nvmet_use_inline_bvec(req)) {
		bio = &req->b.inline_bio;
		bio_init(bio, req->ns->bdev, req->inline_bvec,
			 ARRAY_SIZE(req->inline_bvec), op);
	} else {
		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op,
				GFP_KERNEL);
	}
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;

	blk_start_plug(&plug);
	if (req->metadata_len)
		sg_miter_start(&prot_miter, req->metadata_sg,
			       req->metadata_sg_cnt, iter_flags);

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
				!= sg->length) {
			struct bio *prev = bio;

			if (req->metadata_len) {
				rc = nvmet_bdev_alloc_bip(req, bio,
							  &prot_miter);
				if (unlikely(rc)) {
					bio_io_error(bio);
					return;
				}
			}

			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
					op, GFP_KERNEL);
			bio->bi_iter.bi_sector = sector;

			bio_chain(bio, prev);
			submit_bio(prev);
		}

		sector += sg->length >> 9;
		sg_cnt--;
	}

	if (req->metadata_len) {
		rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
		if (unlikely(rc)) {
			bio_io_error(bio);
			return;
		}
	}

	submit_bio(bio);
	blk_finish_plug(&plug);
}

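/* Handle an NVMe Flush command by submitting an empty preflush bio. */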
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->b.inline_bio;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	bio_init(bio, req->ns->bdev, req->inline_bvec,
		 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;

	submit_bio(bio);
}

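/* Issue a synchronous flush to the backing block device. */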
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
	if (blkdev_issue_flush(req->ns->bdev))
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	return 0;
}

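/* Queue a discard for a single DSM range, accumulating the bios in *bio. */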
static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
		struct nvme_dsm_range *range, struct bio **bio)
{
	struct nvmet_ns *ns = req->ns;
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			nvmet_lba_to_sect(ns, range->slba),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, bio);
	if (ret && ret != -EOPNOTSUPP) {
		req->error_slba = le64_to_cpu(range->slba);
		return errno_to_nvme_status(req, ret);
	}
	return NVME_SC_SUCCESS;
}

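/*
 * Handle the Deallocate attribute of a DSM command: copy each range from
 * the command's SG list, issue a discard for it, then submit the bio chain.
 */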
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (status)
			break;

		status = nvmet_bdev_discard_range(req, &range, &bio);
		if (status)
			break;
	}

	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		if (status)
			bio_io_error(bio);
		else
			submit_bio(bio);
	} else {
		nvmet_req_complete(req, status);
	}
}

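/* Dispatch a Dataset Management command; only Deallocate is implemented. */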
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
		return;

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_bdev_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

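/* Handle a Write Zeroes command via __blkdev_issue_zeroout(). */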
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	struct bio *bio = NULL;
	sector_t sector;
	sector_t nr_sector;
	int ret;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
		(req->ns->blksize_shift - 9));

	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			GFP_KERNEL, &bio, 0);
	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		submit_bio(bio);
	} else {
		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
	}
}

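/*
 * Set up the execute handler for a bdev-backed I/O command and, for reads
 * and writes on a PI-enabled namespace, the expected metadata length.
 */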
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_bdev_execute_rw;
		if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
			req->metadata_len = nvmet_rw_metadata_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_bdev_execute_flush;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_bdev_execute_dsm;
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_bdev_execute_write_zeroes;
		return 0;
	default:
		return nvmet_report_invalid_opcode(req);
	}
}