cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

apple.c (42803B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Apple ANS NVM Express device driver
      4 * Copyright The Asahi Linux Contributors
      5 *
      6 * Based on the pci.c NVM Express device driver
      7 * Copyright (c) 2011-2014, Intel Corporation.
      8 * and on the rdma.c NVMe over Fabrics RDMA host code.
      9 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
     10 */
     11
     12#include <linux/async.h>
     13#include <linux/blkdev.h>
     14#include <linux/blk-mq.h>
     15#include <linux/device.h>
     16#include <linux/dma-mapping.h>
     17#include <linux/dmapool.h>
     18#include <linux/interrupt.h>
     19#include <linux/io-64-nonatomic-lo-hi.h>
     20#include <linux/io.h>
     21#include <linux/iopoll.h>
     22#include <linux/jiffies.h>
     23#include <linux/mempool.h>
     24#include <linux/module.h>
     25#include <linux/of.h>
     26#include <linux/of_platform.h>
     27#include <linux/once.h>
     28#include <linux/platform_device.h>
     29#include <linux/pm_domain.h>
     30#include <linux/soc/apple/rtkit.h>
     31#include <linux/soc/apple/sart.h>
     32#include <linux/reset.h>
     33#include <linux/time64.h>
     34
     35#include "nvme.h"
     36
     /*
      * Controller limits and MMIO register offsets.
      *
      * NOTE(review): of these, only APPLE_ANS_MAX_QUEUE_DEPTH,
      * APPLE_NVMMU_TCB_INVAL and APPLE_NVMMU_TCB_STAT are referenced by the
      * code in this part of the file; the two NVMMU registers are accessed
      * relative to mmio_nvme. Which MMIO window the remaining offsets belong
      * to should be confirmed against their users.
      */
     37#define APPLE_ANS_BOOT_TIMEOUT	  USEC_PER_SEC
     38#define APPLE_ANS_MAX_QUEUE_DEPTH 64
     39
     40#define APPLE_ANS_COPROC_CPU_CONTROL	 0x44
     41#define APPLE_ANS_COPROC_CPU_CONTROL_RUN BIT(4)
     42
     43#define APPLE_ANS_ACQ_DB  0x1004
     44#define APPLE_ANS_IOCQ_DB 0x100c
     45
     46#define APPLE_ANS_MAX_PEND_CMDS_CTRL 0x1210
     47
     48#define APPLE_ANS_BOOT_STATUS	 0x1300
     49#define APPLE_ANS_BOOT_STATUS_OK 0xde71ce55
     50
     51#define APPLE_ANS_UNKNOWN_CTRL	 0x24008
     52#define APPLE_ANS_PRP_NULL_CHECK BIT(11)
     53
     54#define APPLE_ANS_LINEAR_SQ_CTRL 0x24908
     55#define APPLE_ANS_LINEAR_SQ_EN	 BIT(0)
     56
     57#define APPLE_ANS_LINEAR_ASQ_DB	 0x2490c
     58#define APPLE_ANS_LINEAR_IOSQ_DB 0x24910
     59
     60#define APPLE_NVMMU_NUM_TCBS	  0x28100
     61#define APPLE_NVMMU_ASQ_TCB_BASE  0x28108
     62#define APPLE_NVMMU_IOSQ_TCB_BASE 0x28110
     63#define APPLE_NVMMU_TCB_INVAL	  0x28118
     64#define APPLE_NVMMU_TCB_STAT	  0x28120
     65
     66/*
     67 * This controller is a bit weird in the way command tags work: Both the
     68 * admin and the IO queue share the same tag space. Additionally, tags
     69 * cannot be higher than 0x40 which effectively limits the combined
     70 * queue depth to 0x40. Instead of wasting half of that on the admin queue
     71 * which gets much less traffic we instead reduce its size here.
     72 * The controller also doesn't support async events, so no space needs to
     73 * be reserved for NVME_NR_AEN_COMMANDS.
     74 */
     75#define APPLE_NVME_AQ_DEPTH	   2
     76#define APPLE_NVME_AQ_MQ_TAG_DEPTH (APPLE_NVME_AQ_DEPTH - 1)
     77
     78/*
     79 * These can be higher, but we need to ensure that any command doesn't
     80 * require an sg allocation that needs more than a page of data.
     81 */
     82#define NVME_MAX_KB_SZ 4096
     83#define NVME_MAX_SEGS  127
     84
     85/*
     86 * This controller comes with an embedded IOMMU known as NVMMU.
     87 * The NVMMU is pointed to an array of TCBs indexed by the command tag.
     88 * Each command must be configured inside this structure before it's allowed
     89 * to execute, including commands that don't require DMA transfers.
     90 *
     91 * An exception to this are Apple's vendor-specific commands (opcode 0xD8 on the
     92 * admin queue): Those commands must still be added to the NVMMU but the DMA
     93 * buffers cannot be represented as PRPs and must instead be allowed using SART.
     94 *
     95 * Programming the PRPs to the same values as those in the submission queue
     96 * looks rather silly at first. This hardware is however designed for a kernel
     97 * that runs the NVMMU code in a higher exception level than the NVMe driver.
     98 * In that setting the NVMe driver first programs the submission queue entry
     99 * and then executes a hypercall to the code that is allowed to program the
    100 * NVMMU. The NVMMU driver then creates a shadow copy of the PRPs while
    101 * verifying that they don't point to kernel text, data, pagetables, or similar
    102 * protected areas before programming the TCB to point to this shadow copy.
    103 * Since Linux doesn't do any of that we may as well just point both the queue
    104 * and the TCB PRP pointer to the same memory.
    105 */
    /*
     * One NVMMU TCB entry. apple_nvme_submit_cmd() fills opcode, dma_flags,
     * command_id, length, prp1 and prp2 from the NVMe command before the tag is
     * written to the doorbell. The other fields are not written by the code in
     * this part of the file apart from the zeroing in apple_nvme_init_queue().
     * The layout must stay exactly 128 bytes (checked by a static_assert).
     */
    106struct apple_nvmmu_tcb {
    107	u8 opcode;
    108
    109#define APPLE_ANS_TCB_DMA_FROM_DEVICE BIT(0)
    110#define APPLE_ANS_TCB_DMA_TO_DEVICE   BIT(1)
       	/* one of the APPLE_ANS_TCB_DMA_* values above */
    111	u8 dma_flags;
    112
       	/* set to the command's tag, which is also the index of this entry */
    113	u8 command_id;
    114	u8 _unk0;
    115	__le16 length;
    116	u8 _unk1[18];
       	/* copies of the PRP pointers placed in the submission entry */
    117	__le64 prp1;
    118	__le64 prp2;
    119	u8 _unk2[16];
    120	u8 aes_iv[8];
    121	u8 _aes_unk[64];
    122};
    123
    124/*
    125 * The Apple NVMe controller only supports a single admin and a single IO queue
    126 * which are both limited to 64 entries and share a single interrupt.
    127 *
    128 * The completion queue works as usual. The submission "queue" instead is
    129 * an array indexed by the command tag on this hardware. Commands must also be
    130 * present in the NVMMU's tcb array. They are triggered by writing their tag to
    131 * a MMIO register.
    132 */
    /*
     * Per-queue state. struct apple_nvme embeds one instance for the admin
     * queue and one for the IO queue; is_adminq records which one this is and
     * queue_to_apple_nvme() relies on it to find the containing apple_nvme.
     */
    133struct apple_nvme_queue {
       	/* sqes and tcbs are indexed by command tag, cqes by cq_head */
    134	struct nvme_command *sqes;
    135	struct nvme_completion *cqes;
    136	struct apple_nvmmu_tcb *tcbs;
    137
    138	dma_addr_t sq_dma_addr;
    139	dma_addr_t cq_dma_addr;
    140	dma_addr_t tcb_dma_addr;
    141
       	/* doorbells: a command tag is written to sq_db, the new cq_head to cq_db */
    142	u32 __iomem *sq_db;
    143	u32 __iomem *cq_db;
    144
       	/* next completion entry to inspect and the phase bit value that marks it valid */
    145	u16 cq_head;
    146	u8 cq_phase;
    147
    148	bool is_adminq;
       	/* accessed with READ_ONCE/WRITE_ONCE; gates submission and completion handling */
    149	bool enabled;
    150};
    151
    152/*
    153 * The apple_nvme_iod describes the data in an I/O.
    154 *
    155 * The sg pointer contains the list of PRP chunk allocations in addition
    156 * to the actual struct scatterlist.
    157 */
    /*
     * Per-request driver data, obtained from a request via blk_mq_rq_to_pdu().
     */
    158struct apple_nvme_iod {
    159	struct nvme_request req;
    160	struct nvme_command cmd;
       	/* owning queue, assigned once in apple_nvme_init_request() */
    161	struct apple_nvme_queue *q;
    162	int npages; /* In the PRP list. 0 means small pool in use */
    163	int nents; /* Used in scatterlist */
       	/* DMA address of the single mapped segment, the second page, or the first PRP list */
    164	dma_addr_t first_dma;
    165	unsigned int dma_len; /* length of single DMA segment mapping */
       	/* allocated from iod_mempool; the PRP list pointers are stored after the entries */
    166	struct scatterlist *sg;
    167};
    168
    /*
     * Driver instance state for one Apple ANS NVMe controller. The embedded
     * nvme_ctrl, adminq and ioq members are used with container_of() to get
     * back to this structure.
     */
    169struct apple_nvme {
    170	struct device *dev;
    171
    172	void __iomem *mmio_coproc;
    173	void __iomem *mmio_nvme;
    174
    175	struct device **pd_dev;
    176	struct device_link **pd_link;
    177	int pd_count;
    178
    179	struct apple_sart *sart;
    180	struct apple_rtkit *rtk;
    181	struct reset_control *reset;
    182
       	/* PRP list allocators and the mempool backing apple_nvme_iod.sg */
    183	struct dma_pool *prp_page_pool;
    184	struct dma_pool *prp_small_pool;
    185	mempool_t *iod_mempool;
    186
    187	struct nvme_ctrl ctrl;
    188	struct work_struct remove_work;
    189
    190	struct apple_nvme_queue adminq;
    191	struct apple_nvme_queue ioq;
    192
    193	struct blk_mq_tag_set admin_tagset;
    194	struct blk_mq_tag_set tagset;
    195
    196	int irq;
       	/* held around the submission doorbell write and all completion queue processing */
    197	spinlock_t lock;
    198};
    199
    /* Compile-time checks of the submission entry and TCB sizes. */
    200static_assert(sizeof(struct nvme_command) == 64);
    201static_assert(sizeof(struct apple_nvmmu_tcb) == 128);
    202
    203static inline struct apple_nvme *ctrl_to_apple_nvme(struct nvme_ctrl *ctrl)
    204{
    205	return container_of(ctrl, struct apple_nvme, ctrl);
    206}
    207
    208static inline struct apple_nvme *queue_to_apple_nvme(struct apple_nvme_queue *q)
    209{
    210	if (q->is_adminq)
    211		return container_of(q, struct apple_nvme, adminq);
    212	else
    213		return container_of(q, struct apple_nvme, ioq);
    214}
    215
    216static unsigned int apple_nvme_queue_depth(struct apple_nvme_queue *q)
    217{
    218	if (q->is_adminq)
    219		return APPLE_NVME_AQ_DEPTH;
    220	else
    221		return APPLE_ANS_MAX_QUEUE_DEPTH;
    222}
    223
    224static void apple_nvme_rtkit_crashed(void *cookie)
    225{
    226	struct apple_nvme *anv = cookie;
    227
    228	dev_warn(anv->dev, "RTKit crashed; unable to recover without a reboot");
    229	nvme_reset_ctrl(&anv->ctrl);
    230}
    231
    232static int apple_nvme_sart_dma_setup(void *cookie,
    233				     struct apple_rtkit_shmem *bfr)
    234{
    235	struct apple_nvme *anv = cookie;
    236	int ret;
    237
    238	if (bfr->iova)
    239		return -EINVAL;
    240	if (!bfr->size)
    241		return -EINVAL;
    242
    243	bfr->buffer =
    244		dma_alloc_coherent(anv->dev, bfr->size, &bfr->iova, GFP_KERNEL);
    245	if (!bfr->buffer)
    246		return -ENOMEM;
    247
    248	ret = apple_sart_add_allowed_region(anv->sart, bfr->iova, bfr->size);
    249	if (ret) {
    250		dma_free_coherent(anv->dev, bfr->size, bfr->buffer, bfr->iova);
    251		bfr->buffer = NULL;
    252		return -ENOMEM;
    253	}
    254
    255	return 0;
    256}
    257
    258static void apple_nvme_sart_dma_destroy(void *cookie,
    259					struct apple_rtkit_shmem *bfr)
    260{
    261	struct apple_nvme *anv = cookie;
    262
    263	apple_sart_remove_allowed_region(anv->sart, bfr->iova, bfr->size);
    264	dma_free_coherent(anv->dev, bfr->size, bfr->buffer, bfr->iova);
    265}
    266
    /* RTKit callbacks: crash notification and SART-backed shared memory. */
    267static const struct apple_rtkit_ops apple_nvme_rtkit_ops = {
    268	.crashed = apple_nvme_rtkit_crashed,
    269	.shmem_setup = apple_nvme_sart_dma_setup,
    270	.shmem_destroy = apple_nvme_sart_dma_destroy,
    271};
    272
    273static void apple_nvmmu_inval(struct apple_nvme_queue *q, unsigned int tag)
    274{
    275	struct apple_nvme *anv = queue_to_apple_nvme(q);
    276
    277	writel(tag, anv->mmio_nvme + APPLE_NVMMU_TCB_INVAL);
    278	if (readl(anv->mmio_nvme + APPLE_NVMMU_TCB_STAT))
    279		dev_warn_ratelimited(anv->dev,
    280				     "NVMMU TCB invalidation failed\n");
    281}
    282
    283static void apple_nvme_submit_cmd(struct apple_nvme_queue *q,
    284				  struct nvme_command *cmd)
    285{
    286	struct apple_nvme *anv = queue_to_apple_nvme(q);
    287	u32 tag = nvme_tag_from_cid(cmd->common.command_id);
    288	struct apple_nvmmu_tcb *tcb = &q->tcbs[tag];
    289
    290	tcb->opcode = cmd->common.opcode;
    291	tcb->prp1 = cmd->common.dptr.prp1;
    292	tcb->prp2 = cmd->common.dptr.prp2;
    293	tcb->length = cmd->rw.length;
    294	tcb->command_id = tag;
    295
    296	if (nvme_is_write(cmd))
    297		tcb->dma_flags = APPLE_ANS_TCB_DMA_TO_DEVICE;
    298	else
    299		tcb->dma_flags = APPLE_ANS_TCB_DMA_FROM_DEVICE;
    300
    301	memcpy(&q->sqes[tag], cmd, sizeof(*cmd));
    302
    303	/*
    304	 * This lock here doesn't make much sense at a first glace but
    305	 * removing it will result in occasional missed completetion
    306	 * interrupts even though the commands still appear on the CQ.
    307	 * It's unclear why this happens but our best guess is that
    308	 * there is a bug in the firmware triggered when a new command
    309	 * is issued while we're inside the irq handler between the
    310	 * NVMMU invalidation (and making the tag available again)
    311	 * and the final CQ update.
    312	 */
    313	spin_lock_irq(&anv->lock);
    314	writel(tag, q->sq_db);
    315	spin_unlock_irq(&anv->lock);
    316}
    317
    318/*
    319 * From pci.c:
    320 * Will slightly overestimate the number of pages needed.  This is OK
    321 * as it only leads to a small amount of wasted memory for the lifetime of
    322 * the I/O.
    323 */
    324static inline size_t apple_nvme_iod_alloc_size(void)
    325{
    326	const unsigned int nprps = DIV_ROUND_UP(
    327		NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE, NVME_CTRL_PAGE_SIZE);
    328	const int npages = DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
    329	const size_t alloc_size = sizeof(__le64 *) * npages +
    330				  sizeof(struct scatterlist) * NVME_MAX_SEGS;
    331
    332	return alloc_size;
    333}
    334
    335static void **apple_nvme_iod_list(struct request *req)
    336{
    337	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    338
    339	return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
    340}
    341
    342static void apple_nvme_free_prps(struct apple_nvme *anv, struct request *req)
    343{
    344	const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
    345	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    346	dma_addr_t dma_addr = iod->first_dma;
    347	int i;
    348
    349	for (i = 0; i < iod->npages; i++) {
    350		__le64 *prp_list = apple_nvme_iod_list(req)[i];
    351		dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
    352
    353		dma_pool_free(anv->prp_page_pool, prp_list, dma_addr);
    354		dma_addr = next_dma_addr;
    355	}
    356}
    357
    358static void apple_nvme_unmap_data(struct apple_nvme *anv, struct request *req)
    359{
    360	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    361
    362	if (iod->dma_len) {
    363		dma_unmap_page(anv->dev, iod->first_dma, iod->dma_len,
    364			       rq_dma_dir(req));
    365		return;
    366	}
    367
    368	WARN_ON_ONCE(!iod->nents);
    369
    370	dma_unmap_sg(anv->dev, iod->sg, iod->nents, rq_dma_dir(req));
    371	if (iod->npages == 0)
    372		dma_pool_free(anv->prp_small_pool, apple_nvme_iod_list(req)[0],
    373			      iod->first_dma);
    374	else
    375		apple_nvme_free_prps(anv, req);
    376	mempool_free(iod->sg, anv->iod_mempool);
    377}
    378
    379static void apple_nvme_print_sgl(struct scatterlist *sgl, int nents)
    380{
    381	int i;
    382	struct scatterlist *sg;
    383
    384	for_each_sg(sgl, sg, nents, i) {
    385		dma_addr_t phys = sg_phys(sg);
    386
    387		pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d dma_address:%pad dma_length:%d\n",
    388			i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
    389			sg_dma_len(sg));
    390	}
    391}
    392
    /*
     * Fill in prp1/prp2 of @cmnd for a request whose scatterlist (iod->sg) has
     * already been DMA mapped.
     *
     * prp1 is always the DMA address of the first scatterlist entry. prp2 is
     * iod->first_dma, which ends up as:
     *   - 0 if the payload ends within the first controller page,
     *   - the address of the second page if at most one more page is needed,
     *   - the DMA address of the first PRP list otherwise.
     *
     * Returns BLK_STS_OK on success, BLK_STS_RESOURCE if a PRP list cannot be
     * allocated, or BLK_STS_IOERR if the scatterlist is not made of whole
     * controller pages where required.
     */
    393static blk_status_t apple_nvme_setup_prps(struct apple_nvme *anv,
    394					  struct request *req,
    395					  struct nvme_rw_command *cmnd)
    396{
    397	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    398	struct dma_pool *pool;
    399	int length = blk_rq_payload_bytes(req);
    400	struct scatterlist *sg = iod->sg;
    401	int dma_len = sg_dma_len(sg);
    402	u64 dma_addr = sg_dma_address(sg);
    403	int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
    404	__le64 *prp_list;
    405	void **list = apple_nvme_iod_list(req);
    406	dma_addr_t prp_dma;
    407	int nprps, i;
    408
       	/* bytes left once the part of the first page covered by prp1 is consumed */
    409	length -= (NVME_CTRL_PAGE_SIZE - offset);
    410	if (length <= 0) {
    411		iod->first_dma = 0;
    412		goto done;
    413	}
    414
       	/* advance to the second page, moving to the next sg entry if this one is used up */
    415	dma_len -= (NVME_CTRL_PAGE_SIZE - offset);
    416	if (dma_len) {
    417		dma_addr += (NVME_CTRL_PAGE_SIZE - offset);
    418	} else {
    419		sg = sg_next(sg);
    420		dma_addr = sg_dma_address(sg);
    421		dma_len = sg_dma_len(sg);
    422	}
    423
       	/* at most one more page: prp2 points at it directly, no list needed */
    424	if (length <= NVME_CTRL_PAGE_SIZE) {
    425		iod->first_dma = dma_addr;
    426		goto done;
    427	}
    428
       	/*
       	 * Up to 256 / 8 entries use the small pool (npages == 0 records that);
       	 * anything larger starts with one page-pool list.
       	 * NOTE(review): 256 is presumably the small pool's element size -
       	 * confirm against where prp_small_pool is created.
       	 */
    429	nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
    430	if (nprps <= (256 / 8)) {
    431		pool = anv->prp_small_pool;
    432		iod->npages = 0;
    433	} else {
    434		pool = anv->prp_page_pool;
    435		iod->npages = 1;
    436	}
    437
    438	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
    439	if (!prp_list) {
    440		iod->first_dma = dma_addr;
    441		iod->npages = -1;
    442		return BLK_STS_RESOURCE;
    443	}
    444	list[0] = prp_list;
    445	iod->first_dma = prp_dma;
    446	i = 0;
    447	for (;;) {
       		/*
       		 * Current list is full: allocate another one, move the last
       		 * entry into its first slot and turn the old last slot into
       		 * the chain pointer to the new list.
       		 */
    448		if (i == NVME_CTRL_PAGE_SIZE >> 3) {
    449			__le64 *old_prp_list = prp_list;
    450
    451			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
    452			if (!prp_list)
    453				goto free_prps;
    454			list[iod->npages++] = prp_list;
    455			prp_list[0] = old_prp_list[i - 1];
    456			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
    457			i = 1;
    458		}
    459		prp_list[i++] = cpu_to_le64(dma_addr);
    460		dma_len -= NVME_CTRL_PAGE_SIZE;
    461		dma_addr += NVME_CTRL_PAGE_SIZE;
    462		length -= NVME_CTRL_PAGE_SIZE;
    463		if (length <= 0)
    464			break;
    465		if (dma_len > 0)
    466			continue;
       		/* a segment that does not end on a page boundary cannot be expressed */
    467		if (unlikely(dma_len < 0))
    468			goto bad_sgl;
    469		sg = sg_next(sg);
    470		dma_addr = sg_dma_address(sg);
    471		dma_len = sg_dma_len(sg);
    472	}
    473done:
    474	cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
    475	cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
    476	return BLK_STS_OK;
    477free_prps:
    478	apple_nvme_free_prps(anv, req);
    479	return BLK_STS_RESOURCE;
    480bad_sgl:
    481	WARN(DO_ONCE(apple_nvme_print_sgl, iod->sg, iod->nents),
    482	     "Invalid SGL for payload:%d nents:%d\n", blk_rq_payload_bytes(req),
    483	     iod->nents);
    484	return BLK_STS_IOERR;
    485}
    486
    487static blk_status_t apple_nvme_setup_prp_simple(struct apple_nvme *anv,
    488						struct request *req,
    489						struct nvme_rw_command *cmnd,
    490						struct bio_vec *bv)
    491{
    492	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    493	unsigned int offset = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1);
    494	unsigned int first_prp_len = NVME_CTRL_PAGE_SIZE - offset;
    495
    496	iod->first_dma = dma_map_bvec(anv->dev, bv, rq_dma_dir(req), 0);
    497	if (dma_mapping_error(anv->dev, iod->first_dma))
    498		return BLK_STS_RESOURCE;
    499	iod->dma_len = bv->bv_len;
    500
    501	cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
    502	if (bv->bv_len > first_prp_len)
    503		cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
    504	return BLK_STS_OK;
    505}
    506
    507static blk_status_t apple_nvme_map_data(struct apple_nvme *anv,
    508					struct request *req,
    509					struct nvme_command *cmnd)
    510{
    511	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    512	blk_status_t ret = BLK_STS_RESOURCE;
    513	int nr_mapped;
    514
    515	if (blk_rq_nr_phys_segments(req) == 1) {
    516		struct bio_vec bv = req_bvec(req);
    517
    518		if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
    519			return apple_nvme_setup_prp_simple(anv, req, &cmnd->rw,
    520							   &bv);
    521	}
    522
    523	iod->dma_len = 0;
    524	iod->sg = mempool_alloc(anv->iod_mempool, GFP_ATOMIC);
    525	if (!iod->sg)
    526		return BLK_STS_RESOURCE;
    527	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
    528	iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
    529	if (!iod->nents)
    530		goto out_free_sg;
    531
    532	nr_mapped = dma_map_sg_attrs(anv->dev, iod->sg, iod->nents,
    533				     rq_dma_dir(req), DMA_ATTR_NO_WARN);
    534	if (!nr_mapped)
    535		goto out_free_sg;
    536
    537	ret = apple_nvme_setup_prps(anv, req, &cmnd->rw);
    538	if (ret != BLK_STS_OK)
    539		goto out_unmap_sg;
    540	return BLK_STS_OK;
    541
    542out_unmap_sg:
    543	dma_unmap_sg(anv->dev, iod->sg, iod->nents, rq_dma_dir(req));
    544out_free_sg:
    545	mempool_free(iod->sg, anv->iod_mempool);
    546	return ret;
    547}
    548
    549static __always_inline void apple_nvme_unmap_rq(struct request *req)
    550{
    551	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    552	struct apple_nvme *anv = queue_to_apple_nvme(iod->q);
    553
    554	if (blk_rq_nr_phys_segments(req))
    555		apple_nvme_unmap_data(anv, req);
    556}
    557
    /*
     * blk-mq ->complete handler: drop the request's DMA mappings, then let the
     * NVMe core finish the request.
     */
    static void apple_nvme_complete_rq(struct request *req)
    {
    	apple_nvme_unmap_rq(req);

    	nvme_complete_rq(req);
    }
    563
    564static void apple_nvme_complete_batch(struct io_comp_batch *iob)
    565{
    566	nvme_complete_batch(iob, apple_nvme_unmap_rq);
    567}
    568
    569static inline bool apple_nvme_cqe_pending(struct apple_nvme_queue *q)
    570{
    571	struct nvme_completion *hcqe = &q->cqes[q->cq_head];
    572
    573	return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == q->cq_phase;
    574}
    575
    576static inline struct blk_mq_tags *
    577apple_nvme_queue_tagset(struct apple_nvme *anv, struct apple_nvme_queue *q)
    578{
    579	if (q->is_adminq)
    580		return anv->admin_tagset.tags[0];
    581	else
    582		return anv->tagset.tags[0];
    583}
    584
    585static inline void apple_nvme_handle_cqe(struct apple_nvme_queue *q,
    586					 struct io_comp_batch *iob, u16 idx)
    587{
    588	struct apple_nvme *anv = queue_to_apple_nvme(q);
    589	struct nvme_completion *cqe = &q->cqes[idx];
    590	__u16 command_id = READ_ONCE(cqe->command_id);
    591	struct request *req;
    592
    593	apple_nvmmu_inval(q, command_id);
    594
    595	req = nvme_find_rq(apple_nvme_queue_tagset(anv, q), command_id);
    596	if (unlikely(!req)) {
    597		dev_warn(anv->dev, "invalid id %d completed", command_id);
    598		return;
    599	}
    600
    601	if (!nvme_try_complete_req(req, cqe->status, cqe->result) &&
    602	    !blk_mq_add_to_batch(req, iob, nvme_req(req)->status,
    603				 apple_nvme_complete_batch))
    604		apple_nvme_complete_rq(req);
    605}
    606
    607static inline void apple_nvme_update_cq_head(struct apple_nvme_queue *q)
    608{
    609	u32 tmp = q->cq_head + 1;
    610
    611	if (tmp == apple_nvme_queue_depth(q)) {
    612		q->cq_head = 0;
    613		q->cq_phase ^= 1;
    614	} else {
    615		q->cq_head = tmp;
    616	}
    617}
    618
    619static bool apple_nvme_poll_cq(struct apple_nvme_queue *q,
    620			       struct io_comp_batch *iob)
    621{
    622	bool found = false;
    623
    624	while (apple_nvme_cqe_pending(q)) {
    625		found = true;
    626
    627		/*
    628		 * load-load control dependency between phase and the rest of
    629		 * the cqe requires a full read memory barrier
    630		 */
    631		dma_rmb();
    632		apple_nvme_handle_cqe(q, iob, q->cq_head);
    633		apple_nvme_update_cq_head(q);
    634	}
    635
    636	if (found)
    637		writel(q->cq_head, q->cq_db);
    638
    639	return found;
    640}
    641
    642static bool apple_nvme_handle_cq(struct apple_nvme_queue *q, bool force)
    643{
    644	bool found;
    645	DEFINE_IO_COMP_BATCH(iob);
    646
    647	if (!READ_ONCE(q->enabled) && !force)
    648		return false;
    649
    650	found = apple_nvme_poll_cq(q, &iob);
    651
    652	if (!rq_list_empty(iob.req_list))
    653		apple_nvme_complete_batch(&iob);
    654
    655	return found;
    656}
    657
    658static irqreturn_t apple_nvme_irq(int irq, void *data)
    659{
    660	struct apple_nvme *anv = data;
    661	bool handled = false;
    662	unsigned long flags;
    663
    664	spin_lock_irqsave(&anv->lock, flags);
    665	if (apple_nvme_handle_cq(&anv->ioq, false))
    666		handled = true;
    667	if (apple_nvme_handle_cq(&anv->adminq, false))
    668		handled = true;
    669	spin_unlock_irqrestore(&anv->lock, flags);
    670
    671	if (handled)
    672		return IRQ_HANDLED;
    673	return IRQ_NONE;
    674}
    675
    676static int apple_nvme_create_cq(struct apple_nvme *anv)
    677{
    678	struct nvme_command c = {};
    679
    680	/*
    681	 * Note: we (ab)use the fact that the prp fields survive if no data
    682	 * is attached to the request.
    683	 */
    684	c.create_cq.opcode = nvme_admin_create_cq;
    685	c.create_cq.prp1 = cpu_to_le64(anv->ioq.cq_dma_addr);
    686	c.create_cq.cqid = cpu_to_le16(1);
    687	c.create_cq.qsize = cpu_to_le16(APPLE_ANS_MAX_QUEUE_DEPTH - 1);
    688	c.create_cq.cq_flags = cpu_to_le16(NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED);
    689	c.create_cq.irq_vector = cpu_to_le16(0);
    690
    691	return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0);
    692}
    693
    694static int apple_nvme_remove_cq(struct apple_nvme *anv)
    695{
    696	struct nvme_command c = {};
    697
    698	c.delete_queue.opcode = nvme_admin_delete_cq;
    699	c.delete_queue.qid = cpu_to_le16(1);
    700
    701	return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0);
    702}
    703
    704static int apple_nvme_create_sq(struct apple_nvme *anv)
    705{
    706	struct nvme_command c = {};
    707
    708	/*
    709	 * Note: we (ab)use the fact that the prp fields survive if no data
    710	 * is attached to the request.
    711	 */
    712	c.create_sq.opcode = nvme_admin_create_sq;
    713	c.create_sq.prp1 = cpu_to_le64(anv->ioq.sq_dma_addr);
    714	c.create_sq.sqid = cpu_to_le16(1);
    715	c.create_sq.qsize = cpu_to_le16(APPLE_ANS_MAX_QUEUE_DEPTH - 1);
    716	c.create_sq.sq_flags = cpu_to_le16(NVME_QUEUE_PHYS_CONTIG);
    717	c.create_sq.cqid = cpu_to_le16(1);
    718
    719	return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0);
    720}
    721
    722static int apple_nvme_remove_sq(struct apple_nvme *anv)
    723{
    724	struct nvme_command c = {};
    725
    726	c.delete_queue.opcode = nvme_admin_delete_sq;
    727	c.delete_queue.qid = cpu_to_le16(1);
    728
    729	return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0);
    730}
    731
    732static blk_status_t apple_nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
    733					const struct blk_mq_queue_data *bd)
    734{
    735	struct nvme_ns *ns = hctx->queue->queuedata;
    736	struct apple_nvme_queue *q = hctx->driver_data;
    737	struct apple_nvme *anv = queue_to_apple_nvme(q);
    738	struct request *req = bd->rq;
    739	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    740	struct nvme_command *cmnd = &iod->cmd;
    741	blk_status_t ret;
    742
    743	iod->npages = -1;
    744	iod->nents = 0;
    745
    746	/*
    747	 * We should not need to do this, but we're still using this to
    748	 * ensure we can drain requests on a dying queue.
    749	 */
    750	if (unlikely(!READ_ONCE(q->enabled)))
    751		return BLK_STS_IOERR;
    752
    753	if (!nvme_check_ready(&anv->ctrl, req, true))
    754		return nvme_fail_nonready_command(&anv->ctrl, req);
    755
    756	ret = nvme_setup_cmd(ns, req);
    757	if (ret)
    758		return ret;
    759
    760	if (blk_rq_nr_phys_segments(req)) {
    761		ret = apple_nvme_map_data(anv, req, cmnd);
    762		if (ret)
    763			goto out_free_cmd;
    764	}
    765
    766	blk_mq_start_request(req);
    767	apple_nvme_submit_cmd(q, cmnd);
    768	return BLK_STS_OK;
    769
    770out_free_cmd:
    771	nvme_cleanup_cmd(req);
    772	return ret;
    773}
    774
    775static int apple_nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
    776				unsigned int hctx_idx)
    777{
    778	hctx->driver_data = data;
    779	return 0;
    780}
    781
    782static int apple_nvme_init_request(struct blk_mq_tag_set *set,
    783				   struct request *req, unsigned int hctx_idx,
    784				   unsigned int numa_node)
    785{
    786	struct apple_nvme_queue *q = set->driver_data;
    787	struct apple_nvme *anv = queue_to_apple_nvme(q);
    788	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    789	struct nvme_request *nreq = nvme_req(req);
    790
    791	iod->q = q;
    792	nreq->ctrl = &anv->ctrl;
    793	nreq->cmd = &iod->cmd;
    794
    795	return 0;
    796}
    797
    /*
     * Quiesce the controller and fail everything that is still in flight.
     *
     * The controller is treated as dead if RTKit has crashed, CSTS.RDY is
     * clear or CSTS.CFS is set. In that case no commands are sent to it.
     * Otherwise the IO queues are deleted (if enabled) and the controller is
     * disabled, preceded by an NVMe shutdown when @shutdown is true.
     *
     * Both queues are then marked disabled, any completions already posted
     * are reaped one last time, and every remaining request is cancelled.
     *
     * @shutdown: the controller is going away for good. Wait for the freeze
     *            to drain (if alive) and restart the block queues at the end.
     */
    798static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
    799{
    800	u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS);
    801	bool dead = false, freeze = false;
    802	unsigned long flags;
    803
    804	if (apple_rtkit_is_crashed(anv->rtk))
    805		dead = true;
    806	if (!(csts & NVME_CSTS_RDY))
    807		dead = true;
    808	if (csts & NVME_CSTS_CFS)
    809		dead = true;
    810
    811	if (anv->ctrl.state == NVME_CTRL_LIVE ||
    812	    anv->ctrl.state == NVME_CTRL_RESETTING) {
    813		freeze = true;
    814		nvme_start_freeze(&anv->ctrl);
    815	}
    816
    817	/*
    818	 * Give the controller a chance to complete all entered requests if
    819	 * doing a safe shutdown.
    820	 */
    821	if (!dead && shutdown && freeze)
    822		nvme_wait_freeze_timeout(&anv->ctrl, NVME_IO_TIMEOUT);
    823
    824	nvme_stop_queues(&anv->ctrl);
    825
    826	if (!dead) {
       		/* the submission queue is deleted before its completion queue */
    827		if (READ_ONCE(anv->ioq.enabled)) {
    828			apple_nvme_remove_sq(anv);
    829			apple_nvme_remove_cq(anv);
    830		}
    831
    832		if (shutdown)
    833			nvme_shutdown_ctrl(&anv->ctrl);
    834		nvme_disable_ctrl(&anv->ctrl);
    835	}
    836
    837	WRITE_ONCE(anv->ioq.enabled, false);
    838	WRITE_ONCE(anv->adminq.enabled, false);
    839	mb(); /* ensure that nvme_queue_rq() sees that enabled is cleared */
    840	nvme_stop_admin_queue(&anv->ctrl);
    841
    842	/* last chance to complete any requests before nvme_cancel_request */
       	/* force=true because both queues were just marked disabled above */
    843	spin_lock_irqsave(&anv->lock, flags);
    844	apple_nvme_handle_cq(&anv->ioq, true);
    845	apple_nvme_handle_cq(&anv->adminq, true);
    846	spin_unlock_irqrestore(&anv->lock, flags);
    847
    848	blk_mq_tagset_busy_iter(&anv->tagset, nvme_cancel_request, &anv->ctrl);
    849	blk_mq_tagset_busy_iter(&anv->admin_tagset, nvme_cancel_request,
    850				&anv->ctrl);
    851	blk_mq_tagset_wait_completed_request(&anv->tagset);
    852	blk_mq_tagset_wait_completed_request(&anv->admin_tagset);
    853
    854	/*
    855	 * The driver will not be starting up queues again if shutting down so
    856	 * must flush all entered requests to their failed completion to avoid
    857	 * deadlocking blk-mq hot-cpu notifier.
    858	 */
    859	if (shutdown) {
    860		nvme_start_queues(&anv->ctrl);
    861		nvme_start_admin_queue(&anv->ctrl);
    862	}
    863}
    864
    865static enum blk_eh_timer_return apple_nvme_timeout(struct request *req,
    866						   bool reserved)
    867{
    868	struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req);
    869	struct apple_nvme_queue *q = iod->q;
    870	struct apple_nvme *anv = queue_to_apple_nvme(q);
    871	unsigned long flags;
    872	u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS);
    873
    874	if (anv->ctrl.state != NVME_CTRL_LIVE) {
    875		/*
    876		 * From rdma.c:
    877		 * If we are resetting, connecting or deleting we should
    878		 * complete immediately because we may block controller
    879		 * teardown or setup sequence
    880		 * - ctrl disable/shutdown fabrics requests
    881		 * - connect requests
    882		 * - initialization admin requests
    883		 * - I/O requests that entered after unquiescing and
    884		 *   the controller stopped responding
    885		 *
    886		 * All other requests should be cancelled by the error
    887		 * recovery work, so it's fine that we fail it here.
    888		 */
    889		dev_warn(anv->dev,
    890			 "I/O %d(aq:%d) timeout while not in live state\n",
    891			 req->tag, q->is_adminq);
    892		if (blk_mq_request_started(req) &&
    893		    !blk_mq_request_completed(req)) {
    894			nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
    895			nvme_req(req)->flags |= NVME_REQ_CANCELLED;
    896			blk_mq_complete_request(req);
    897		}
    898		return BLK_EH_DONE;
    899	}
    900
    901	/* check if we just missed an interrupt if we're still alive */
    902	if (!apple_rtkit_is_crashed(anv->rtk) && !(csts & NVME_CSTS_CFS)) {
    903		spin_lock_irqsave(&anv->lock, flags);
    904		apple_nvme_handle_cq(q, false);
    905		spin_unlock_irqrestore(&anv->lock, flags);
    906		if (blk_mq_request_completed(req)) {
    907			dev_warn(anv->dev,
    908				 "I/O %d(aq:%d) timeout: completion polled\n",
    909				 req->tag, q->is_adminq);
    910			return BLK_EH_DONE;
    911		}
    912	}
    913
    914	/*
    915	 * aborting commands isn't supported which leaves a full reset as our
    916	 * only option here
    917	 */
    918	dev_warn(anv->dev, "I/O %d(aq:%d) timeout: resetting controller\n",
    919		 req->tag, q->is_adminq);
    920	nvme_req(req)->flags |= NVME_REQ_CANCELLED;
    921	apple_nvme_disable(anv, false);
    922	nvme_reset_ctrl(&anv->ctrl);
    923	return BLK_EH_DONE;
    924}
    925
    926static int apple_nvme_poll(struct blk_mq_hw_ctx *hctx,
    927			   struct io_comp_batch *iob)
    928{
    929	struct apple_nvme_queue *q = hctx->driver_data;
    930	struct apple_nvme *anv = queue_to_apple_nvme(q);
    931	bool found;
    932	unsigned long flags;
    933
    934	spin_lock_irqsave(&anv->lock, flags);
    935	found = apple_nvme_poll_cq(q, iob);
    936	spin_unlock_irqrestore(&anv->lock, flags);
    937
    938	return found;
    939}
    940
    941static const struct blk_mq_ops apple_nvme_mq_admin_ops = {
    942	.queue_rq = apple_nvme_queue_rq,
    943	.complete = apple_nvme_complete_rq,
    944	.init_hctx = apple_nvme_init_hctx,
    945	.init_request = apple_nvme_init_request,
    946	.timeout = apple_nvme_timeout,
    947};
    948
    949static const struct blk_mq_ops apple_nvme_mq_ops = {
    950	.queue_rq = apple_nvme_queue_rq,
    951	.complete = apple_nvme_complete_rq,
    952	.init_hctx = apple_nvme_init_hctx,
    953	.init_request = apple_nvme_init_request,
    954	.timeout = apple_nvme_timeout,
    955	.poll = apple_nvme_poll,
    956};
    957
    958static void apple_nvme_init_queue(struct apple_nvme_queue *q)
    959{
    960	unsigned int depth = apple_nvme_queue_depth(q);
    961
    962	q->cq_head = 0;
    963	q->cq_phase = 1;
    964	memset(q->tcbs, 0,
    965	       APPLE_ANS_MAX_QUEUE_DEPTH * sizeof(struct apple_nvmmu_tcb));
    966	memset(q->cqes, 0, depth * sizeof(struct nvme_completion));
    967	WRITE_ONCE(q->enabled, true);
    968	wmb(); /* ensure the first interrupt sees the initialization */
    969}
    970
    971static void apple_nvme_reset_work(struct work_struct *work)
    972{
    973	unsigned int nr_io_queues = 1;
    974	int ret;
    975	u32 boot_status, aqa;
    976	struct apple_nvme *anv =
    977		container_of(work, struct apple_nvme, ctrl.reset_work);
    978
    979	if (anv->ctrl.state != NVME_CTRL_RESETTING) {
    980		dev_warn(anv->dev, "ctrl state %d is not RESETTING\n",
    981			 anv->ctrl.state);
    982		ret = -ENODEV;
    983		goto out;
    984	}
    985
    986	/* there's unfortunately no known way to recover if RTKit crashed :( */
    987	if (apple_rtkit_is_crashed(anv->rtk)) {
    988		dev_err(anv->dev,
    989			"RTKit has crashed without any way to recover.");
    990		ret = -EIO;
    991		goto out;
    992	}
    993
    994	if (anv->ctrl.ctrl_config & NVME_CC_ENABLE)
    995		apple_nvme_disable(anv, false);
    996
    997	/* RTKit must be shut down cleanly for the (soft)-reset to work */
    998	if (apple_rtkit_is_running(anv->rtk)) {
    999		dev_dbg(anv->dev, "Trying to shut down RTKit before reset.");
   1000		ret = apple_rtkit_shutdown(anv->rtk);
   1001		if (ret)
   1002			goto out;
   1003	}
   1004
   1005	writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
   1006
   1007	ret = reset_control_assert(anv->reset);
   1008	if (ret)
   1009		goto out;
   1010
   1011	ret = apple_rtkit_reinit(anv->rtk);
   1012	if (ret)
   1013		goto out;
   1014
   1015	ret = reset_control_deassert(anv->reset);
   1016	if (ret)
   1017		goto out;
   1018
   1019	writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN,
   1020	       anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
   1021	ret = apple_rtkit_boot(anv->rtk);
   1022	if (ret) {
   1023		dev_err(anv->dev, "ANS did not boot");
   1024		goto out;
   1025	}
   1026
   1027	ret = readl_poll_timeout(anv->mmio_nvme + APPLE_ANS_BOOT_STATUS,
   1028				 boot_status,
   1029				 boot_status == APPLE_ANS_BOOT_STATUS_OK,
   1030				 USEC_PER_MSEC, APPLE_ANS_BOOT_TIMEOUT);
   1031	if (ret) {
   1032		dev_err(anv->dev, "ANS did not initialize");
   1033		goto out;
   1034	}
   1035
   1036	dev_dbg(anv->dev, "ANS booted successfully.");
   1037
   1038	/*
   1039	 * Limit the max command size to prevent iod->sg allocations going
   1040	 * over a single page.
   1041	 */
   1042	anv->ctrl.max_hw_sectors = min_t(u32, NVME_MAX_KB_SZ << 1,
   1043					 dma_max_mapping_size(anv->dev) >> 9);
   1044	anv->ctrl.max_segments = NVME_MAX_SEGS;
   1045
   1046	/*
   1047	 * Enable NVMMU and linear submission queues.
   1048	 * While we could keep those disabled and pretend this is slightly
   1049	 * more common NVMe controller we'd still need some quirks (e.g.
   1050	 * sq entries will be 128 bytes) and Apple might drop support for
   1051	 * that mode in the future.
   1052	 */
   1053	writel(APPLE_ANS_LINEAR_SQ_EN,
   1054	       anv->mmio_nvme + APPLE_ANS_LINEAR_SQ_CTRL);
   1055
   1056	/* Allow as many pending command as possible for both queues */
   1057	writel(APPLE_ANS_MAX_QUEUE_DEPTH | (APPLE_ANS_MAX_QUEUE_DEPTH << 16),
   1058	       anv->mmio_nvme + APPLE_ANS_MAX_PEND_CMDS_CTRL);
   1059
   1060	/* Setup the NVMMU for the maximum admin and IO queue depth */
   1061	writel(APPLE_ANS_MAX_QUEUE_DEPTH - 1,
   1062	       anv->mmio_nvme + APPLE_NVMMU_NUM_TCBS);
   1063
   1064	/*
   1065	 * This is probably a chicken bit: without it all commands where any PRP
   1066	 * is set to zero (including those that don't use that field) fail and
   1067	 * the co-processor complains about "completed with err BAD_CMD-" or
   1068	 * a "NULL_PRP_PTR_ERR" in the syslog
   1069	 */
   1070	writel(readl(anv->mmio_nvme + APPLE_ANS_UNKNOWN_CTRL) &
   1071		       ~APPLE_ANS_PRP_NULL_CHECK,
   1072	       anv->mmio_nvme + APPLE_ANS_UNKNOWN_CTRL);
   1073
   1074	/* Setup the admin queue */
   1075	aqa = APPLE_NVME_AQ_DEPTH - 1;
   1076	aqa |= aqa << 16;
   1077	writel(aqa, anv->mmio_nvme + NVME_REG_AQA);
   1078	writeq(anv->adminq.sq_dma_addr, anv->mmio_nvme + NVME_REG_ASQ);
   1079	writeq(anv->adminq.cq_dma_addr, anv->mmio_nvme + NVME_REG_ACQ);
   1080
   1081	/* Setup NVMMU for both queues */
   1082	writeq(anv->adminq.tcb_dma_addr,
   1083	       anv->mmio_nvme + APPLE_NVMMU_ASQ_TCB_BASE);
   1084	writeq(anv->ioq.tcb_dma_addr,
   1085	       anv->mmio_nvme + APPLE_NVMMU_IOSQ_TCB_BASE);
   1086
   1087	anv->ctrl.sqsize =
   1088		APPLE_ANS_MAX_QUEUE_DEPTH - 1; /* 0's based queue depth */
   1089	anv->ctrl.cap = readq(anv->mmio_nvme + NVME_REG_CAP);
   1090
   1091	dev_dbg(anv->dev, "Enabling controller now");
   1092	ret = nvme_enable_ctrl(&anv->ctrl);
   1093	if (ret)
   1094		goto out;
   1095
   1096	dev_dbg(anv->dev, "Starting admin queue");
   1097	apple_nvme_init_queue(&anv->adminq);
   1098	nvme_start_admin_queue(&anv->ctrl);
   1099
   1100	if (!nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_CONNECTING)) {
   1101		dev_warn(anv->ctrl.device,
   1102			 "failed to mark controller CONNECTING\n");
   1103		ret = -ENODEV;
   1104		goto out;
   1105	}
   1106
   1107	ret = nvme_init_ctrl_finish(&anv->ctrl);
   1108	if (ret)
   1109		goto out;
   1110
   1111	dev_dbg(anv->dev, "Creating IOCQ");
   1112	ret = apple_nvme_create_cq(anv);
   1113	if (ret)
   1114		goto out;
   1115	dev_dbg(anv->dev, "Creating IOSQ");
   1116	ret = apple_nvme_create_sq(anv);
   1117	if (ret)
   1118		goto out_remove_cq;
   1119
   1120	apple_nvme_init_queue(&anv->ioq);
   1121	nr_io_queues = 1;
   1122	ret = nvme_set_queue_count(&anv->ctrl, &nr_io_queues);
   1123	if (ret)
   1124		goto out_remove_sq;
   1125	if (nr_io_queues != 1) {
   1126		ret = -ENXIO;
   1127		goto out_remove_sq;
   1128	}
   1129
   1130	anv->ctrl.queue_count = nr_io_queues + 1;
   1131
   1132	nvme_start_queues(&anv->ctrl);
   1133	nvme_wait_freeze(&anv->ctrl);
   1134	blk_mq_update_nr_hw_queues(&anv->tagset, 1);
   1135	nvme_unfreeze(&anv->ctrl);
   1136
   1137	if (!nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_LIVE)) {
   1138		dev_warn(anv->ctrl.device,
   1139			 "failed to mark controller live state\n");
   1140		ret = -ENODEV;
   1141		goto out_remove_sq;
   1142	}
   1143
   1144	nvme_start_ctrl(&anv->ctrl);
   1145
   1146	dev_dbg(anv->dev, "ANS boot and NVMe init completed.");
   1147	return;
   1148
   1149out_remove_sq:
   1150	apple_nvme_remove_sq(anv);
   1151out_remove_cq:
   1152	apple_nvme_remove_cq(anv);
   1153out:
   1154	dev_warn(anv->ctrl.device, "Reset failure status: %d\n", ret);
   1155	nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_DELETING);
   1156	nvme_get_ctrl(&anv->ctrl);
   1157	apple_nvme_disable(anv, false);
   1158	nvme_kill_queues(&anv->ctrl);
   1159	if (!queue_work(nvme_wq, &anv->remove_work))
   1160		nvme_put_ctrl(&anv->ctrl);
   1161}
   1162
   1163static void apple_nvme_remove_dead_ctrl_work(struct work_struct *work)
   1164{
   1165	struct apple_nvme *anv =
   1166		container_of(work, struct apple_nvme, remove_work);
   1167
   1168	nvme_put_ctrl(&anv->ctrl);
   1169	device_release_driver(anv->dev);
   1170}
   1171
   1172static int apple_nvme_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
   1173{
   1174	*val = readl(ctrl_to_apple_nvme(ctrl)->mmio_nvme + off);
   1175	return 0;
   1176}
   1177
   1178static int apple_nvme_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
   1179{
   1180	writel(val, ctrl_to_apple_nvme(ctrl)->mmio_nvme + off);
   1181	return 0;
   1182}
   1183
   1184static int apple_nvme_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
   1185{
   1186	*val = readq(ctrl_to_apple_nvme(ctrl)->mmio_nvme + off);
   1187	return 0;
   1188}
   1189
   1190static int apple_nvme_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
   1191{
   1192	struct device *dev = ctrl_to_apple_nvme(ctrl)->dev;
   1193
   1194	return snprintf(buf, size, "%s\n", dev_name(dev));
   1195}
   1196
   1197static void apple_nvme_free_ctrl(struct nvme_ctrl *ctrl)
   1198{
   1199	struct apple_nvme *anv = ctrl_to_apple_nvme(ctrl);
   1200
   1201	if (anv->ctrl.admin_q)
   1202		blk_put_queue(anv->ctrl.admin_q);
   1203	put_device(anv->dev);
   1204}
   1205
   1206static const struct nvme_ctrl_ops nvme_ctrl_ops = {
   1207	.name = "apple-nvme",
   1208	.module = THIS_MODULE,
   1209	.flags = 0,
   1210	.reg_read32 = apple_nvme_reg_read32,
   1211	.reg_write32 = apple_nvme_reg_write32,
   1212	.reg_read64 = apple_nvme_reg_read64,
   1213	.free_ctrl = apple_nvme_free_ctrl,
   1214	.get_address = apple_nvme_get_address,
   1215};
   1216
   1217static void apple_nvme_async_probe(void *data, async_cookie_t cookie)
   1218{
   1219	struct apple_nvme *anv = data;
   1220
   1221	flush_work(&anv->ctrl.reset_work);
   1222	flush_work(&anv->ctrl.scan_work);
   1223	nvme_put_ctrl(&anv->ctrl);
   1224}
   1225
   1226static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
   1227{
   1228	int ret;
   1229
   1230	anv->admin_tagset.ops = &apple_nvme_mq_admin_ops;
   1231	anv->admin_tagset.nr_hw_queues = 1;
   1232	anv->admin_tagset.queue_depth = APPLE_NVME_AQ_MQ_TAG_DEPTH;
   1233	anv->admin_tagset.timeout = NVME_ADMIN_TIMEOUT;
   1234	anv->admin_tagset.numa_node = NUMA_NO_NODE;
   1235	anv->admin_tagset.cmd_size = sizeof(struct apple_nvme_iod);
   1236	anv->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
   1237	anv->admin_tagset.driver_data = &anv->adminq;
   1238
   1239	ret = blk_mq_alloc_tag_set(&anv->admin_tagset);
   1240	if (ret)
   1241		return ret;
   1242	ret = devm_add_action_or_reset(anv->dev,
   1243				       (void (*)(void *))blk_mq_free_tag_set,
   1244				       &anv->admin_tagset);
   1245	if (ret)
   1246		return ret;
   1247
   1248	anv->tagset.ops = &apple_nvme_mq_ops;
   1249	anv->tagset.nr_hw_queues = 1;
   1250	anv->tagset.nr_maps = 1;
   1251	/*
   1252	 * Tags are used as an index to the NVMMU and must be unique across
   1253	 * both queues. The admin queue gets the first APPLE_NVME_AQ_DEPTH which
   1254	 * must be marked as reserved in the IO queue.
   1255	 */
   1256	anv->tagset.reserved_tags = APPLE_NVME_AQ_DEPTH;
   1257	anv->tagset.queue_depth = APPLE_ANS_MAX_QUEUE_DEPTH - 1;
   1258	anv->tagset.timeout = NVME_IO_TIMEOUT;
   1259	anv->tagset.numa_node = NUMA_NO_NODE;
   1260	anv->tagset.cmd_size = sizeof(struct apple_nvme_iod);
   1261	anv->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
   1262	anv->tagset.driver_data = &anv->ioq;
   1263
   1264	ret = blk_mq_alloc_tag_set(&anv->tagset);
   1265	if (ret)
   1266		return ret;
   1267	ret = devm_add_action_or_reset(
   1268		anv->dev, (void (*)(void *))blk_mq_free_tag_set, &anv->tagset);
   1269	if (ret)
   1270		return ret;
   1271
   1272	anv->ctrl.admin_tagset = &anv->admin_tagset;
   1273	anv->ctrl.tagset = &anv->tagset;
   1274
   1275	return 0;
   1276}
   1277
   1278static int apple_nvme_queue_alloc(struct apple_nvme *anv,
   1279				  struct apple_nvme_queue *q)
   1280{
   1281	unsigned int depth = apple_nvme_queue_depth(q);
   1282
   1283	q->cqes = dmam_alloc_coherent(anv->dev,
   1284				      depth * sizeof(struct nvme_completion),
   1285				      &q->cq_dma_addr, GFP_KERNEL);
   1286	if (!q->cqes)
   1287		return -ENOMEM;
   1288
   1289	q->sqes = dmam_alloc_coherent(anv->dev,
   1290				      depth * sizeof(struct nvme_command),
   1291				      &q->sq_dma_addr, GFP_KERNEL);
   1292	if (!q->sqes)
   1293		return -ENOMEM;
   1294
   1295	/*
   1296	 * We need the maximum queue depth here because the NVMMU only has a
   1297	 * single depth configuration shared between both queues.
   1298	 */
   1299	q->tcbs = dmam_alloc_coherent(anv->dev,
   1300				      APPLE_ANS_MAX_QUEUE_DEPTH *
   1301					      sizeof(struct apple_nvmmu_tcb),
   1302				      &q->tcb_dma_addr, GFP_KERNEL);
   1303	if (!q->tcbs)
   1304		return -ENOMEM;
   1305
   1306	/*
   1307	 * initialize phase to make sure the allocated and empty memory
   1308	 * doesn't look like a full cq already.
   1309	 */
   1310	q->cq_phase = 1;
   1311	return 0;
   1312}
   1313
   1314static void apple_nvme_detach_genpd(struct apple_nvme *anv)
   1315{
   1316	int i;
   1317
   1318	if (anv->pd_count <= 1)
   1319		return;
   1320
   1321	for (i = anv->pd_count - 1; i >= 0; i--) {
   1322		if (anv->pd_link[i])
   1323			device_link_del(anv->pd_link[i]);
   1324		if (!IS_ERR_OR_NULL(anv->pd_dev[i]))
   1325			dev_pm_domain_detach(anv->pd_dev[i], true);
   1326	}
   1327}
   1328
   1329static int apple_nvme_attach_genpd(struct apple_nvme *anv)
   1330{
   1331	struct device *dev = anv->dev;
   1332	int i;
   1333
   1334	anv->pd_count = of_count_phandle_with_args(
   1335		dev->of_node, "power-domains", "#power-domain-cells");
   1336	if (anv->pd_count <= 1)
   1337		return 0;
   1338
   1339	anv->pd_dev = devm_kcalloc(dev, anv->pd_count, sizeof(*anv->pd_dev),
   1340				   GFP_KERNEL);
   1341	if (!anv->pd_dev)
   1342		return -ENOMEM;
   1343
   1344	anv->pd_link = devm_kcalloc(dev, anv->pd_count, sizeof(*anv->pd_link),
   1345				    GFP_KERNEL);
   1346	if (!anv->pd_link)
   1347		return -ENOMEM;
   1348
   1349	for (i = 0; i < anv->pd_count; i++) {
   1350		anv->pd_dev[i] = dev_pm_domain_attach_by_id(dev, i);
   1351		if (IS_ERR(anv->pd_dev[i])) {
   1352			apple_nvme_detach_genpd(anv);
   1353			return PTR_ERR(anv->pd_dev[i]);
   1354		}
   1355
   1356		anv->pd_link[i] = device_link_add(dev, anv->pd_dev[i],
   1357						  DL_FLAG_STATELESS |
   1358						  DL_FLAG_PM_RUNTIME |
   1359						  DL_FLAG_RPM_ACTIVE);
   1360		if (!anv->pd_link[i]) {
   1361			apple_nvme_detach_genpd(anv);
   1362			return -EINVAL;
   1363		}
   1364	}
   1365
   1366	return 0;
   1367}
   1368
   1369static int apple_nvme_probe(struct platform_device *pdev)
   1370{
   1371	struct device *dev = &pdev->dev;
   1372	struct apple_nvme *anv;
   1373	int ret;
   1374
   1375	anv = devm_kzalloc(dev, sizeof(*anv), GFP_KERNEL);
   1376	if (!anv)
   1377		return -ENOMEM;
   1378
   1379	anv->dev = get_device(dev);
   1380	anv->adminq.is_adminq = true;
   1381	platform_set_drvdata(pdev, anv);
   1382
   1383	ret = apple_nvme_attach_genpd(anv);
   1384	if (ret < 0) {
   1385		dev_err_probe(dev, ret, "Failed to attach power domains");
   1386		goto put_dev;
   1387	}
   1388	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
   1389		ret = -ENXIO;
   1390		goto put_dev;
   1391	}
   1392
   1393	anv->irq = platform_get_irq(pdev, 0);
   1394	if (anv->irq < 0) {
   1395		ret = anv->irq;
   1396		goto put_dev;
   1397	}
   1398	if (!anv->irq) {
   1399		ret = -ENXIO;
   1400		goto put_dev;
   1401	}
   1402
   1403	anv->mmio_coproc = devm_platform_ioremap_resource_byname(pdev, "ans");
   1404	if (IS_ERR(anv->mmio_coproc)) {
   1405		ret = PTR_ERR(anv->mmio_coproc);
   1406		goto put_dev;
   1407	}
   1408	anv->mmio_nvme = devm_platform_ioremap_resource_byname(pdev, "nvme");
   1409	if (IS_ERR(anv->mmio_nvme)) {
   1410		ret = PTR_ERR(anv->mmio_nvme);
   1411		goto put_dev;
   1412	}
   1413
   1414	anv->adminq.sq_db = anv->mmio_nvme + APPLE_ANS_LINEAR_ASQ_DB;
   1415	anv->adminq.cq_db = anv->mmio_nvme + APPLE_ANS_ACQ_DB;
   1416	anv->ioq.sq_db = anv->mmio_nvme + APPLE_ANS_LINEAR_IOSQ_DB;
   1417	anv->ioq.cq_db = anv->mmio_nvme + APPLE_ANS_IOCQ_DB;
   1418
   1419	anv->sart = devm_apple_sart_get(dev);
   1420	if (IS_ERR(anv->sart)) {
   1421		ret = dev_err_probe(dev, PTR_ERR(anv->sart),
   1422				    "Failed to initialize SART");
   1423		goto put_dev;
   1424	}
   1425
   1426	anv->reset = devm_reset_control_array_get_exclusive(anv->dev);
   1427	if (IS_ERR(anv->reset)) {
   1428		ret = dev_err_probe(dev, PTR_ERR(anv->reset),
   1429				    "Failed to get reset control");
   1430		goto put_dev;
   1431	}
   1432
   1433	INIT_WORK(&anv->ctrl.reset_work, apple_nvme_reset_work);
   1434	INIT_WORK(&anv->remove_work, apple_nvme_remove_dead_ctrl_work);
   1435	spin_lock_init(&anv->lock);
   1436
   1437	ret = apple_nvme_queue_alloc(anv, &anv->adminq);
   1438	if (ret)
   1439		goto put_dev;
   1440	ret = apple_nvme_queue_alloc(anv, &anv->ioq);
   1441	if (ret)
   1442		goto put_dev;
   1443
   1444	anv->prp_page_pool = dmam_pool_create("prp list page", anv->dev,
   1445					      NVME_CTRL_PAGE_SIZE,
   1446					      NVME_CTRL_PAGE_SIZE, 0);
   1447	if (!anv->prp_page_pool) {
   1448		ret = -ENOMEM;
   1449		goto put_dev;
   1450	}
   1451
   1452	anv->prp_small_pool =
   1453		dmam_pool_create("prp list 256", anv->dev, 256, 256, 0);
   1454	if (!anv->prp_small_pool) {
   1455		ret = -ENOMEM;
   1456		goto put_dev;
   1457	}
   1458
   1459	WARN_ON_ONCE(apple_nvme_iod_alloc_size() > PAGE_SIZE);
   1460	anv->iod_mempool =
   1461		mempool_create_kmalloc_pool(1, apple_nvme_iod_alloc_size());
   1462	if (!anv->iod_mempool) {
   1463		ret = -ENOMEM;
   1464		goto put_dev;
   1465	}
   1466	ret = devm_add_action_or_reset(
   1467		anv->dev, (void (*)(void *))mempool_destroy, anv->iod_mempool);
   1468	if (ret)
   1469		goto put_dev;
   1470
   1471	ret = apple_nvme_alloc_tagsets(anv);
   1472	if (ret)
   1473		goto put_dev;
   1474
   1475	ret = devm_request_irq(anv->dev, anv->irq, apple_nvme_irq, 0,
   1476			       "nvme-apple", anv);
   1477	if (ret) {
   1478		dev_err_probe(dev, ret, "Failed to request IRQ");
   1479		goto put_dev;
   1480	}
   1481
   1482	anv->rtk =
   1483		devm_apple_rtkit_init(dev, anv, NULL, 0, &apple_nvme_rtkit_ops);
   1484	if (IS_ERR(anv->rtk)) {
   1485		ret = dev_err_probe(dev, PTR_ERR(anv->rtk),
   1486				    "Failed to initialize RTKit");
   1487		goto put_dev;
   1488	}
   1489
   1490	ret = nvme_init_ctrl(&anv->ctrl, anv->dev, &nvme_ctrl_ops,
   1491			     NVME_QUIRK_SKIP_CID_GEN);
   1492	if (ret) {
   1493		dev_err_probe(dev, ret, "Failed to initialize nvme_ctrl");
   1494		goto put_dev;
   1495	}
   1496
   1497	anv->ctrl.admin_q = blk_mq_init_queue(&anv->admin_tagset);
   1498	if (IS_ERR(anv->ctrl.admin_q)) {
   1499		ret = -ENOMEM;
   1500		goto put_dev;
   1501	}
   1502
   1503	if (!blk_get_queue(anv->ctrl.admin_q)) {
   1504		nvme_start_admin_queue(&anv->ctrl);
   1505		blk_cleanup_queue(anv->ctrl.admin_q);
   1506		anv->ctrl.admin_q = NULL;
   1507		ret = -ENODEV;
   1508		goto put_dev;
   1509	}
   1510
   1511	nvme_reset_ctrl(&anv->ctrl);
   1512	async_schedule(apple_nvme_async_probe, anv);
   1513
   1514	return 0;
   1515
   1516put_dev:
   1517	put_device(anv->dev);
   1518	return ret;
   1519}
   1520
   1521static int apple_nvme_remove(struct platform_device *pdev)
   1522{
   1523	struct apple_nvme *anv = platform_get_drvdata(pdev);
   1524
   1525	nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_DELETING);
   1526	flush_work(&anv->ctrl.reset_work);
   1527	nvme_stop_ctrl(&anv->ctrl);
   1528	nvme_remove_namespaces(&anv->ctrl);
   1529	apple_nvme_disable(anv, true);
   1530	nvme_uninit_ctrl(&anv->ctrl);
   1531
   1532	if (apple_rtkit_is_running(anv->rtk))
   1533		apple_rtkit_shutdown(anv->rtk);
   1534
   1535	apple_nvme_detach_genpd(anv);
   1536
   1537	return 0;
   1538}
   1539
   1540static void apple_nvme_shutdown(struct platform_device *pdev)
   1541{
   1542	struct apple_nvme *anv = platform_get_drvdata(pdev);
   1543
   1544	apple_nvme_disable(anv, true);
   1545	if (apple_rtkit_is_running(anv->rtk))
   1546		apple_rtkit_shutdown(anv->rtk);
   1547}
   1548
   1549static int apple_nvme_resume(struct device *dev)
   1550{
   1551	struct apple_nvme *anv = dev_get_drvdata(dev);
   1552
   1553	return nvme_reset_ctrl(&anv->ctrl);
   1554}
   1555
   1556static int apple_nvme_suspend(struct device *dev)
   1557{
   1558	struct apple_nvme *anv = dev_get_drvdata(dev);
   1559	int ret = 0;
   1560
   1561	apple_nvme_disable(anv, true);
   1562
   1563	if (apple_rtkit_is_running(anv->rtk))
   1564		ret = apple_rtkit_shutdown(anv->rtk);
   1565
   1566	writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
   1567
   1568	return ret;
   1569}
   1570
   1571static DEFINE_SIMPLE_DEV_PM_OPS(apple_nvme_pm_ops, apple_nvme_suspend,
   1572				apple_nvme_resume);
   1573
   1574static const struct of_device_id apple_nvme_of_match[] = {
   1575	{ .compatible = "apple,nvme-ans2" },
   1576	{},
   1577};
   1578MODULE_DEVICE_TABLE(of, apple_nvme_of_match);
   1579
   1580static struct platform_driver apple_nvme_driver = {
   1581	.driver = {
   1582		.name = "nvme-apple",
   1583		.of_match_table = apple_nvme_of_match,
   1584		.pm = pm_sleep_ptr(&apple_nvme_pm_ops),
   1585	},
   1586	.probe = apple_nvme_probe,
   1587	.remove = apple_nvme_remove,
   1588	.shutdown = apple_nvme_shutdown,
   1589};
   1590module_platform_driver(apple_nvme_driver);
   1591
   1592MODULE_AUTHOR("Sven Peter <sven@svenpeter.dev>");
   1593MODULE_LICENSE("GPL");