cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

oradax.c (26812B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
      4 */
      5
      6/*
      7 * Oracle Data Analytics Accelerator (DAX)
      8 *
      9 * DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8
     10 * (DAX2) processor chips, and has direct access to the CPU's L3
     11 * caches as well as physical memory. It can perform several
     12 * operations on data streams with various input and output formats.
     13 * The driver provides a transport mechanism only and has limited
     14 * knowledge of the various opcodes and data formats. A user space
     15 * library provides high level services and translates these into low
     16 * level commands which are then passed into the driver and
     17 * subsequently the hypervisor and the coprocessor.  The library is
     18 * the recommended way for applications to use the coprocessor, and
     19 * the driver interface is not intended for general use.
     20 *
     21 * See Documentation/sparc/oradax/oracle-dax.rst for more details.
     22 */
     23
     24#include <linux/uaccess.h>
     25#include <linux/module.h>
     26#include <linux/delay.h>
     27#include <linux/cdev.h>
     28#include <linux/slab.h>
     29#include <linux/mm.h>
     30
     31#include <asm/hypervisor.h>
     32#include <asm/mdesc.h>
     33#include <asm/oradax.h>
     34
     35MODULE_LICENSE("GPL");
     36MODULE_DESCRIPTION("Driver for Oracle Data Analytics Accelerator");
     37
     38#define	DAX_DBG_FLG_BASIC	0x01
     39#define	DAX_DBG_FLG_STAT	0x02
     40#define	DAX_DBG_FLG_INFO	0x04
     41#define	DAX_DBG_FLG_ALL		0xff
     42
     43#define	dax_err(fmt, ...)      pr_err("%s: " fmt "\n", __func__, ##__VA_ARGS__)
     44#define	dax_info(fmt, ...)     pr_info("%s: " fmt "\n", __func__, ##__VA_ARGS__)
     45
     46#define	dax_dbg(fmt, ...)	do {					\
     47					if (dax_debug & DAX_DBG_FLG_BASIC)\
     48						dax_info(fmt, ##__VA_ARGS__); \
     49				} while (0)
     50#define	dax_stat_dbg(fmt, ...)	do {					\
     51					if (dax_debug & DAX_DBG_FLG_STAT) \
     52						dax_info(fmt, ##__VA_ARGS__); \
     53				} while (0)
     54#define	dax_info_dbg(fmt, ...)	do { \
     55					if (dax_debug & DAX_DBG_FLG_INFO) \
     56						dax_info(fmt, ##__VA_ARGS__); \
     57				} while (0)
     58
     59#define	DAX1_MINOR		1
     60#define	DAX1_MAJOR		1
     61#define	DAX2_MINOR		0
     62#define	DAX2_MAJOR		2
     63
     64#define	DAX1_STR    "ORCL,sun4v-dax"
     65#define	DAX2_STR    "ORCL,sun4v-dax2"
     66
     67#define	DAX_CA_ELEMS		(DAX_MMAP_LEN / sizeof(struct dax_cca))
     68
     69#define	DAX_CCB_USEC		100
     70#define	DAX_CCB_RETRIES		10000
     71
     72/* stream types */
     73enum {
     74	OUT,
     75	PRI,
     76	SEC,
     77	TBL,
     78	NUM_STREAM_TYPES
     79};
     80
     81/* completion status */
     82#define	CCA_STAT_NOT_COMPLETED	0
     83#define	CCA_STAT_COMPLETED	1
     84#define	CCA_STAT_FAILED		2
     85#define	CCA_STAT_KILLED		3
     86#define	CCA_STAT_NOT_RUN	4
     87#define	CCA_STAT_PIPE_OUT	5
     88#define	CCA_STAT_PIPE_SRC	6
     89#define	CCA_STAT_PIPE_DST	7
     90
     91/* completion err */
     92#define	CCA_ERR_SUCCESS		0x0	/* no error */
     93#define	CCA_ERR_OVERFLOW	0x1	/* buffer overflow */
     94#define	CCA_ERR_DECODE		0x2	/* CCB decode error */
     95#define	CCA_ERR_PAGE_OVERFLOW	0x3	/* page overflow */
     96#define	CCA_ERR_KILLED		0x7	/* command was killed */
     97#define	CCA_ERR_TIMEOUT		0x8	/* Timeout */
     98#define	CCA_ERR_ADI		0x9	/* ADI error */
     99#define	CCA_ERR_DATA_FMT	0xA	/* data format error */
    100#define	CCA_ERR_OTHER_NO_RETRY	0xE	/* Other error, do not retry */
    101#define	CCA_ERR_OTHER_RETRY	0xF	/* Other error, retry */
    102#define	CCA_ERR_PARTIAL_SYMBOL	0x80	/* QP partial symbol warning */
    103
    104/* CCB address types */
    105#define	DAX_ADDR_TYPE_NONE	0
    106#define	DAX_ADDR_TYPE_VA_ALT	1	/* secondary context */
    107#define	DAX_ADDR_TYPE_RA	2	/* real address */
    108#define	DAX_ADDR_TYPE_VA	3	/* virtual address */
    109
    110/* dax_header_t opcode */
    111#define	DAX_OP_SYNC_NOP		0x0
    112#define	DAX_OP_EXTRACT		0x1
    113#define	DAX_OP_SCAN_VALUE	0x2
    114#define	DAX_OP_SCAN_RANGE	0x3
    115#define	DAX_OP_TRANSLATE	0x4
    116#define	DAX_OP_SELECT		0x5
    117#define	DAX_OP_INVERT		0x10	/* OR with translate, scan opcodes */
    118
    119struct dax_header {
    120	u32 ccb_version:4;	/* 31:28 CCB Version */
    121				/* 27:24 Sync Flags */
    122	u32 pipe:1;		/* Pipeline */
    123	u32 longccb:1;		/* Longccb. Set for scan with lu2, lu3, lu4. */
    124	u32 cond:1;		/* Conditional */
    125	u32 serial:1;		/* Serial */
    126	u32 opcode:8;		/* 23:16 Opcode */
    127				/* 15:0 Address Type. */
    128	u32 reserved:3;		/* 15:13 reserved */
    129	u32 table_addr_type:2;	/* 12:11 Huffman Table Address Type */
    130	u32 out_addr_type:3;	/* 10:8 Destination Address Type */
    131	u32 sec_addr_type:3;	/* 7:5 Secondary Source Address Type */
    132	u32 pri_addr_type:3;	/* 4:2 Primary Source Address Type */
    133	u32 cca_addr_type:2;	/* 1:0 Completion Address Type */
    134};
    135
    136struct dax_control {
    137	u32 pri_fmt:4;		/* 31:28 Primary Input Format */
    138	u32 pri_elem_size:5;	/* 27:23 Primary Input Element Size(less1) */
    139	u32 pri_offset:3;	/* 22:20 Primary Input Starting Offset */
    140	u32 sec_encoding:1;	/* 19    Secondary Input Encoding */
    141				/*	 (must be 0 for Select) */
    142	u32 sec_offset:3;	/* 18:16 Secondary Input Starting Offset */
    143	u32 sec_elem_size:2;	/* 15:14 Secondary Input Element Size */
    144				/*	 (must be 0 for Select) */
    145	u32 out_fmt:2;		/* 13:12 Output Format */
    146	u32 out_elem_size:2;	/* 11:10 Output Element Size */
    147	u32 misc:10;		/* 9:0 Opcode specific info */
    148};
    149
    150struct dax_data_access {
    151	u64 flow_ctrl:2;	/* 63:62 Flow Control Type */
    152	u64 pipe_target:2;	/* 61:60 Pipeline Target */
    153	u64 out_buf_size:20;	/* 59:40 Output Buffer Size */
    154				/*	 (cachelines less 1) */
    155	u64 unused1:8;		/* 39:32 Reserved, Set to 0 */
    156	u64 out_alloc:5;	/* 31:27 Output Allocation */
    157	u64 unused2:1;		/* 26	 Reserved */
    158	u64 pri_len_fmt:2;	/* 25:24 Input Length Format */
    159	u64 pri_len:24;		/* 23:0  Input Element/Byte/Bit Count */
    160				/*	 (less 1) */
    161};
    162
    163struct dax_ccb {
    164	struct dax_header hdr;	/* CCB Header */
    165	struct dax_control ctrl;/* Control Word */
    166	void *ca;		/* Completion Address */
    167	void *pri;		/* Primary Input Address */
    168	struct dax_data_access dac; /* Data Access Control */
    169	void *sec;		/* Secondary Input Address */
    170	u64 dword5;		/* depends on opcode */
    171	void *out;		/* Output Address */
    172	void *tbl;		/* Table Address or bitmap */
    173};
    174
    175struct dax_cca {
    176	u8	status;		/* user may mwait on this address */
    177	u8	err;		/* user visible error notification */
    178	u8	rsvd[2];	/* reserved */
    179	u32	n_remaining;	/* for QP partial symbol warning */
    180	u32	output_sz;	/* output in bytes */
    181	u32	rsvd2;		/* reserved */
    182	u64	run_cycles;	/* run time in OCND2 cycles */
    183	u64	run_stats;	/* nothing reported in version 1.0 */
    184	u32	n_processed;	/* number input elements */
    185	u32	rsvd3[5];	/* reserved */
    186	u64	retval;		/* command return value */
    187	u64	rsvd4[8];	/* reserved */
    188};
    189
    190/* per thread CCB context */
    191struct dax_ctx {
    192	struct dax_ccb		*ccb_buf;
    193	u64			ccb_buf_ra;	/* cached RA of ccb_buf  */
    194	struct dax_cca		*ca_buf;
    195	u64			ca_buf_ra;	/* cached RA of ca_buf   */
    196	struct page		*pages[DAX_CA_ELEMS][NUM_STREAM_TYPES];
    197						/* array of locked pages */
    198	struct task_struct	*owner;		/* thread that owns ctx  */
    199	struct task_struct	*client;	/* requesting thread     */
    200	union ccb_result	result;
    201	u32			ccb_count;
    202	u32			fail_count;
    203};
    204
    205/* driver public entry points */
    206static int dax_open(struct inode *inode, struct file *file);
    207static ssize_t dax_read(struct file *filp, char __user *buf,
    208			size_t count, loff_t *ppos);
    209static ssize_t dax_write(struct file *filp, const char __user *buf,
    210			 size_t count, loff_t *ppos);
    211static int dax_devmap(struct file *f, struct vm_area_struct *vma);
    212static int dax_close(struct inode *i, struct file *f);
    213
    214static const struct file_operations dax_fops = {
    215	.owner	=	THIS_MODULE,
    216	.open	=	dax_open,
    217	.read	=	dax_read,
    218	.write	=	dax_write,
    219	.mmap	=	dax_devmap,
    220	.release =	dax_close,
    221};
    222
    223static int dax_ccb_exec(struct dax_ctx *ctx, const char __user *buf,
    224			size_t count, loff_t *ppos);
    225static int dax_ccb_info(u64 ca, struct ccb_info_result *info);
    226static int dax_ccb_kill(u64 ca, u16 *kill_res);
    227
    228static struct cdev c_dev;
    229static struct class *cl;
    230static dev_t first;
    231
    232static int max_ccb_version;
    233static int dax_debug;
    234module_param(dax_debug, int, 0644);
    235MODULE_PARM_DESC(dax_debug, "Debug flags");
    236
    237static int __init dax_attach(void)
    238{
    239	unsigned long dummy, hv_rv, major, minor, minor_requested, max_ccbs;
    240	struct mdesc_handle *hp = mdesc_grab();
    241	char *prop, *dax_name;
    242	bool found = false;
    243	int len, ret = 0;
    244	u64 pn;
    245
    246	if (hp == NULL) {
    247		dax_err("Unable to grab mdesc");
    248		return -ENODEV;
    249	}
    250
    251	mdesc_for_each_node_by_name(hp, pn, "virtual-device") {
    252		prop = (char *)mdesc_get_property(hp, pn, "name", &len);
    253		if (prop == NULL)
    254			continue;
    255		if (strncmp(prop, "dax", strlen("dax")))
    256			continue;
    257		dax_dbg("Found node 0x%llx = %s", pn, prop);
    258
    259		prop = (char *)mdesc_get_property(hp, pn, "compatible", &len);
    260		if (prop == NULL)
    261			continue;
    262		dax_dbg("Found node 0x%llx = %s", pn, prop);
    263		found = true;
    264		break;
    265	}
    266
    267	if (!found) {
    268		dax_err("No DAX device found");
    269		ret = -ENODEV;
    270		goto done;
    271	}
    272
    273	if (strncmp(prop, DAX2_STR, strlen(DAX2_STR)) == 0) {
    274		dax_name = DAX_NAME "2";
    275		major = DAX2_MAJOR;
    276		minor_requested = DAX2_MINOR;
    277		max_ccb_version = 1;
    278		dax_dbg("MD indicates DAX2 coprocessor");
    279	} else if (strncmp(prop, DAX1_STR, strlen(DAX1_STR)) == 0) {
    280		dax_name = DAX_NAME "1";
    281		major = DAX1_MAJOR;
    282		minor_requested = DAX1_MINOR;
    283		max_ccb_version = 0;
    284		dax_dbg("MD indicates DAX1 coprocessor");
    285	} else {
    286		dax_err("Unknown dax type: %s", prop);
    287		ret = -ENODEV;
    288		goto done;
    289	}
    290
    291	minor = minor_requested;
    292	dax_dbg("Registering DAX HV api with major %ld minor %ld", major,
    293		minor);
    294	if (sun4v_hvapi_register(HV_GRP_DAX, major, &minor)) {
    295		dax_err("hvapi_register failed");
    296		ret = -ENODEV;
    297		goto done;
    298	} else {
    299		dax_dbg("Max minor supported by HV = %ld (major %ld)", minor,
    300			major);
    301		minor = min(minor, minor_requested);
    302		dax_dbg("registered DAX major %ld minor %ld", major, minor);
    303	}
    304
    305	/* submit a zero length ccb array to query coprocessor queue size */
    306	hv_rv = sun4v_ccb_submit(0, 0, HV_CCB_QUERY_CMD, 0, &max_ccbs, &dummy);
    307	if (hv_rv != 0) {
    308		dax_err("get_hwqueue_size failed with status=%ld and max_ccbs=%ld",
    309			hv_rv, max_ccbs);
    310		ret = -ENODEV;
    311		goto done;
    312	}
    313
    314	if (max_ccbs != DAX_MAX_CCBS) {
    315		dax_err("HV reports unsupported max_ccbs=%ld", max_ccbs);
    316		ret = -ENODEV;
    317		goto done;
    318	}
    319
    320	if (alloc_chrdev_region(&first, 0, 1, DAX_NAME) < 0) {
    321		dax_err("alloc_chrdev_region failed");
    322		ret = -ENXIO;
    323		goto done;
    324	}
    325
    326	cl = class_create(THIS_MODULE, DAX_NAME);
    327	if (IS_ERR(cl)) {
    328		dax_err("class_create failed");
    329		ret = PTR_ERR(cl);
    330		goto class_error;
    331	}
    332
    333	if (device_create(cl, NULL, first, NULL, dax_name) == NULL) {
    334		dax_err("device_create failed");
    335		ret = -ENXIO;
    336		goto device_error;
    337	}
    338
    339	cdev_init(&c_dev, &dax_fops);
    340	if (cdev_add(&c_dev, first, 1) == -1) {
    341		dax_err("cdev_add failed");
    342		ret = -ENXIO;
    343		goto cdev_error;
    344	}
    345
    346	pr_info("Attached DAX module\n");
    347	goto done;
    348
    349cdev_error:
    350	device_destroy(cl, first);
    351device_error:
    352	class_destroy(cl);
    353class_error:
    354	unregister_chrdev_region(first, 1);
    355done:
    356	mdesc_release(hp);
    357	return ret;
    358}
    359module_init(dax_attach);
    360
    361static void __exit dax_detach(void)
    362{
    363	pr_info("Cleaning up DAX module\n");
    364	cdev_del(&c_dev);
    365	device_destroy(cl, first);
    366	class_destroy(cl);
    367	unregister_chrdev_region(first, 1);
    368}
    369module_exit(dax_detach);
    370
    371/* map completion area */
    372static int dax_devmap(struct file *f, struct vm_area_struct *vma)
    373{
    374	struct dax_ctx *ctx = (struct dax_ctx *)f->private_data;
    375	size_t len = vma->vm_end - vma->vm_start;
    376
    377	dax_dbg("len=0x%lx, flags=0x%lx", len, vma->vm_flags);
    378
    379	if (ctx->owner != current) {
    380		dax_dbg("devmap called from wrong thread");
    381		return -EINVAL;
    382	}
    383
    384	if (len != DAX_MMAP_LEN) {
    385		dax_dbg("len(%lu) != DAX_MMAP_LEN(%d)", len, DAX_MMAP_LEN);
    386		return -EINVAL;
    387	}
    388
    389	/* completion area is mapped read-only for user */
    390	if (vma->vm_flags & VM_WRITE)
    391		return -EPERM;
    392	vma->vm_flags &= ~VM_MAYWRITE;
    393
    394	if (remap_pfn_range(vma, vma->vm_start, ctx->ca_buf_ra >> PAGE_SHIFT,
    395			    len, vma->vm_page_prot))
    396		return -EAGAIN;
    397
    398	dax_dbg("mmapped completion area at uva 0x%lx", vma->vm_start);
    399	return 0;
    400}
    401
    402/* Unlock user pages. Called during dequeue or device close */
    403static void dax_unlock_pages(struct dax_ctx *ctx, int ccb_index, int nelem)
    404{
    405	int i, j;
    406
    407	for (i = ccb_index; i < ccb_index + nelem; i++) {
    408		for (j = 0; j < NUM_STREAM_TYPES; j++) {
    409			struct page *p = ctx->pages[i][j];
    410
    411			if (p) {
    412				dax_dbg("freeing page %p", p);
    413				unpin_user_pages_dirty_lock(&p, 1, j == OUT);
    414				ctx->pages[i][j] = NULL;
    415			}
    416		}
    417	}
    418}
    419
    420static int dax_lock_page(void *va, struct page **p)
    421{
    422	int ret;
    423
    424	dax_dbg("uva %p", va);
    425
    426	ret = pin_user_pages_fast((unsigned long)va, 1, FOLL_WRITE, p);
    427	if (ret == 1) {
    428		dax_dbg("locked page %p, for VA %p", *p, va);
    429		return 0;
    430	}
    431
    432	dax_dbg("pin_user_pages failed, va=%p, ret=%d", va, ret);
    433	return -1;
    434}
    435
    436static int dax_lock_pages(struct dax_ctx *ctx, int idx,
    437			  int nelem, u64 *err_va)
    438{
    439	int i;
    440
    441	for (i = 0; i < nelem; i++) {
    442		struct dax_ccb *ccbp = &ctx->ccb_buf[i];
    443
    444		/*
    445		 * For each address in the CCB whose type is virtual,
    446		 * lock the page and change the type to virtual alternate
    447		 * context. On error, return the offending address in
    448		 * err_va.
    449		 */
    450		if (ccbp->hdr.out_addr_type == DAX_ADDR_TYPE_VA) {
    451			dax_dbg("output");
    452			if (dax_lock_page(ccbp->out,
    453					  &ctx->pages[i + idx][OUT]) != 0) {
    454				*err_va = (u64)ccbp->out;
    455				goto error;
    456			}
    457			ccbp->hdr.out_addr_type = DAX_ADDR_TYPE_VA_ALT;
    458		}
    459
    460		if (ccbp->hdr.pri_addr_type == DAX_ADDR_TYPE_VA) {
    461			dax_dbg("input");
    462			if (dax_lock_page(ccbp->pri,
    463					  &ctx->pages[i + idx][PRI]) != 0) {
    464				*err_va = (u64)ccbp->pri;
    465				goto error;
    466			}
    467			ccbp->hdr.pri_addr_type = DAX_ADDR_TYPE_VA_ALT;
    468		}
    469
    470		if (ccbp->hdr.sec_addr_type == DAX_ADDR_TYPE_VA) {
    471			dax_dbg("sec input");
    472			if (dax_lock_page(ccbp->sec,
    473					  &ctx->pages[i + idx][SEC]) != 0) {
    474				*err_va = (u64)ccbp->sec;
    475				goto error;
    476			}
    477			ccbp->hdr.sec_addr_type = DAX_ADDR_TYPE_VA_ALT;
    478		}
    479
    480		if (ccbp->hdr.table_addr_type == DAX_ADDR_TYPE_VA) {
    481			dax_dbg("tbl");
    482			if (dax_lock_page(ccbp->tbl,
    483					  &ctx->pages[i + idx][TBL]) != 0) {
    484				*err_va = (u64)ccbp->tbl;
    485				goto error;
    486			}
    487			ccbp->hdr.table_addr_type = DAX_ADDR_TYPE_VA_ALT;
    488		}
    489
    490		/* skip over 2nd 64 bytes of long CCB */
    491		if (ccbp->hdr.longccb)
    492			i++;
    493	}
    494	return DAX_SUBMIT_OK;
    495
    496error:
    497	dax_unlock_pages(ctx, idx, nelem);
    498	return DAX_SUBMIT_ERR_NOACCESS;
    499}
    500
    501static void dax_ccb_wait(struct dax_ctx *ctx, int idx)
    502{
    503	int ret, nretries;
    504	u16 kill_res;
    505
    506	dax_dbg("idx=%d", idx);
    507
    508	for (nretries = 0; nretries < DAX_CCB_RETRIES; nretries++) {
    509		if (ctx->ca_buf[idx].status == CCA_STAT_NOT_COMPLETED)
    510			udelay(DAX_CCB_USEC);
    511		else
    512			return;
    513	}
    514	dax_dbg("ctx (%p): CCB[%d] timed out, wait usec=%d, retries=%d. Killing ccb",
    515		(void *)ctx, idx, DAX_CCB_USEC, DAX_CCB_RETRIES);
    516
    517	ret = dax_ccb_kill(ctx->ca_buf_ra + idx * sizeof(struct dax_cca),
    518			   &kill_res);
    519	dax_dbg("Kill CCB[%d] %s", idx, ret ? "failed" : "succeeded");
    520}
    521
    522static int dax_close(struct inode *ino, struct file *f)
    523{
    524	struct dax_ctx *ctx = (struct dax_ctx *)f->private_data;
    525	int i;
    526
    527	f->private_data = NULL;
    528
    529	for (i = 0; i < DAX_CA_ELEMS; i++) {
    530		if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) {
    531			dax_dbg("CCB[%d] not completed", i);
    532			dax_ccb_wait(ctx, i);
    533		}
    534		dax_unlock_pages(ctx, i, 1);
    535	}
    536
    537	kfree(ctx->ccb_buf);
    538	kfree(ctx->ca_buf);
    539	dax_stat_dbg("CCBs: %d good, %d bad", ctx->ccb_count, ctx->fail_count);
    540	kfree(ctx);
    541
    542	return 0;
    543}
    544
    545static ssize_t dax_read(struct file *f, char __user *buf,
    546			size_t count, loff_t *ppos)
    547{
    548	struct dax_ctx *ctx = f->private_data;
    549
    550	if (ctx->client != current)
    551		return -EUSERS;
    552
    553	ctx->client = NULL;
    554
    555	if (count != sizeof(union ccb_result))
    556		return -EINVAL;
    557	if (copy_to_user(buf, &ctx->result, sizeof(union ccb_result)))
    558		return -EFAULT;
    559	return count;
    560}
    561
    562static ssize_t dax_write(struct file *f, const char __user *buf,
    563			 size_t count, loff_t *ppos)
    564{
    565	struct dax_ctx *ctx = f->private_data;
    566	struct dax_command hdr;
    567	unsigned long ca;
    568	int i, idx, ret;
    569
    570	if (ctx->client != NULL)
    571		return -EINVAL;
    572
    573	if (count == 0 || count > DAX_MAX_CCBS * sizeof(struct dax_ccb))
    574		return -EINVAL;
    575
    576	if (count % sizeof(struct dax_ccb) == 0)
    577		return dax_ccb_exec(ctx, buf, count, ppos); /* CCB EXEC */
    578
    579	if (count != sizeof(struct dax_command))
    580		return -EINVAL;
    581
    582	/* immediate command */
    583	if (ctx->owner != current)
    584		return -EUSERS;
    585
    586	if (copy_from_user(&hdr, buf, sizeof(hdr)))
    587		return -EFAULT;
    588
    589	ca = ctx->ca_buf_ra + hdr.ca_offset;
    590
    591	switch (hdr.command) {
    592	case CCB_KILL:
    593		if (hdr.ca_offset >= DAX_MMAP_LEN) {
    594			dax_dbg("invalid ca_offset (%d) >= ca_buflen (%d)",
    595				hdr.ca_offset, DAX_MMAP_LEN);
    596			return -EINVAL;
    597		}
    598
    599		ret = dax_ccb_kill(ca, &ctx->result.kill.action);
    600		if (ret != 0) {
    601			dax_dbg("dax_ccb_kill failed (ret=%d)", ret);
    602			return ret;
    603		}
    604
    605		dax_info_dbg("killed (ca_offset %d)", hdr.ca_offset);
    606		idx = hdr.ca_offset / sizeof(struct dax_cca);
    607		ctx->ca_buf[idx].status = CCA_STAT_KILLED;
    608		ctx->ca_buf[idx].err = CCA_ERR_KILLED;
    609		ctx->client = current;
    610		return count;
    611
    612	case CCB_INFO:
    613		if (hdr.ca_offset >= DAX_MMAP_LEN) {
    614			dax_dbg("invalid ca_offset (%d) >= ca_buflen (%d)",
    615				hdr.ca_offset, DAX_MMAP_LEN);
    616			return -EINVAL;
    617		}
    618
    619		ret = dax_ccb_info(ca, &ctx->result.info);
    620		if (ret != 0) {
    621			dax_dbg("dax_ccb_info failed (ret=%d)", ret);
    622			return ret;
    623		}
    624
    625		dax_info_dbg("info succeeded on ca_offset %d", hdr.ca_offset);
    626		ctx->client = current;
    627		return count;
    628
    629	case CCB_DEQUEUE:
    630		for (i = 0; i < DAX_CA_ELEMS; i++) {
    631			if (ctx->ca_buf[i].status !=
    632			    CCA_STAT_NOT_COMPLETED)
    633				dax_unlock_pages(ctx, i, 1);
    634		}
    635		return count;
    636
    637	default:
    638		return -EINVAL;
    639	}
    640}
    641
    642static int dax_open(struct inode *inode, struct file *f)
    643{
    644	struct dax_ctx *ctx = NULL;
    645	int i;
    646
    647	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
    648	if (ctx == NULL)
    649		goto done;
    650
    651	ctx->ccb_buf = kcalloc(DAX_MAX_CCBS, sizeof(struct dax_ccb),
    652			       GFP_KERNEL);
    653	if (ctx->ccb_buf == NULL)
    654		goto done;
    655
    656	ctx->ccb_buf_ra = virt_to_phys(ctx->ccb_buf);
    657	dax_dbg("ctx->ccb_buf=0x%p, ccb_buf_ra=0x%llx",
    658		(void *)ctx->ccb_buf, ctx->ccb_buf_ra);
    659
    660	/* allocate CCB completion area buffer */
    661	ctx->ca_buf = kzalloc(DAX_MMAP_LEN, GFP_KERNEL);
    662	if (ctx->ca_buf == NULL)
    663		goto alloc_error;
    664	for (i = 0; i < DAX_CA_ELEMS; i++)
    665		ctx->ca_buf[i].status = CCA_STAT_COMPLETED;
    666
    667	ctx->ca_buf_ra = virt_to_phys(ctx->ca_buf);
    668	dax_dbg("ctx=0x%p, ctx->ca_buf=0x%p, ca_buf_ra=0x%llx",
    669		(void *)ctx, (void *)ctx->ca_buf, ctx->ca_buf_ra);
    670
    671	ctx->owner = current;
    672	f->private_data = ctx;
    673	return 0;
    674
    675alloc_error:
    676	kfree(ctx->ccb_buf);
    677done:
    678	kfree(ctx);
    679	return -ENOMEM;
    680}
    681
    682static char *dax_hv_errno(unsigned long hv_ret, int *ret)
    683{
    684	switch (hv_ret) {
    685	case HV_EBADALIGN:
    686		*ret = -EFAULT;
    687		return "HV_EBADALIGN";
    688	case HV_ENORADDR:
    689		*ret = -EFAULT;
    690		return "HV_ENORADDR";
    691	case HV_EINVAL:
    692		*ret = -EINVAL;
    693		return "HV_EINVAL";
    694	case HV_EWOULDBLOCK:
    695		*ret = -EAGAIN;
    696		return "HV_EWOULDBLOCK";
    697	case HV_ENOACCESS:
    698		*ret = -EPERM;
    699		return "HV_ENOACCESS";
    700	default:
    701		break;
    702	}
    703
    704	*ret = -EIO;
    705	return "UNKNOWN";
    706}
    707
    708static int dax_ccb_kill(u64 ca, u16 *kill_res)
    709{
    710	unsigned long hv_ret;
    711	int count, ret = 0;
    712	char *err_str;
    713
    714	for (count = 0; count < DAX_CCB_RETRIES; count++) {
    715		dax_dbg("attempting kill on ca_ra 0x%llx", ca);
    716		hv_ret = sun4v_ccb_kill(ca, kill_res);
    717
    718		if (hv_ret == HV_EOK) {
    719			dax_info_dbg("HV_EOK (ca_ra 0x%llx): %d", ca,
    720				     *kill_res);
    721		} else {
    722			err_str = dax_hv_errno(hv_ret, &ret);
    723			dax_dbg("%s (ca_ra 0x%llx)", err_str, ca);
    724		}
    725
    726		if (ret != -EAGAIN)
    727			return ret;
    728		dax_info_dbg("ccb_kill count = %d", count);
    729		udelay(DAX_CCB_USEC);
    730	}
    731
    732	return -EAGAIN;
    733}
    734
    735static int dax_ccb_info(u64 ca, struct ccb_info_result *info)
    736{
    737	unsigned long hv_ret;
    738	char *err_str;
    739	int ret = 0;
    740
    741	dax_dbg("attempting info on ca_ra 0x%llx", ca);
    742	hv_ret = sun4v_ccb_info(ca, info);
    743
    744	if (hv_ret == HV_EOK) {
    745		dax_info_dbg("HV_EOK (ca_ra 0x%llx): %d", ca, info->state);
    746		if (info->state == DAX_CCB_ENQUEUED) {
    747			dax_info_dbg("dax_unit %d, queue_num %d, queue_pos %d",
    748				     info->inst_num, info->q_num, info->q_pos);
    749		}
    750	} else {
    751		err_str = dax_hv_errno(hv_ret, &ret);
    752		dax_dbg("%s (ca_ra 0x%llx)", err_str, ca);
    753	}
    754
    755	return ret;
    756}
    757
    758static void dax_prt_ccbs(struct dax_ccb *ccb, int nelem)
    759{
    760	int i, j;
    761	u64 *ccbp;
    762
    763	dax_dbg("ccb buffer:");
    764	for (i = 0; i < nelem; i++) {
    765		ccbp = (u64 *)&ccb[i];
    766		dax_dbg(" %sccb[%d]", ccb[i].hdr.longccb ? "long " : "",  i);
    767		for (j = 0; j < 8; j++)
    768			dax_dbg("\tccb[%d].dwords[%d]=0x%llx",
    769				i, j, *(ccbp + j));
    770	}
    771}
    772
    773/*
    774 * Validates user CCB content.  Also sets completion address and address types
    775 * for all addresses contained in CCB.
    776 */
    777static int dax_preprocess_usr_ccbs(struct dax_ctx *ctx, int idx, int nelem)
    778{
    779	int i;
    780
    781	/*
    782	 * The user is not allowed to specify real address types in
    783	 * the CCB header.  This must be enforced by the kernel before
    784	 * submitting the CCBs to HV.  The only allowed values for all
    785	 * address fields are VA or IMM
    786	 */
    787	for (i = 0; i < nelem; i++) {
    788		struct dax_ccb *ccbp = &ctx->ccb_buf[i];
    789		unsigned long ca_offset;
    790
    791		if (ccbp->hdr.ccb_version > max_ccb_version)
    792			return DAX_SUBMIT_ERR_CCB_INVAL;
    793
    794		switch (ccbp->hdr.opcode) {
    795		case DAX_OP_SYNC_NOP:
    796		case DAX_OP_EXTRACT:
    797		case DAX_OP_SCAN_VALUE:
    798		case DAX_OP_SCAN_RANGE:
    799		case DAX_OP_TRANSLATE:
    800		case DAX_OP_SCAN_VALUE | DAX_OP_INVERT:
    801		case DAX_OP_SCAN_RANGE | DAX_OP_INVERT:
    802		case DAX_OP_TRANSLATE | DAX_OP_INVERT:
    803		case DAX_OP_SELECT:
    804			break;
    805		default:
    806			return DAX_SUBMIT_ERR_CCB_INVAL;
    807		}
    808
    809		if (ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_VA &&
    810		    ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_NONE) {
    811			dax_dbg("invalid out_addr_type in user CCB[%d]", i);
    812			return DAX_SUBMIT_ERR_CCB_INVAL;
    813		}
    814
    815		if (ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_VA &&
    816		    ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_NONE) {
    817			dax_dbg("invalid pri_addr_type in user CCB[%d]", i);
    818			return DAX_SUBMIT_ERR_CCB_INVAL;
    819		}
    820
    821		if (ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_VA &&
    822		    ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_NONE) {
    823			dax_dbg("invalid sec_addr_type in user CCB[%d]", i);
    824			return DAX_SUBMIT_ERR_CCB_INVAL;
    825		}
    826
    827		if (ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_VA &&
    828		    ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_NONE) {
    829			dax_dbg("invalid table_addr_type in user CCB[%d]", i);
    830			return DAX_SUBMIT_ERR_CCB_INVAL;
    831		}
    832
    833		/* set completion (real) address and address type */
    834		ccbp->hdr.cca_addr_type = DAX_ADDR_TYPE_RA;
    835		ca_offset = (idx + i) * sizeof(struct dax_cca);
    836		ccbp->ca = (void *)ctx->ca_buf_ra + ca_offset;
    837		memset(&ctx->ca_buf[idx + i], 0, sizeof(struct dax_cca));
    838
    839		dax_dbg("ccb[%d]=%p, ca_offset=0x%lx, compl RA=0x%llx",
    840			i, ccbp, ca_offset, ctx->ca_buf_ra + ca_offset);
    841
    842		/* skip over 2nd 64 bytes of long CCB */
    843		if (ccbp->hdr.longccb)
    844			i++;
    845	}
    846
    847	return DAX_SUBMIT_OK;
    848}
    849
    850static int dax_ccb_exec(struct dax_ctx *ctx, const char __user *buf,
    851			size_t count, loff_t *ppos)
    852{
    853	unsigned long accepted_len, hv_rv;
    854	int i, idx, nccbs, naccepted;
    855
    856	ctx->client = current;
    857	idx = *ppos;
    858	nccbs = count / sizeof(struct dax_ccb);
    859
    860	if (ctx->owner != current) {
    861		dax_dbg("wrong thread");
    862		ctx->result.exec.status = DAX_SUBMIT_ERR_THR_INIT;
    863		return 0;
    864	}
    865	dax_dbg("args: ccb_buf_len=%ld, idx=%d", count, idx);
    866
    867	/* for given index and length, verify ca_buf range exists */
    868	if (idx < 0 || idx > (DAX_CA_ELEMS - nccbs)) {
    869		ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL;
    870		return 0;
    871	}
    872
    873	/*
    874	 * Copy CCBs into kernel buffer to prevent modification by the
    875	 * user in between validation and submission.
    876	 */
    877	if (copy_from_user(ctx->ccb_buf, buf, count)) {
    878		dax_dbg("copyin of user CCB buffer failed");
    879		ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_ARR_MMU_MISS;
    880		return 0;
    881	}
    882
    883	/* check to see if ca_buf[idx] .. ca_buf[idx + nccbs] are available */
    884	for (i = idx; i < idx + nccbs; i++) {
    885		if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) {
    886			dax_dbg("CA range not available, dequeue needed");
    887			ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL;
    888			return 0;
    889		}
    890	}
    891	dax_unlock_pages(ctx, idx, nccbs);
    892
    893	ctx->result.exec.status = dax_preprocess_usr_ccbs(ctx, idx, nccbs);
    894	if (ctx->result.exec.status != DAX_SUBMIT_OK)
    895		return 0;
    896
    897	ctx->result.exec.status = dax_lock_pages(ctx, idx, nccbs,
    898						 &ctx->result.exec.status_data);
    899	if (ctx->result.exec.status != DAX_SUBMIT_OK)
    900		return 0;
    901
    902	if (dax_debug & DAX_DBG_FLG_BASIC)
    903		dax_prt_ccbs(ctx->ccb_buf, nccbs);
    904
    905	hv_rv = sun4v_ccb_submit(ctx->ccb_buf_ra, count,
    906				 HV_CCB_QUERY_CMD | HV_CCB_VA_SECONDARY, 0,
    907				 &accepted_len, &ctx->result.exec.status_data);
    908
    909	switch (hv_rv) {
    910	case HV_EOK:
    911		/*
    912		 * Hcall succeeded with no errors but the accepted
    913		 * length may be less than the requested length.  The
    914		 * only way the driver can resubmit the remainder is
    915		 * to wait for completion of the submitted CCBs since
    916		 * there is no way to guarantee the ordering semantics
    917		 * required by the client applications.  Therefore we
    918		 * let the user library deal with resubmissions.
    919		 */
    920		ctx->result.exec.status = DAX_SUBMIT_OK;
    921		break;
    922	case HV_EWOULDBLOCK:
    923		/*
    924		 * This is a transient HV API error. The user library
    925		 * can retry.
    926		 */
    927		dax_dbg("hcall returned HV_EWOULDBLOCK");
    928		ctx->result.exec.status = DAX_SUBMIT_ERR_WOULDBLOCK;
    929		break;
    930	case HV_ENOMAP:
    931		/*
    932		 * HV was unable to translate a VA. The VA it could
    933		 * not translate is returned in the status_data param.
    934		 */
    935		dax_dbg("hcall returned HV_ENOMAP");
    936		ctx->result.exec.status = DAX_SUBMIT_ERR_NOMAP;
    937		break;
    938	case HV_EINVAL:
    939		/*
    940		 * This is the result of an invalid user CCB as HV is
    941		 * validating some of the user CCB fields.  Pass this
    942		 * error back to the user. There is no supporting info
    943		 * to isolate the invalid field.
    944		 */
    945		dax_dbg("hcall returned HV_EINVAL");
    946		ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_INVAL;
    947		break;
    948	case HV_ENOACCESS:
    949		/*
    950		 * HV found a VA that did not have the appropriate
    951		 * permissions (such as the w bit). The VA in question
    952		 * is returned in status_data param.
    953		 */
    954		dax_dbg("hcall returned HV_ENOACCESS");
    955		ctx->result.exec.status = DAX_SUBMIT_ERR_NOACCESS;
    956		break;
    957	case HV_EUNAVAILABLE:
    958		/*
    959		 * The requested CCB operation could not be performed
    960		 * at this time. Return the specific unavailable code
    961		 * in the status_data field.
    962		 */
    963		dax_dbg("hcall returned HV_EUNAVAILABLE");
    964		ctx->result.exec.status = DAX_SUBMIT_ERR_UNAVAIL;
    965		break;
    966	default:
    967		ctx->result.exec.status = DAX_SUBMIT_ERR_INTERNAL;
    968		dax_dbg("unknown hcall return value (%ld)", hv_rv);
    969		break;
    970	}
    971
    972	/* unlock pages associated with the unaccepted CCBs */
    973	naccepted = accepted_len / sizeof(struct dax_ccb);
    974	dax_unlock_pages(ctx, idx + naccepted, nccbs - naccepted);
    975
    976	/* mark unaccepted CCBs as not completed */
    977	for (i = idx + naccepted; i < idx + nccbs; i++)
    978		ctx->ca_buf[i].status = CCA_STAT_COMPLETED;
    979
    980	ctx->ccb_count += naccepted;
    981	ctx->fail_count += nccbs - naccepted;
    982
    983	dax_dbg("hcall rv=%ld, accepted_len=%ld, status_data=0x%llx, ret status=%d",
    984		hv_rv, accepted_len, ctx->result.exec.status_data,
    985		ctx->result.exec.status);
    986
    987	if (count == accepted_len)
    988		ctx->client = NULL; /* no read needed to complete protocol */
    989	return accepted_len;
    990}