cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ldc.c (51598B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* ldc.c: Logical Domain Channel link-layer protocol driver.
      3 *
      4 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
      5 */
      6
      7#include <linux/kernel.h>
      8#include <linux/export.h>
      9#include <linux/slab.h>
     10#include <linux/spinlock.h>
     11#include <linux/delay.h>
     12#include <linux/errno.h>
     13#include <linux/string.h>
     14#include <linux/scatterlist.h>
     15#include <linux/interrupt.h>
     16#include <linux/list.h>
     17#include <linux/init.h>
     18#include <linux/bitmap.h>
     19#include <asm/iommu-common.h>
     20
     21#include <asm/hypervisor.h>
     22#include <asm/iommu.h>
     23#include <asm/page.h>
     24#include <asm/ldc.h>
     25#include <asm/mdesc.h>
     26
/* Driver identification strings.  PFX is the prefix placed in front of
 * this driver's printk() messages.
 */
#define DRV_MODULE_NAME		"ldc"
#define PFX DRV_MODULE_NAME	": "
#define DRV_MODULE_VERSION	"1.1"
#define DRV_MODULE_RELDATE	"July 22, 2008"

/* The top four bits of a 64-bit cookie hold the page size code; the
 * shift moves that field down to bit zero.
 */
#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT	60ULL
     34
     35
/* Version banner built from the driver name, version and release date. */
static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
     38
/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 *
 * The header is 8 bytes (type, stype, ctrl, env, seqid); the union
 * covers the remaining LDC_PACKET_SIZE - 8 bytes of the packet.
 */
struct ldc_packet {
	/* Frame class: control, data or error. */
	u8			type;
#define LDC_CTRL		0x01
#define LDC_DATA		0x02
#define LDC_ERR			0x10

	/* Frame subtype: informational, positive or negative acknowledge. */
	u8			stype;
#define LDC_INFO		0x01
#define LDC_ACK			0x02
#define LDC_NACK		0x04

	/* Control message code; only meaningful bits are in LDC_CTRL_MSK. */
	u8			ctrl;
#define LDC_VERS		0x01 /* Link Version		*/
#define LDC_RTS			0x02 /* Request To Send		*/
#define LDC_RTR			0x03 /* Ready To Receive	*/
#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
#define LDC_CTRL_MSK		0x0f

	/* Envelope: low six bits are a length, top two bits are the
	 * start/stop fragment flags.  The handshake senders in this file
	 * store the channel mode here for RTS/RTR frames.
	 */
	u8			env;
#define LDC_LEN			0x3f
#define LDC_FRAG_MASK		0xc0
#define LDC_START		0x40
#define LDC_STOP		0x80

	/* Sequence number of this frame. */
	u32			seqid;

	union {
		/* Payload when no ackid is carried. */
		u8		u_data[LDC_PACKET_SIZE - 8];
		/* Layout carrying an ackid ahead of a smaller payload. */
		struct {
			u32	pad;
			u32	ackid;
			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
		} r;
	} u;
};
     78
/* Protocol version as exchanged in LDC_VERS control frames. */
struct ldc_version {
	u16 major;
	u16 minor;
};

/* Versions this driver can speak.  find_by_major() relies on the
 * ordering and start_handshake() offers the first entry.
 */
/* Ordered from largest major to lowest.  */
static struct ldc_version ver_arr[] = {
	{ .major = 1, .minor = 0 },
};
     88
/* Defaults: an MTU of four packets, and a queue that fills one page. */
#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
     91
struct ldc_channel;

/* Per-mode read/write entry points; ldc_alloc() picks one of the
 * tables declared below according to the configured channel mode.
 */
struct ldc_mode_ops {
	int (*write)(struct ldc_channel *, const void *, unsigned int);
	int (*read)(struct ldc_channel *, void *, unsigned int);
};

/* Forward declarations of the per-mode tables. */
static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

/* ldc_alloc() returns -ENODEV while this is zero. */
int ldom_domaining_enabled;
    104
/* Per-channel map table state: the table registered with the
 * hypervisor via sun4v_ldc_set_map_table() plus the allocator that
 * hands out its entries.
 */
struct ldc_iommu {
	/* Protects ldc_unmap.  */
	spinlock_t			lock;
	/* Array of LDC_IOTABLE_SIZE map table entries. */
	struct ldc_mtable_entry		*page_table;
	/* Allocation bookkeeping for page_table entries. */
	struct iommu_map_table		iommu_map_table;
};
    111
/* Software state for one logical domain channel. */
struct ldc_channel {
	/* Protects all operations that depend upon channel state.  */
	spinlock_t			lock;

	/* Channel ID passed to every sun4v_ldc_*() hypervisor call. */
	unsigned long			id;

	u8				*mssbuf;
	u32				mssbuf_len;
	u32				mssbuf_off;

	/* TX queue.  tx_head and tx_tail are byte offsets into the queue;
	 * they are divided by LDC_PACKET_SIZE to index tx_base.  tx_ra is
	 * the queue address handed to sun4v_ldc_tx_qconf().
	 */
	struct ldc_packet		*tx_base;
	unsigned long			tx_head;
	unsigned long			tx_tail;
	unsigned long			tx_num_entries;
	unsigned long			tx_ra;

	/* Byte offset of the next TX packet not yet acknowledged; see
	 * head_for_data() and process_data_ack().
	 */
	unsigned long			tx_acked;

	/* RX queue, laid out like the TX queue above. */
	struct ldc_packet		*rx_base;
	unsigned long			rx_head;
	unsigned long			rx_tail;
	unsigned long			rx_num_entries;
	unsigned long			rx_ra;

	/* Sequence IDs: last one received and last one sent. */
	u32				rcv_nxt;
	u32				snd_nxt;

	/* Channel state as last reported by the hypervisor. */
	unsigned long			chan_state;

	struct ldc_channel_config	cfg;
	/* Opaque argument passed back to cfg.event(). */
	void				*event_arg;

	const struct ldc_mode_ops	*mops;

	struct ldc_iommu		iommu;

	/* Version agreed (or being negotiated) during the handshake. */
	struct ldc_version		ver;

	/* Handshake progress. */
	u8				hs_state;
#define LDC_HS_CLOSED			0x00
#define LDC_HS_OPEN			0x01
#define LDC_HS_GOTVERS			0x02
#define LDC_HS_SENTRTR			0x03
#define LDC_HS_GOTRTR			0x04
#define LDC_HS_COMPLETE			0x10

	u8				flags;
#define LDC_FLAG_ALLOCED_QUEUES		0x01
#define LDC_FLAG_REGISTERED_QUEUES	0x02
#define LDC_FLAG_REGISTERED_IRQS	0x04
#define LDC_FLAG_RESET			0x10

	/* Payload bytes carried per packet in the configured mode. */
	u8				mss;
	/* Driver-level LDC_STATE_* value; see state_to_str(). */
	u8				state;

#define LDC_IRQ_NAME_MAX		32
	char				rx_irq_name[LDC_IRQ_NAME_MAX];
	char				tx_irq_name[LDC_IRQ_NAME_MAX];

	struct hlist_head		mh_list;

	/* Linkage on ldc_channel_list. */
	struct hlist_node		list;
};
    175
/* Emit a debug message when the LDC_DEBUG_<TYPE> bit is set in the
 * channel's cfg.debug mask.  The expansion references a variable named
 * "lp", so it can only be used where such a variable is in scope.
 */
#define ldcdbg(TYPE, f, a...) \
do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)

/* Abort the channel, passing the calling function's name as the message. */
#define	LDC_ABORT(lp)	ldc_abort((lp), __func__)
    182
    183static const char *state_to_str(u8 state)
    184{
    185	switch (state) {
    186	case LDC_STATE_INVALID:
    187		return "INVALID";
    188	case LDC_STATE_INIT:
    189		return "INIT";
    190	case LDC_STATE_BOUND:
    191		return "BOUND";
    192	case LDC_STATE_READY:
    193		return "READY";
    194	case LDC_STATE_CONNECTED:
    195		return "CONNECTED";
    196	default:
    197		return "<UNKNOWN>";
    198	}
    199}
    200
    201static unsigned long __advance(unsigned long off, unsigned long num_entries)
    202{
    203	off += LDC_PACKET_SIZE;
    204	if (off == (num_entries * LDC_PACKET_SIZE))
    205		off = 0;
    206
    207	return off;
    208}
    209
    210static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
    211{
    212	return __advance(off, lp->rx_num_entries);
    213}
    214
    215static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
    216{
    217	return __advance(off, lp->tx_num_entries);
    218}
    219
    220static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
    221						  unsigned long *new_tail)
    222{
    223	struct ldc_packet *p;
    224	unsigned long t;
    225
    226	t = tx_advance(lp, lp->tx_tail);
    227	if (t == lp->tx_head)
    228		return NULL;
    229
    230	*new_tail = t;
    231
    232	p = lp->tx_base;
    233	return p + (lp->tx_tail / LDC_PACKET_SIZE);
    234}
    235
    236/* When we are in reliable or stream mode, have to track the next packet
    237 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
    238 * to be careful not to stomp over the queue past that point.  During
    239 * the handshake, we don't have TX data packets pending in the queue
    240 * and that's why handshake_get_tx_packet() need not be mindful of
    241 * lp->tx_acked.
    242 */
    243static unsigned long head_for_data(struct ldc_channel *lp)
    244{
    245	if (lp->cfg.mode == LDC_MODE_STREAM)
    246		return lp->tx_acked;
    247	return lp->tx_head;
    248}
    249
    250static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
    251{
    252	unsigned long limit, tail, new_tail, diff;
    253	unsigned int mss;
    254
    255	limit = head_for_data(lp);
    256	tail = lp->tx_tail;
    257	new_tail = tx_advance(lp, tail);
    258	if (new_tail == limit)
    259		return 0;
    260
    261	if (limit > new_tail)
    262		diff = limit - new_tail;
    263	else
    264		diff = (limit +
    265			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
    266	diff /= LDC_PACKET_SIZE;
    267	mss = lp->mss;
    268
    269	if (diff * mss < size)
    270		return 0;
    271
    272	return 1;
    273}
    274
    275static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
    276					     unsigned long *new_tail)
    277{
    278	struct ldc_packet *p;
    279	unsigned long h, t;
    280
    281	h = head_for_data(lp);
    282	t = tx_advance(lp, lp->tx_tail);
    283	if (t == h)
    284		return NULL;
    285
    286	*new_tail = t;
    287
    288	p = lp->tx_base;
    289	return p + (lp->tx_tail / LDC_PACKET_SIZE);
    290}
    291
    292static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
    293{
    294	unsigned long orig_tail = lp->tx_tail;
    295	int limit = 1000;
    296
    297	lp->tx_tail = tail;
    298	while (limit-- > 0) {
    299		unsigned long err;
    300
    301		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
    302		if (!err)
    303			return 0;
    304
    305		if (err != HV_EWOULDBLOCK) {
    306			lp->tx_tail = orig_tail;
    307			return -EINVAL;
    308		}
    309		udelay(1);
    310	}
    311
    312	lp->tx_tail = orig_tail;
    313	return -EBUSY;
    314}
    315
    316/* This just updates the head value in the hypervisor using
    317 * a polling loop with a timeout.  The caller takes care of
    318 * upating software state representing the head change, if any.
    319 */
    320static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
    321{
    322	int limit = 1000;
    323
    324	while (limit-- > 0) {
    325		unsigned long err;
    326
    327		err = sun4v_ldc_rx_set_qhead(lp->id, head);
    328		if (!err)
    329			return 0;
    330
    331		if (err != HV_EWOULDBLOCK)
    332			return -EINVAL;
    333
    334		udelay(1);
    335	}
    336
    337	return -EBUSY;
    338}
    339
    340static int send_tx_packet(struct ldc_channel *lp,
    341			  struct ldc_packet *p,
    342			  unsigned long new_tail)
    343{
    344	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
    345
    346	return set_tx_tail(lp, new_tail);
    347}
    348
    349static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
    350						 u8 stype, u8 ctrl,
    351						 void *data, int dlen,
    352						 unsigned long *new_tail)
    353{
    354	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
    355
    356	if (p) {
    357		memset(p, 0, sizeof(*p));
    358		p->type = LDC_CTRL;
    359		p->stype = stype;
    360		p->ctrl = ctrl;
    361		if (data)
    362			memcpy(p->u.u_data, data, dlen);
    363	}
    364	return p;
    365}
    366
    367static int start_handshake(struct ldc_channel *lp)
    368{
    369	struct ldc_packet *p;
    370	struct ldc_version *ver;
    371	unsigned long new_tail;
    372
    373	ver = &ver_arr[0];
    374
    375	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
    376	       ver->major, ver->minor);
    377
    378	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
    379				   ver, sizeof(*ver), &new_tail);
    380	if (p) {
    381		int err = send_tx_packet(lp, p, new_tail);
    382		if (!err)
    383			lp->flags &= ~LDC_FLAG_RESET;
    384		return err;
    385	}
    386	return -EBUSY;
    387}
    388
    389static int send_version_nack(struct ldc_channel *lp,
    390			     u16 major, u16 minor)
    391{
    392	struct ldc_packet *p;
    393	struct ldc_version ver;
    394	unsigned long new_tail;
    395
    396	ver.major = major;
    397	ver.minor = minor;
    398
    399	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
    400				   &ver, sizeof(ver), &new_tail);
    401	if (p) {
    402		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
    403		       ver.major, ver.minor);
    404
    405		return send_tx_packet(lp, p, new_tail);
    406	}
    407	return -EBUSY;
    408}
    409
    410static int send_version_ack(struct ldc_channel *lp,
    411			    struct ldc_version *vp)
    412{
    413	struct ldc_packet *p;
    414	unsigned long new_tail;
    415
    416	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
    417				   vp, sizeof(*vp), &new_tail);
    418	if (p) {
    419		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
    420		       vp->major, vp->minor);
    421
    422		return send_tx_packet(lp, p, new_tail);
    423	}
    424	return -EBUSY;
    425}
    426
    427static int send_rts(struct ldc_channel *lp)
    428{
    429	struct ldc_packet *p;
    430	unsigned long new_tail;
    431
    432	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
    433				   &new_tail);
    434	if (p) {
    435		p->env = lp->cfg.mode;
    436		p->seqid = 0;
    437		lp->rcv_nxt = 0;
    438
    439		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
    440		       p->env, p->seqid);
    441
    442		return send_tx_packet(lp, p, new_tail);
    443	}
    444	return -EBUSY;
    445}
    446
    447static int send_rtr(struct ldc_channel *lp)
    448{
    449	struct ldc_packet *p;
    450	unsigned long new_tail;
    451
    452	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
    453				   &new_tail);
    454	if (p) {
    455		p->env = lp->cfg.mode;
    456		p->seqid = 0;
    457
    458		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
    459		       p->env, p->seqid);
    460
    461		return send_tx_packet(lp, p, new_tail);
    462	}
    463	return -EBUSY;
    464}
    465
    466static int send_rdx(struct ldc_channel *lp)
    467{
    468	struct ldc_packet *p;
    469	unsigned long new_tail;
    470
    471	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
    472				   &new_tail);
    473	if (p) {
    474		p->env = 0;
    475		p->seqid = ++lp->snd_nxt;
    476		p->u.r.ackid = lp->rcv_nxt;
    477
    478		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
    479		       p->env, p->seqid, p->u.r.ackid);
    480
    481		return send_tx_packet(lp, p, new_tail);
    482	}
    483	return -EBUSY;
    484}
    485
    486static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
    487{
    488	struct ldc_packet *p;
    489	unsigned long new_tail;
    490	int err;
    491
    492	p = data_get_tx_packet(lp, &new_tail);
    493	if (!p)
    494		return -EBUSY;
    495	memset(p, 0, sizeof(*p));
    496	p->type = data_pkt->type;
    497	p->stype = LDC_NACK;
    498	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
    499	p->seqid = lp->snd_nxt + 1;
    500	p->u.r.ackid = lp->rcv_nxt;
    501
    502	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
    503	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
    504
    505	err = send_tx_packet(lp, p, new_tail);
    506	if (!err)
    507		lp->snd_nxt++;
    508
    509	return err;
    510}
    511
    512static int ldc_abort(struct ldc_channel *lp, const char *msg)
    513{
    514	unsigned long hv_err;
    515
    516	ldcdbg(STATE, "ABORT[%s]\n", msg);
    517	ldc_print(lp);
    518
    519	/* We report but do not act upon the hypervisor errors because
    520	 * there really isn't much we can do if they fail at this point.
    521	 */
    522	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
    523	if (hv_err)
    524		printk(KERN_ERR PFX "ldc_abort: "
    525		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
    526		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
    527
    528	hv_err = sun4v_ldc_tx_get_state(lp->id,
    529					&lp->tx_head,
    530					&lp->tx_tail,
    531					&lp->chan_state);
    532	if (hv_err)
    533		printk(KERN_ERR PFX "ldc_abort: "
    534		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
    535		       lp->id, hv_err);
    536
    537	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
    538	if (hv_err)
    539		printk(KERN_ERR PFX "ldc_abort: "
    540		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
    541		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
    542
    543	/* Refetch the RX queue state as well, because we could be invoked
    544	 * here in the queue processing context.
    545	 */
    546	hv_err = sun4v_ldc_rx_get_state(lp->id,
    547					&lp->rx_head,
    548					&lp->rx_tail,
    549					&lp->chan_state);
    550	if (hv_err)
    551		printk(KERN_ERR PFX "ldc_abort: "
    552		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
    553		       lp->id, hv_err);
    554
    555	return -ECONNRESET;
    556}
    557
    558static struct ldc_version *find_by_major(u16 major)
    559{
    560	struct ldc_version *ret = NULL;
    561	int i;
    562
    563	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
    564		struct ldc_version *v = &ver_arr[i];
    565		if (v->major <= major) {
    566			ret = v;
    567			break;
    568		}
    569	}
    570	return ret;
    571}
    572
    573static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
    574{
    575	struct ldc_version *vap;
    576	int err;
    577
    578	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
    579	       vp->major, vp->minor);
    580
    581	if (lp->hs_state == LDC_HS_GOTVERS) {
    582		lp->hs_state = LDC_HS_OPEN;
    583		memset(&lp->ver, 0, sizeof(lp->ver));
    584	}
    585
    586	vap = find_by_major(vp->major);
    587	if (!vap) {
    588		err = send_version_nack(lp, 0, 0);
    589	} else if (vap->major != vp->major) {
    590		err = send_version_nack(lp, vap->major, vap->minor);
    591	} else {
    592		struct ldc_version ver = *vp;
    593		if (ver.minor > vap->minor)
    594			ver.minor = vap->minor;
    595		err = send_version_ack(lp, &ver);
    596		if (!err) {
    597			lp->ver = ver;
    598			lp->hs_state = LDC_HS_GOTVERS;
    599		}
    600	}
    601	if (err)
    602		return LDC_ABORT(lp);
    603
    604	return 0;
    605}
    606
    607static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
    608{
    609	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
    610	       vp->major, vp->minor);
    611
    612	if (lp->hs_state == LDC_HS_GOTVERS) {
    613		if (lp->ver.major != vp->major ||
    614		    lp->ver.minor != vp->minor)
    615			return LDC_ABORT(lp);
    616	} else {
    617		lp->ver = *vp;
    618		lp->hs_state = LDC_HS_GOTVERS;
    619	}
    620	if (send_rts(lp))
    621		return LDC_ABORT(lp);
    622	return 0;
    623}
    624
    625static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
    626{
    627	struct ldc_version *vap;
    628	struct ldc_packet *p;
    629	unsigned long new_tail;
    630
    631	if (vp->major == 0 && vp->minor == 0)
    632		return LDC_ABORT(lp);
    633
    634	vap = find_by_major(vp->major);
    635	if (!vap)
    636		return LDC_ABORT(lp);
    637
    638	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
    639					   vap, sizeof(*vap),
    640					   &new_tail);
    641	if (!p)
    642		return LDC_ABORT(lp);
    643
    644	return send_tx_packet(lp, p, new_tail);
    645}
    646
    647static int process_version(struct ldc_channel *lp,
    648			   struct ldc_packet *p)
    649{
    650	struct ldc_version *vp;
    651
    652	vp = (struct ldc_version *) p->u.u_data;
    653
    654	switch (p->stype) {
    655	case LDC_INFO:
    656		return process_ver_info(lp, vp);
    657
    658	case LDC_ACK:
    659		return process_ver_ack(lp, vp);
    660
    661	case LDC_NACK:
    662		return process_ver_nack(lp, vp);
    663
    664	default:
    665		return LDC_ABORT(lp);
    666	}
    667}
    668
    669static int process_rts(struct ldc_channel *lp,
    670		       struct ldc_packet *p)
    671{
    672	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
    673	       p->stype, p->seqid, p->env);
    674
    675	if (p->stype     != LDC_INFO	   ||
    676	    lp->hs_state != LDC_HS_GOTVERS ||
    677	    p->env       != lp->cfg.mode)
    678		return LDC_ABORT(lp);
    679
    680	lp->snd_nxt = p->seqid;
    681	lp->rcv_nxt = p->seqid;
    682	lp->hs_state = LDC_HS_SENTRTR;
    683	if (send_rtr(lp))
    684		return LDC_ABORT(lp);
    685
    686	return 0;
    687}
    688
    689static int process_rtr(struct ldc_channel *lp,
    690		       struct ldc_packet *p)
    691{
    692	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
    693	       p->stype, p->seqid, p->env);
    694
    695	if (p->stype     != LDC_INFO ||
    696	    p->env       != lp->cfg.mode)
    697		return LDC_ABORT(lp);
    698
    699	lp->snd_nxt = p->seqid;
    700	lp->hs_state = LDC_HS_COMPLETE;
    701	ldc_set_state(lp, LDC_STATE_CONNECTED);
    702	send_rdx(lp);
    703
    704	return LDC_EVENT_UP;
    705}
    706
    707static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
    708{
    709	return lp->rcv_nxt + 1 == seqid;
    710}
    711
    712static int process_rdx(struct ldc_channel *lp,
    713		       struct ldc_packet *p)
    714{
    715	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
    716	       p->stype, p->seqid, p->env, p->u.r.ackid);
    717
    718	if (p->stype != LDC_INFO ||
    719	    !(rx_seq_ok(lp, p->seqid)))
    720		return LDC_ABORT(lp);
    721
    722	lp->rcv_nxt = p->seqid;
    723
    724	lp->hs_state = LDC_HS_COMPLETE;
    725	ldc_set_state(lp, LDC_STATE_CONNECTED);
    726
    727	return LDC_EVENT_UP;
    728}
    729
    730static int process_control_frame(struct ldc_channel *lp,
    731				 struct ldc_packet *p)
    732{
    733	switch (p->ctrl) {
    734	case LDC_VERS:
    735		return process_version(lp, p);
    736
    737	case LDC_RTS:
    738		return process_rts(lp, p);
    739
    740	case LDC_RTR:
    741		return process_rtr(lp, p);
    742
    743	case LDC_RDX:
    744		return process_rdx(lp, p);
    745
    746	default:
    747		return LDC_ABORT(lp);
    748	}
    749}
    750
    751static int process_error_frame(struct ldc_channel *lp,
    752			       struct ldc_packet *p)
    753{
    754	return LDC_ABORT(lp);
    755}
    756
    757static int process_data_ack(struct ldc_channel *lp,
    758			    struct ldc_packet *ack)
    759{
    760	unsigned long head = lp->tx_acked;
    761	u32 ackid = ack->u.r.ackid;
    762
    763	while (1) {
    764		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
    765
    766		head = tx_advance(lp, head);
    767
    768		if (p->seqid == ackid) {
    769			lp->tx_acked = head;
    770			return 0;
    771		}
    772		if (head == lp->tx_tail)
    773			return LDC_ABORT(lp);
    774	}
    775
    776	return 0;
    777}
    778
    779static void send_events(struct ldc_channel *lp, unsigned int event_mask)
    780{
    781	if (event_mask & LDC_EVENT_RESET)
    782		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
    783	if (event_mask & LDC_EVENT_UP)
    784		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
    785	if (event_mask & LDC_EVENT_DATA_READY)
    786		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
    787}
    788
/* RX interrupt handler.
 *
 * Refreshes the RX queue state from the hypervisor under lp->lock and
 * then either:
 *  - flushes the queue when the channel is flagged as reset;
 *  - only raises events when the handshake is already complete; or
 *  - processes queued frames one at a time to drive the handshake.
 * The collected events are delivered through send_events() after the
 * lock has been dropped.  Always returns IRQ_HANDLED.
 */
static irqreturn_t ldc_rx(int irq, void *dev_id)
{
	struct ldc_channel *lp = dev_id;
	unsigned long orig_state, flags;
	unsigned int event_mask;

	spin_lock_irqsave(&lp->lock, flags);

	/* Remember the channel state seen before the refresh below. */
	orig_state = lp->chan_state;

	/* We should probably check for hypervisor errors here and
	 * reset the LDC channel if we get one.
	 */
	sun4v_ldc_rx_get_state(lp->id,
			       &lp->rx_head,
			       &lp->rx_tail,
			       &lp->chan_state);

	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

	event_mask = 0;

	/* RAW mode has no handshake frames: an UP channel is treated as
	 * connected straight away.
	 */
	if (lp->cfg.mode == LDC_MODE_RAW &&
	    lp->chan_state == LDC_CHANNEL_UP) {
		lp->hs_state = LDC_HS_COMPLETE;
		ldc_set_state(lp, LDC_STATE_CONNECTED);

		/*
		 * Generate an LDC_EVENT_UP event if the channel
		 * was not already up.
		 */
		if (orig_state != LDC_CHANNEL_UP) {
			event_mask |= LDC_EVENT_UP;
			orig_state = lp->chan_state;
		}
	}

	/* If we are in reset state, flush the RX queue and ignore
	 * everything.
	 */
	if (lp->flags & LDC_FLAG_RESET) {
		(void) ldc_rx_reset(lp);
		goto out;
	}

	/* Once we finish the handshake, we let the ldc_read()
	 * paths do all of the control frame and state management.
	 * Just trigger the callback.
	 */
	if (lp->hs_state == LDC_HS_COMPLETE) {
		/* Also entered by goto from the frame loop below, as soon
		 * as a processed frame completes the handshake.
		 */
handshake_complete:
		if (lp->chan_state != orig_state) {
			unsigned int event = LDC_EVENT_RESET;

			if (lp->chan_state == LDC_CHANNEL_UP)
				event = LDC_EVENT_UP;

			event_mask |= event;
		}
		if (lp->rx_head != lp->rx_tail)
			event_mask |= LDC_EVENT_DATA_READY;

		goto out;
	}

	/* The handshake is still in progress; no frames are processed
	 * when the channel state changed under us.
	 */
	if (lp->chan_state != orig_state)
		goto out;

	while (lp->rx_head != lp->rx_tail) {
		struct ldc_packet *p;
		unsigned long new;
		int err;

		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

		switch (p->type) {
		case LDC_CTRL:
			/* Positive returns are LDC_EVENT_* bits. */
			err = process_control_frame(lp, p);
			if (err > 0)
				event_mask |= err;
			break;

		case LDC_DATA:
			event_mask |= LDC_EVENT_DATA_READY;
			err = 0;
			break;

		case LDC_ERR:
			err = process_error_frame(lp, p);
			break;

		default:
			err = LDC_ABORT(lp);
			break;
		}

		/* A failed frame stops processing without consuming it. */
		if (err < 0)
			break;

		/* Consume the frame: advance the head with wrap-around and
		 * tell the hypervisor.
		 */
		new = lp->rx_head;
		new += LDC_PACKET_SIZE;
		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
			new = 0;
		lp->rx_head = new;

		err = __set_rx_head(lp, new);
		if (err < 0) {
			(void) LDC_ABORT(lp);
			break;
		}
		if (lp->hs_state == LDC_HS_COMPLETE)
			goto handshake_complete;
	}

out:
	spin_unlock_irqrestore(&lp->lock, flags);

	/* Callbacks run without lp->lock held. */
	send_events(lp, event_mask);

	return IRQ_HANDLED;
}
    911
    912static irqreturn_t ldc_tx(int irq, void *dev_id)
    913{
    914	struct ldc_channel *lp = dev_id;
    915	unsigned long flags, orig_state;
    916	unsigned int event_mask = 0;
    917
    918	spin_lock_irqsave(&lp->lock, flags);
    919
    920	orig_state = lp->chan_state;
    921
    922	/* We should probably check for hypervisor errors here and
    923	 * reset the LDC channel if we get one.
    924	 */
    925	sun4v_ldc_tx_get_state(lp->id,
    926			       &lp->tx_head,
    927			       &lp->tx_tail,
    928			       &lp->chan_state);
    929
    930	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
    931	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
    932
    933	if (lp->cfg.mode == LDC_MODE_RAW &&
    934	    lp->chan_state == LDC_CHANNEL_UP) {
    935		lp->hs_state = LDC_HS_COMPLETE;
    936		ldc_set_state(lp, LDC_STATE_CONNECTED);
    937
    938		/*
    939		 * Generate an LDC_EVENT_UP event if the channel
    940		 * was not already up.
    941		 */
    942		if (orig_state != LDC_CHANNEL_UP) {
    943			event_mask |= LDC_EVENT_UP;
    944			orig_state = lp->chan_state;
    945		}
    946	}
    947
    948	spin_unlock_irqrestore(&lp->lock, flags);
    949
    950	send_events(lp, event_mask);
    951
    952	return IRQ_HANDLED;
    953}
    954
/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition to and removal from the ldc_channel_list are
 * XXX atomic; otherwise the __ldc_channel_exists() check is
 * XXX totally pointless, as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);
    963
    964static int __ldc_channel_exists(unsigned long id)
    965{
    966	struct ldc_channel *lp;
    967
    968	hlist_for_each_entry(lp, &ldc_channel_list, list) {
    969		if (lp->id == id)
    970			return 1;
    971	}
    972	return 0;
    973}
    974
    975static int alloc_queue(const char *name, unsigned long num_entries,
    976		       struct ldc_packet **base, unsigned long *ra)
    977{
    978	unsigned long size, order;
    979	void *q;
    980
    981	size = num_entries * LDC_PACKET_SIZE;
    982	order = get_order(size);
    983
    984	q = (void *) __get_free_pages(GFP_KERNEL, order);
    985	if (!q) {
    986		printk(KERN_ERR PFX "Alloc of %s queue failed with "
    987		       "size=%lu order=%lu\n", name, size, order);
    988		return -ENOMEM;
    989	}
    990
    991	memset(q, 0, PAGE_SIZE << order);
    992
    993	*base = q;
    994	*ra = __pa(q);
    995
    996	return 0;
    997}
    998
    999static void free_queue(unsigned long num_entries, struct ldc_packet *q)
   1000{
   1001	unsigned long size, order;
   1002
   1003	if (!q)
   1004		return;
   1005
   1006	size = num_entries * LDC_PACKET_SIZE;
   1007	order = get_order(size);
   1008
   1009	free_pages((unsigned long)q, order);
   1010}
   1011
   1012static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
   1013{
   1014	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
   1015	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
   1016
   1017	cookie &= ~COOKIE_PGSZ_CODE;
   1018
   1019	return (cookie >> (13ULL + (szcode * 3ULL)));
   1020}
   1021
   1022static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
   1023		      unsigned long entry, unsigned long npages)
   1024{
   1025	struct ldc_mtable_entry *base;
   1026	unsigned long i, shift;
   1027
   1028	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
   1029	base = iommu->page_table + entry;
   1030	for (i = 0; i < npages; i++) {
   1031		if (base->cookie)
   1032			sun4v_ldc_revoke(id, cookie + (i << shift),
   1033					 base->cookie);
   1034		base->mte = 0;
   1035	}
   1036}
   1037
   1038/* XXX Make this configurable... XXX */
   1039#define LDC_IOTABLE_SIZE	(8 * 1024)
   1040
   1041static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
   1042{
   1043	unsigned long sz, num_tsb_entries, tsbsize, order;
   1044	struct ldc_iommu *ldc_iommu = &lp->iommu;
   1045	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
   1046	struct ldc_mtable_entry *table;
   1047	unsigned long hv_err;
   1048	int err;
   1049
   1050	num_tsb_entries = LDC_IOTABLE_SIZE;
   1051	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
   1052	spin_lock_init(&ldc_iommu->lock);
   1053
   1054	sz = num_tsb_entries / 8;
   1055	sz = (sz + 7UL) & ~7UL;
   1056	iommu->map = kzalloc(sz, GFP_KERNEL);
   1057	if (!iommu->map) {
   1058		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
   1059		return -ENOMEM;
   1060	}
   1061	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
   1062			    NULL, false /* no large pool */,
   1063			    1 /* npools */,
   1064			    true /* skip span boundary check */);
   1065
   1066	order = get_order(tsbsize);
   1067
   1068	table = (struct ldc_mtable_entry *)
   1069		__get_free_pages(GFP_KERNEL, order);
   1070	err = -ENOMEM;
   1071	if (!table) {
   1072		printk(KERN_ERR PFX "Alloc of MTE table failed, "
   1073		       "size=%lu order=%lu\n", tsbsize, order);
   1074		goto out_free_map;
   1075	}
   1076
   1077	memset(table, 0, PAGE_SIZE << order);
   1078
   1079	ldc_iommu->page_table = table;
   1080
   1081	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
   1082					 num_tsb_entries);
   1083	err = -EINVAL;
   1084	if (hv_err)
   1085		goto out_free_table;
   1086
   1087	return 0;
   1088
   1089out_free_table:
   1090	free_pages((unsigned long) table, order);
   1091	ldc_iommu->page_table = NULL;
   1092
   1093out_free_map:
   1094	kfree(iommu->map);
   1095	iommu->map = NULL;
   1096
   1097	return err;
   1098}
   1099
   1100static void ldc_iommu_release(struct ldc_channel *lp)
   1101{
   1102	struct ldc_iommu *ldc_iommu = &lp->iommu;
   1103	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
   1104	unsigned long num_tsb_entries, tsbsize, order;
   1105
   1106	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
   1107
   1108	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
   1109	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
   1110	order = get_order(tsbsize);
   1111
   1112	free_pages((unsigned long) ldc_iommu->page_table, order);
   1113	ldc_iommu->page_table = NULL;
   1114
   1115	kfree(iommu->map);
   1116	iommu->map = NULL;
   1117}
   1118
   1119struct ldc_channel *ldc_alloc(unsigned long id,
   1120			      const struct ldc_channel_config *cfgp,
   1121			      void *event_arg,
   1122			      const char *name)
   1123{
   1124	struct ldc_channel *lp;
   1125	const struct ldc_mode_ops *mops;
   1126	unsigned long dummy1, dummy2, hv_err;
   1127	u8 mss, *mssbuf;
   1128	int err;
   1129
   1130	err = -ENODEV;
   1131	if (!ldom_domaining_enabled)
   1132		goto out_err;
   1133
   1134	err = -EINVAL;
   1135	if (!cfgp)
   1136		goto out_err;
   1137	if (!name)
   1138		goto out_err;
   1139
   1140	switch (cfgp->mode) {
   1141	case LDC_MODE_RAW:
   1142		mops = &raw_ops;
   1143		mss = LDC_PACKET_SIZE;
   1144		break;
   1145
   1146	case LDC_MODE_UNRELIABLE:
   1147		mops = &nonraw_ops;
   1148		mss = LDC_PACKET_SIZE - 8;
   1149		break;
   1150
   1151	case LDC_MODE_STREAM:
   1152		mops = &stream_ops;
   1153		mss = LDC_PACKET_SIZE - 8 - 8;
   1154		break;
   1155
   1156	default:
   1157		goto out_err;
   1158	}
   1159
   1160	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
   1161		goto out_err;
   1162
   1163	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
   1164	err = -ENODEV;
   1165	if (hv_err == HV_ECHANNEL)
   1166		goto out_err;
   1167
   1168	err = -EEXIST;
   1169	if (__ldc_channel_exists(id))
   1170		goto out_err;
   1171
   1172	mssbuf = NULL;
   1173
   1174	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
   1175	err = -ENOMEM;
   1176	if (!lp)
   1177		goto out_err;
   1178
   1179	spin_lock_init(&lp->lock);
   1180
   1181	lp->id = id;
   1182
   1183	err = ldc_iommu_init(name, lp);
   1184	if (err)
   1185		goto out_free_ldc;
   1186
   1187	lp->mops = mops;
   1188	lp->mss = mss;
   1189
   1190	lp->cfg = *cfgp;
   1191	if (!lp->cfg.mtu)
   1192		lp->cfg.mtu = LDC_DEFAULT_MTU;
   1193
   1194	if (lp->cfg.mode == LDC_MODE_STREAM) {
   1195		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
   1196		if (!mssbuf) {
   1197			err = -ENOMEM;
   1198			goto out_free_iommu;
   1199		}
   1200		lp->mssbuf = mssbuf;
   1201	}
   1202
   1203	lp->event_arg = event_arg;
   1204
   1205	/* XXX allow setting via ldc_channel_config to override defaults
   1206	 * XXX or use some formula based upon mtu
   1207	 */
   1208	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
   1209	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
   1210
   1211	err = alloc_queue("TX", lp->tx_num_entries,
   1212			  &lp->tx_base, &lp->tx_ra);
   1213	if (err)
   1214		goto out_free_mssbuf;
   1215
   1216	err = alloc_queue("RX", lp->rx_num_entries,
   1217			  &lp->rx_base, &lp->rx_ra);
   1218	if (err)
   1219		goto out_free_txq;
   1220
   1221	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
   1222
   1223	lp->hs_state = LDC_HS_CLOSED;
   1224	ldc_set_state(lp, LDC_STATE_INIT);
   1225
   1226	INIT_HLIST_NODE(&lp->list);
   1227	hlist_add_head(&lp->list, &ldc_channel_list);
   1228
   1229	INIT_HLIST_HEAD(&lp->mh_list);
   1230
   1231	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
   1232	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
   1233
   1234	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
   1235			  lp->rx_irq_name, lp);
   1236	if (err)
   1237		goto out_free_txq;
   1238
   1239	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
   1240			  lp->tx_irq_name, lp);
   1241	if (err) {
   1242		free_irq(lp->cfg.rx_irq, lp);
   1243		goto out_free_txq;
   1244	}
   1245
   1246	return lp;
   1247
   1248out_free_txq:
   1249	free_queue(lp->tx_num_entries, lp->tx_base);
   1250
   1251out_free_mssbuf:
   1252	kfree(mssbuf);
   1253
   1254out_free_iommu:
   1255	ldc_iommu_release(lp);
   1256
   1257out_free_ldc:
   1258	kfree(lp);
   1259
   1260out_err:
   1261	return ERR_PTR(err);
   1262}
   1263EXPORT_SYMBOL(ldc_alloc);
   1264
/* Tear down whatever the channel currently has set up, as recorded in
 * lp->flags: free both IRQs, unconfigure the TX/RX queues in the
 * hypervisor, and free the queue memory.  Each step is skipped if its
 * flag is not set, and the flag is cleared once the step is done.
 * The channel state is then set back to LDC_STATE_INIT.
 */
void ldc_unbind(struct ldc_channel *lp)
{
	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
		free_irq(lp->cfg.rx_irq, lp);
		free_irq(lp->cfg.tx_irq, lp);
		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
	}

	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
		/* A zero base/size unconfigures the queue. */
		sun4v_ldc_tx_qconf(lp->id, 0, 0);
		sun4v_ldc_rx_qconf(lp->id, 0, 0);
		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
	}
	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
		free_queue(lp->tx_num_entries, lp->tx_base);
		free_queue(lp->rx_num_entries, lp->rx_base);
		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
	}

	ldc_set_state(lp, LDC_STATE_INIT);
}
EXPORT_SYMBOL(ldc_unbind);
   1287
/* Destroy a channel: unbind it, unlink it from the channel list, free
 * the stream reassembly buffer (kfree() accepts NULL, so this is safe
 * for channels that never allocated one), release the map table and
 * finally free the channel object itself.  @lp must not be used
 * afterwards.
 */
void ldc_free(struct ldc_channel *lp)
{
	ldc_unbind(lp);
	hlist_del(&lp->list);
	kfree(lp->mssbuf);
	ldc_iommu_release(lp);

	kfree(lp);
}
EXPORT_SYMBOL(ldc_free);
   1298
   1299/* Bind the channel.  This registers the LDC queues with
   1300 * the hypervisor and puts the channel into a pseudo-listening
   1301 * state.  This does not initiate a handshake, ldc_connect() does
   1302 * that.
   1303 */
   1304int ldc_bind(struct ldc_channel *lp)
   1305{
   1306	unsigned long hv_err, flags;
   1307	int err = -EINVAL;
   1308
   1309	if (lp->state != LDC_STATE_INIT)
   1310		return -EINVAL;
   1311
   1312	spin_lock_irqsave(&lp->lock, flags);
   1313
   1314	enable_irq(lp->cfg.rx_irq);
   1315	enable_irq(lp->cfg.tx_irq);
   1316
   1317	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
   1318
   1319	err = -ENODEV;
   1320	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
   1321	if (hv_err)
   1322		goto out_free_irqs;
   1323
   1324	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
   1325	if (hv_err)
   1326		goto out_free_irqs;
   1327
   1328	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
   1329	if (hv_err)
   1330		goto out_unmap_tx;
   1331
   1332	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
   1333	if (hv_err)
   1334		goto out_unmap_tx;
   1335
   1336	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
   1337
   1338	hv_err = sun4v_ldc_tx_get_state(lp->id,
   1339					&lp->tx_head,
   1340					&lp->tx_tail,
   1341					&lp->chan_state);
   1342	err = -EBUSY;
   1343	if (hv_err)
   1344		goto out_unmap_rx;
   1345
   1346	lp->tx_acked = lp->tx_head;
   1347
   1348	lp->hs_state = LDC_HS_OPEN;
   1349	ldc_set_state(lp, LDC_STATE_BOUND);
   1350
   1351	if (lp->cfg.mode == LDC_MODE_RAW) {
   1352		/*
   1353		 * There is no handshake in RAW mode, so handshake
   1354		 * is completed.
   1355		 */
   1356		lp->hs_state = LDC_HS_COMPLETE;
   1357	}
   1358
   1359	spin_unlock_irqrestore(&lp->lock, flags);
   1360
   1361	return 0;
   1362
   1363out_unmap_rx:
   1364	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
   1365	sun4v_ldc_rx_qconf(lp->id, 0, 0);
   1366
   1367out_unmap_tx:
   1368	sun4v_ldc_tx_qconf(lp->id, 0, 0);
   1369
   1370out_free_irqs:
   1371	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
   1372	free_irq(lp->cfg.tx_irq, lp);
   1373	free_irq(lp->cfg.rx_irq, lp);
   1374
   1375	spin_unlock_irqrestore(&lp->lock, flags);
   1376
   1377	return err;
   1378}
   1379EXPORT_SYMBOL(ldc_bind);
   1380
   1381int ldc_connect(struct ldc_channel *lp)
   1382{
   1383	unsigned long flags;
   1384	int err;
   1385
   1386	if (lp->cfg.mode == LDC_MODE_RAW)
   1387		return -EINVAL;
   1388
   1389	spin_lock_irqsave(&lp->lock, flags);
   1390
   1391	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
   1392	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
   1393	    lp->hs_state != LDC_HS_OPEN)
   1394		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
   1395	else
   1396		err = start_handshake(lp);
   1397
   1398	spin_unlock_irqrestore(&lp->lock, flags);
   1399
   1400	return err;
   1401}
   1402EXPORT_SYMBOL(ldc_connect);
   1403
   1404int ldc_disconnect(struct ldc_channel *lp)
   1405{
   1406	unsigned long hv_err, flags;
   1407	int err;
   1408
   1409	if (lp->cfg.mode == LDC_MODE_RAW)
   1410		return -EINVAL;
   1411
   1412	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
   1413	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
   1414		return -EINVAL;
   1415
   1416	spin_lock_irqsave(&lp->lock, flags);
   1417
   1418	err = -ENODEV;
   1419	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
   1420	if (hv_err)
   1421		goto out_err;
   1422
   1423	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
   1424	if (hv_err)
   1425		goto out_err;
   1426
   1427	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
   1428	if (hv_err)
   1429		goto out_err;
   1430
   1431	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
   1432	if (hv_err)
   1433		goto out_err;
   1434
   1435	ldc_set_state(lp, LDC_STATE_BOUND);
   1436	lp->hs_state = LDC_HS_OPEN;
   1437	lp->flags |= LDC_FLAG_RESET;
   1438
   1439	spin_unlock_irqrestore(&lp->lock, flags);
   1440
   1441	return 0;
   1442
   1443out_err:
   1444	sun4v_ldc_tx_qconf(lp->id, 0, 0);
   1445	sun4v_ldc_rx_qconf(lp->id, 0, 0);
   1446	free_irq(lp->cfg.tx_irq, lp);
   1447	free_irq(lp->cfg.rx_irq, lp);
   1448	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
   1449		       LDC_FLAG_REGISTERED_QUEUES);
   1450	ldc_set_state(lp, LDC_STATE_INIT);
   1451
   1452	spin_unlock_irqrestore(&lp->lock, flags);
   1453
   1454	return err;
   1455}
   1456EXPORT_SYMBOL(ldc_disconnect);
   1457
/* Return the channel's current state (one of the LDC_STATE_* values
 * stored by ldc_set_state()).  No locking is done here.
 */
int ldc_state(struct ldc_channel *lp)
{
	return lp->state;
}
EXPORT_SYMBOL(ldc_state);
   1463
/* Store @state as the channel's new state.  The old -> new transition
 * is passed to the STATE debug trace before the field is updated.
 */
void ldc_set_state(struct ldc_channel *lp, u8 state)
{
	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
	       state_to_str(lp->state),
	       state_to_str(state));

	lp->state = state;
}
EXPORT_SYMBOL(ldc_set_state);
   1473
/* Return the mode the channel was configured with (lp->cfg.mode, one
 * of LDC_MODE_RAW, LDC_MODE_UNRELIABLE or LDC_MODE_STREAM).
 */
int ldc_mode(struct ldc_channel *lp)
{
	return lp->cfg.mode;
}
EXPORT_SYMBOL(ldc_mode);
   1479
/* Move the RX queue head to the cached RX tail via __set_rx_head() and
 * return its result.
 * NOTE(review): presumably this discards everything currently queued
 * for receive -- confirm against __set_rx_head().
 */
int ldc_rx_reset(struct ldc_channel *lp)
{
	return __set_rx_head(lp, lp->rx_tail);
}
EXPORT_SYMBOL(ldc_rx_reset);
   1485
/* Dump a snapshot of the channel to the kernel log at info level:
 * id, flags, link/channel/handshake state, RX and TX head/tail/size,
 * and the next expected receive / last used send sequence numbers.
 * @caller is printed as the message prefix.
 */
void __ldc_print(struct ldc_channel *lp, const char *caller)
{
	pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
		"\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
		"\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
		"\trcv_nxt=%u snd_nxt=%u\n",
		caller, lp->id, lp->flags, state_to_str(lp->state),
		lp->chan_state, lp->hs_state,
		lp->rx_head, lp->rx_tail, lp->rx_num_entries,
		lp->tx_head, lp->tx_tail, lp->tx_num_entries,
		lp->rcv_nxt, lp->snd_nxt);
}
EXPORT_SYMBOL(__ldc_print);
   1499
   1500static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
   1501{
   1502	struct ldc_packet *p;
   1503	unsigned long new_tail, hv_err;
   1504	int err;
   1505
   1506	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
   1507					&lp->chan_state);
   1508	if (unlikely(hv_err))
   1509		return -EBUSY;
   1510
   1511	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
   1512		return LDC_ABORT(lp);
   1513
   1514	if (size > LDC_PACKET_SIZE)
   1515		return -EMSGSIZE;
   1516
   1517	p = data_get_tx_packet(lp, &new_tail);
   1518	if (!p)
   1519		return -EAGAIN;
   1520
   1521	memcpy(p, buf, size);
   1522
   1523	err = send_tx_packet(lp, p, new_tail);
   1524	if (!err)
   1525		err = size;
   1526
   1527	return err;
   1528}
   1529
   1530static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
   1531{
   1532	struct ldc_packet *p;
   1533	unsigned long hv_err, new;
   1534	int err;
   1535
   1536	if (size < LDC_PACKET_SIZE)
   1537		return -EINVAL;
   1538
   1539	hv_err = sun4v_ldc_rx_get_state(lp->id,
   1540					&lp->rx_head,
   1541					&lp->rx_tail,
   1542					&lp->chan_state);
   1543	if (hv_err)
   1544		return LDC_ABORT(lp);
   1545
   1546	if (lp->chan_state == LDC_CHANNEL_DOWN ||
   1547	    lp->chan_state == LDC_CHANNEL_RESETTING)
   1548		return -ECONNRESET;
   1549
   1550	if (lp->rx_head == lp->rx_tail)
   1551		return 0;
   1552
   1553	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
   1554	memcpy(buf, p, LDC_PACKET_SIZE);
   1555
   1556	new = rx_advance(lp, lp->rx_head);
   1557	lp->rx_head = new;
   1558
   1559	err = __set_rx_head(lp, new);
   1560	if (err < 0)
   1561		err = -ECONNRESET;
   1562	else
   1563		err = LDC_PACKET_SIZE;
   1564
   1565	return err;
   1566}
   1567
/* Read/write handlers selected by ldc_alloc() for LDC_MODE_RAW. */
static const struct ldc_mode_ops raw_ops = {
	.write		=	write_raw,
	.read		=	read_raw,
};
   1572
   1573static int write_nonraw(struct ldc_channel *lp, const void *buf,
   1574			unsigned int size)
   1575{
   1576	unsigned long hv_err, tail;
   1577	unsigned int copied;
   1578	u32 seq;
   1579	int err;
   1580
   1581	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
   1582					&lp->chan_state);
   1583	if (unlikely(hv_err))
   1584		return -EBUSY;
   1585
   1586	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
   1587		return LDC_ABORT(lp);
   1588
   1589	if (!tx_has_space_for(lp, size))
   1590		return -EAGAIN;
   1591
   1592	seq = lp->snd_nxt;
   1593	copied = 0;
   1594	tail = lp->tx_tail;
   1595	while (copied < size) {
   1596		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
   1597		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
   1598			    p->u.u_data :
   1599			    p->u.r.r_data);
   1600		int data_len;
   1601
   1602		p->type = LDC_DATA;
   1603		p->stype = LDC_INFO;
   1604		p->ctrl = 0;
   1605
   1606		data_len = size - copied;
   1607		if (data_len > lp->mss)
   1608			data_len = lp->mss;
   1609
   1610		BUG_ON(data_len > LDC_LEN);
   1611
   1612		p->env = (data_len |
   1613			  (copied == 0 ? LDC_START : 0) |
   1614			  (data_len == size - copied ? LDC_STOP : 0));
   1615
   1616		p->seqid = ++seq;
   1617
   1618		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
   1619		       p->type,
   1620		       p->stype,
   1621		       p->ctrl,
   1622		       p->env,
   1623		       p->seqid);
   1624
   1625		memcpy(data, buf, data_len);
   1626		buf += data_len;
   1627		copied += data_len;
   1628
   1629		tail = tx_advance(lp, tail);
   1630	}
   1631
   1632	err = set_tx_tail(lp, tail);
   1633	if (!err) {
   1634		lp->snd_nxt = seq;
   1635		err = size;
   1636	}
   1637
   1638	return err;
   1639}
   1640
   1641static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
   1642		      struct ldc_packet *first_frag)
   1643{
   1644	int err;
   1645
   1646	if (first_frag)
   1647		lp->rcv_nxt = first_frag->seqid - 1;
   1648
   1649	err = send_data_nack(lp, p);
   1650	if (err)
   1651		return err;
   1652
   1653	err = ldc_rx_reset(lp);
   1654	if (err < 0)
   1655		return LDC_ABORT(lp);
   1656
   1657	return 0;
   1658}
   1659
   1660static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
   1661{
   1662	if (p->stype & LDC_ACK) {
   1663		int err = process_data_ack(lp, p);
   1664		if (err)
   1665			return err;
   1666	}
   1667	if (p->stype & LDC_NACK)
   1668		return LDC_ABORT(lp);
   1669
   1670	return 0;
   1671}
   1672
   1673static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
   1674{
   1675	unsigned long dummy;
   1676	int limit = 1000;
   1677
   1678	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
   1679	       cur_head, lp->rx_head, lp->rx_tail);
   1680	while (limit-- > 0) {
   1681		unsigned long hv_err;
   1682
   1683		hv_err = sun4v_ldc_rx_get_state(lp->id,
   1684						&dummy,
   1685						&lp->rx_tail,
   1686						&lp->chan_state);
   1687		if (hv_err)
   1688			return LDC_ABORT(lp);
   1689
   1690		if (lp->chan_state == LDC_CHANNEL_DOWN ||
   1691		    lp->chan_state == LDC_CHANNEL_RESETTING)
   1692			return -ECONNRESET;
   1693
   1694		if (cur_head != lp->rx_tail) {
   1695			ldcdbg(DATA, "DATA WAIT DONE "
   1696			       "head[%lx] tail[%lx] chan_state[%lx]\n",
   1697			       dummy, lp->rx_tail, lp->chan_state);
   1698			return 0;
   1699		}
   1700
   1701		udelay(1);
   1702	}
   1703	return -EAGAIN;
   1704}
   1705
   1706static int rx_set_head(struct ldc_channel *lp, unsigned long head)
   1707{
   1708	int err = __set_rx_head(lp, head);
   1709
   1710	if (err < 0)
   1711		return LDC_ABORT(lp);
   1712
   1713	lp->rx_head = head;
   1714	return 0;
   1715}
   1716
   1717static void send_data_ack(struct ldc_channel *lp)
   1718{
   1719	unsigned long new_tail;
   1720	struct ldc_packet *p;
   1721
   1722	p = data_get_tx_packet(lp, &new_tail);
   1723	if (likely(p)) {
   1724		int err;
   1725
   1726		memset(p, 0, sizeof(*p));
   1727		p->type = LDC_DATA;
   1728		p->stype = LDC_ACK;
   1729		p->ctrl = 0;
   1730		p->seqid = lp->snd_nxt + 1;
   1731		p->u.r.ackid = lp->rcv_nxt;
   1732
   1733		err = send_tx_packet(lp, p, new_tail);
   1734		if (!err)
   1735			lp->snd_nxt++;
   1736	}
   1737}
   1738
/* Non-RAW read: reassemble one message from the RX queue into @buf.
 *
 * Walks the RX queue starting at the hypervisor-reported head.  Each
 * packet is sequence-checked, control frames are processed, ACK/NACK
 * bits are handled, and the payload of DATA/INFO fragments is copied
 * into @buf until a fragment carrying LDC_STOP is seen.  If the queue
 * runs dry in the middle of a message, rx_data_wait() is used to wait
 * for more fragments.
 *
 * Returns the number of payload bytes copied (0 if nothing is queued,
 * or after a bad sequence number was handled without error), or a
 * negative error: the LDC_ABORT() result, -ECONNRESET, -EMSGSIZE when
 * @buf cannot hold the next fragment, or an error from one of the
 * helpers.  On error, rcv_nxt is rewound to just before the first
 * fragment of the message, if one was seen.  On success in a mode
 * other than UNRELIABLE with data copied, a data ACK is sent.
 */
static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
	struct ldc_packet *first_frag;
	unsigned long hv_err, new;
	int err, copied;

	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		return LDC_ABORT(lp);

	if (lp->chan_state == LDC_CHANNEL_DOWN ||
	    lp->chan_state == LDC_CHANNEL_RESETTING)
		return -ECONNRESET;

	/* Nothing queued. */
	if (lp->rx_head == lp->rx_tail)
		return 0;

	first_frag = NULL;
	copied = err = 0;
	/* 'new' is the working head; lp->rx_head is only updated through
	 * rx_set_head().
	 */
	new = lp->rx_head;
	while (1) {
		struct ldc_packet *p;
		int pkt_len;

		BUG_ON(new == lp->rx_tail);
		p = lp->rx_base + (new / LDC_PACKET_SIZE);

		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
		       "rcv_nxt[%08x]\n",
		       p->type,
		       p->stype,
		       p->ctrl,
		       p->env,
		       p->seqid,
		       p->u.r.ackid,
		       lp->rcv_nxt);

		/* Out-of-sequence packet: NACK it, reset the RX queue and
		 * report no data.
		 */
		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
			err = rx_bad_seq(lp, p, first_frag);
			copied = 0;
			break;
		}

		if (p->type & LDC_CTRL) {
			err = process_control_frame(lp, p);
			if (err < 0)
				break;
			/* Non-negative results are not treated as errors. */
			err = 0;
		}

		lp->rcv_nxt = p->seqid;

		/*
		 * If this is a control-only packet, there is nothing
		 * else to do but advance the rx queue since the packet
		 * was already processed above.
		 */
		if (!(p->type & LDC_DATA)) {
			new = rx_advance(lp, new);
			break;
		}
		if (p->stype & (LDC_ACK | LDC_NACK)) {
			err = data_ack_nack(lp, p);
			if (err)
				break;
		}
		/* A data packet without INFO carries no payload: consume
		 * it and look at the next one.
		 */
		if (!(p->stype & LDC_INFO)) {
			new = rx_advance(lp, new);
			err = rx_set_head(lp, new);
			if (err)
				break;
			goto no_data;
		}

		pkt_len = p->env & LDC_LEN;

		/* Every initial packet starts with the START bit set.
		 *
		 * Singleton packets will have both START+STOP set.
		 *
		 * Fragments will have START set in the first frame, STOP
		 * set in the last frame, and neither bit set in middle
		 * frames of the packet.
		 *
		 * Therefore if we are at the beginning of a packet and
		 * we don't see START, or we are in the middle of a fragmented
		 * packet and do see START, we are unsynchronized and should
		 * flush the RX queue.
		 */
		if ((first_frag == NULL && !(p->env & LDC_START)) ||
		    (first_frag != NULL &&  (p->env & LDC_START))) {
			/* With no message in progress the stray fragment is
			 * skipped; otherwise the head is moved up to this
			 * unexpected START packet.
			 */
			if (!first_frag)
				new = rx_advance(lp, new);

			err = rx_set_head(lp, new);
			if (err)
				break;

			if (!first_frag)
				goto no_data;
		}
		if (!first_frag)
			first_frag = p;

		if (pkt_len > size - copied) {
			/* User didn't give us a big enough buffer,
			 * what to do?  This is a pretty serious error.
			 *
			 * Since we haven't updated the RX ring head to
			 * consume any of the packets, signal the error
			 * to the user and just leave the RX ring alone.
			 *
			 * This seems the best behavior because this allows
			 * a user of the LDC layer to start with a small
			 * RX buffer for ldc_read() calls and use -EMSGSIZE
			 * as a cue to enlarge its read buffer.
			 */
			err = -EMSGSIZE;
			break;
		}

		/* Ok, we are gonna eat this one.  */
		new = rx_advance(lp, new);

		memcpy(buf,
		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
			p->u.u_data : p->u.r.r_data), pkt_len);
		buf += pkt_len;
		copied += pkt_len;

		if (p->env & LDC_STOP)
			break;

no_data:
		/* Caught up with the tail before the message ended: wait
		 * for more packets to arrive.
		 */
		if (new == lp->rx_tail) {
			err = rx_data_wait(lp, new);
			if (err)
				break;
		}
	}

	/* Commit the packets consumed so far. */
	if (!err)
		err = rx_set_head(lp, new);

	/* On failure, expect the first fragment of this message again. */
	if (err && first_frag)
		lp->rcv_nxt = first_frag->seqid - 1;

	if (!err) {
		err = copied;
		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
			send_data_ack(lp);
	}

	return err;
}
   1897
/* Read/write handlers selected by ldc_alloc() for LDC_MODE_UNRELIABLE. */
static const struct ldc_mode_ops nonraw_ops = {
	.write		=	write_nonraw,
	.read		=	read_nonraw,
};
   1902
   1903static int write_stream(struct ldc_channel *lp, const void *buf,
   1904			unsigned int size)
   1905{
   1906	if (size > lp->cfg.mtu)
   1907		size = lp->cfg.mtu;
   1908	return write_nonraw(lp, buf, size);
   1909}
   1910
   1911static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
   1912{
   1913	if (!lp->mssbuf_len) {
   1914		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
   1915		if (err < 0)
   1916			return err;
   1917
   1918		lp->mssbuf_len = err;
   1919		lp->mssbuf_off = 0;
   1920	}
   1921
   1922	if (size > lp->mssbuf_len)
   1923		size = lp->mssbuf_len;
   1924	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
   1925
   1926	lp->mssbuf_off += size;
   1927	lp->mssbuf_len -= size;
   1928
   1929	return size;
   1930}
   1931
/* Read/write handlers selected by ldc_alloc() for LDC_MODE_STREAM. */
static const struct ldc_mode_ops stream_ops = {
	.write		=	write_stream,
	.read		=	read_stream,
};
   1936
   1937int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
   1938{
   1939	unsigned long flags;
   1940	int err;
   1941
   1942	if (!buf)
   1943		return -EINVAL;
   1944
   1945	if (!size)
   1946		return 0;
   1947
   1948	spin_lock_irqsave(&lp->lock, flags);
   1949
   1950	if (lp->hs_state != LDC_HS_COMPLETE)
   1951		err = -ENOTCONN;
   1952	else
   1953		err = lp->mops->write(lp, buf, size);
   1954
   1955	spin_unlock_irqrestore(&lp->lock, flags);
   1956
   1957	return err;
   1958}
   1959EXPORT_SYMBOL(ldc_write);
   1960
   1961int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
   1962{
   1963	unsigned long flags;
   1964	int err;
   1965
   1966	ldcdbg(RX, "%s: entered size=%d\n", __func__, size);
   1967
   1968	if (!buf)
   1969		return -EINVAL;
   1970
   1971	if (!size)
   1972		return 0;
   1973
   1974	spin_lock_irqsave(&lp->lock, flags);
   1975
   1976	if (lp->hs_state != LDC_HS_COMPLETE)
   1977		err = -ENOTCONN;
   1978	else
   1979		err = lp->mops->read(lp, buf, size);
   1980
   1981	spin_unlock_irqrestore(&lp->lock, flags);
   1982
   1983	ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
   1984	       lp->cfg.mode, lp->rx_head, lp->rx_tail, err);
   1985
   1986	return err;
   1987}
   1988EXPORT_SYMBOL(ldc_read);
   1989
   1990static u64 pagesize_code(void)
   1991{
   1992	switch (PAGE_SIZE) {
   1993	default:
   1994	case (8ULL * 1024ULL):
   1995		return 0;
   1996	case (64ULL * 1024ULL):
   1997		return 1;
   1998	case (512ULL * 1024ULL):
   1999		return 2;
   2000	case (4ULL * 1024ULL * 1024ULL):
   2001		return 3;
   2002	case (32ULL * 1024ULL * 1024ULL):
   2003		return 4;
   2004	case (256ULL * 1024ULL * 1024ULL):
   2005		return 5;
   2006	}
   2007}
   2008
   2009static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
   2010{
   2011	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
   2012		(index << PAGE_SHIFT) |
   2013		page_offset);
   2014}
   2015
   2016
   2017static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
   2018					     unsigned long npages)
   2019{
   2020	long entry;
   2021
   2022	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
   2023				      npages, NULL, (unsigned long)-1, 0);
   2024	if (unlikely(entry == IOMMU_ERROR_CODE))
   2025		return NULL;
   2026
   2027	return iommu->page_table + entry;
   2028}
   2029
   2030static u64 perm_to_mte(unsigned int map_perm)
   2031{
   2032	u64 mte_base;
   2033
   2034	mte_base = pagesize_code();
   2035
   2036	if (map_perm & LDC_MAP_SHADOW) {
   2037		if (map_perm & LDC_MAP_R)
   2038			mte_base |= LDC_MTE_COPY_R;
   2039		if (map_perm & LDC_MAP_W)
   2040			mte_base |= LDC_MTE_COPY_W;
   2041	}
   2042	if (map_perm & LDC_MAP_DIRECT) {
   2043		if (map_perm & LDC_MAP_R)
   2044			mte_base |= LDC_MTE_READ;
   2045		if (map_perm & LDC_MAP_W)
   2046			mte_base |= LDC_MTE_WRITE;
   2047		if (map_perm & LDC_MAP_X)
   2048			mte_base |= LDC_MTE_EXEC;
   2049	}
   2050	if (map_perm & LDC_MAP_IO) {
   2051		if (map_perm & LDC_MAP_R)
   2052			mte_base |= LDC_MTE_IOMMU_R;
   2053		if (map_perm & LDC_MAP_W)
   2054			mte_base |= LDC_MTE_IOMMU_W;
   2055	}
   2056
   2057	return mte_base;
   2058}
   2059
   2060static int pages_in_region(unsigned long base, long len)
   2061{
   2062	int count = 0;
   2063
   2064	do {
   2065		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
   2066
   2067		len -= (new - base);
   2068		base = new;
   2069		count++;
   2070	} while (len > 0);
   2071
   2072	return count;
   2073}
   2074
/* Working state threaded through fill_cookies() while a buffer or
 * scatterlist is turned into map table entries and cookies.
 */
struct cookie_state {
	/* Map table whose entries are being filled in. */
	struct ldc_mtable_entry		*page_table;
	/* Caller-supplied output array of cookies. */
	struct ldc_trans_cookie		*cookies;
	/* Fixed bits OR'ed with the physical address in every entry. */
	u64				mte_base;
	/* Cookie address just past the previous chunk; a new chunk that
	 * starts exactly here is merged into the previous cookie.
	 */
	u64				prev_cookie;
	/* Index of the next map table entry to fill. */
	u32				pte_idx;
	/* Number of cookies written to @cookies so far. */
	u32				nc;
};
   2083
   2084static void fill_cookies(struct cookie_state *sp, unsigned long pa,
   2085			 unsigned long off, unsigned long len)
   2086{
   2087	do {
   2088		unsigned long tlen, new = pa + PAGE_SIZE;
   2089		u64 this_cookie;
   2090
   2091		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
   2092
   2093		tlen = PAGE_SIZE;
   2094		if (off)
   2095			tlen = PAGE_SIZE - off;
   2096		if (tlen > len)
   2097			tlen = len;
   2098
   2099		this_cookie = make_cookie(sp->pte_idx,
   2100					  pagesize_code(), off);
   2101
   2102		off = 0;
   2103
   2104		if (this_cookie == sp->prev_cookie) {
   2105			sp->cookies[sp->nc - 1].cookie_size += tlen;
   2106		} else {
   2107			sp->cookies[sp->nc].cookie_addr = this_cookie;
   2108			sp->cookies[sp->nc].cookie_size = tlen;
   2109			sp->nc++;
   2110		}
   2111		sp->prev_cookie = this_cookie + tlen;
   2112
   2113		sp->pte_idx++;
   2114
   2115		len -= tlen;
   2116		pa = new;
   2117	} while (len > 0);
   2118}
   2119
   2120static int sg_count_one(struct scatterlist *sg)
   2121{
   2122	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
   2123	long len = sg->length;
   2124
   2125	if ((sg->offset | len) & (8UL - 1))
   2126		return -EFAULT;
   2127
   2128	return pages_in_region(base + sg->offset, len);
   2129}
   2130
   2131static int sg_count_pages(struct scatterlist *sg, int num_sg)
   2132{
   2133	int count;
   2134	int i;
   2135
   2136	count = 0;
   2137	for (i = 0; i < num_sg; i++) {
   2138		int err = sg_count_one(sg + i);
   2139		if (err < 0)
   2140			return err;
   2141		count += err;
   2142	}
   2143
   2144	return count;
   2145}
   2146
   2147int ldc_map_sg(struct ldc_channel *lp,
   2148	       struct scatterlist *sg, int num_sg,
   2149	       struct ldc_trans_cookie *cookies, int ncookies,
   2150	       unsigned int map_perm)
   2151{
   2152	unsigned long i, npages;
   2153	struct ldc_mtable_entry *base;
   2154	struct cookie_state state;
   2155	struct ldc_iommu *iommu;
   2156	int err;
   2157	struct scatterlist *s;
   2158
   2159	if (map_perm & ~LDC_MAP_ALL)
   2160		return -EINVAL;
   2161
   2162	err = sg_count_pages(sg, num_sg);
   2163	if (err < 0)
   2164		return err;
   2165
   2166	npages = err;
   2167	if (err > ncookies)
   2168		return -EMSGSIZE;
   2169
   2170	iommu = &lp->iommu;
   2171
   2172	base = alloc_npages(iommu, npages);
   2173
   2174	if (!base)
   2175		return -ENOMEM;
   2176
   2177	state.page_table = iommu->page_table;
   2178	state.cookies = cookies;
   2179	state.mte_base = perm_to_mte(map_perm);
   2180	state.prev_cookie = ~(u64)0;
   2181	state.pte_idx = (base - iommu->page_table);
   2182	state.nc = 0;
   2183
   2184	for_each_sg(sg, s, num_sg, i) {
   2185		fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
   2186			     s->offset, s->length);
   2187	}
   2188
   2189	return state.nc;
   2190}
   2191EXPORT_SYMBOL(ldc_map_sg);
   2192
   2193int ldc_map_single(struct ldc_channel *lp,
   2194		   void *buf, unsigned int len,
   2195		   struct ldc_trans_cookie *cookies, int ncookies,
   2196		   unsigned int map_perm)
   2197{
   2198	unsigned long npages, pa;
   2199	struct ldc_mtable_entry *base;
   2200	struct cookie_state state;
   2201	struct ldc_iommu *iommu;
   2202
   2203	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
   2204		return -EINVAL;
   2205
   2206	pa = __pa(buf);
   2207	if ((pa | len) & (8UL - 1))
   2208		return -EFAULT;
   2209
   2210	npages = pages_in_region(pa, len);
   2211
   2212	iommu = &lp->iommu;
   2213
   2214	base = alloc_npages(iommu, npages);
   2215
   2216	if (!base)
   2217		return -ENOMEM;
   2218
   2219	state.page_table = iommu->page_table;
   2220	state.cookies = cookies;
   2221	state.mte_base = perm_to_mte(map_perm);
   2222	state.prev_cookie = ~(u64)0;
   2223	state.pte_idx = (base - iommu->page_table);
   2224	state.nc = 0;
   2225	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
   2226	BUG_ON(state.nc > ncookies);
   2227
   2228	return state.nc;
   2229}
   2230EXPORT_SYMBOL(ldc_map_single);
   2231
   2232
   2233static void free_npages(unsigned long id, struct ldc_iommu *iommu,
   2234			u64 cookie, u64 size)
   2235{
   2236	unsigned long npages, entry;
   2237
   2238	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
   2239
   2240	entry = ldc_cookie_to_index(cookie, iommu);
   2241	ldc_demap(iommu, id, cookie, entry, npages);
   2242	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
   2243}
   2244
   2245void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
   2246	       int ncookies)
   2247{
   2248	struct ldc_iommu *iommu = &lp->iommu;
   2249	int i;
   2250	unsigned long flags;
   2251
   2252	spin_lock_irqsave(&iommu->lock, flags);
   2253	for (i = 0; i < ncookies; i++) {
   2254		u64 addr = cookies[i].cookie_addr;
   2255		u64 size = cookies[i].cookie_size;
   2256
   2257		free_npages(lp->id, iommu, addr, size);
   2258	}
   2259	spin_unlock_irqrestore(&iommu->lock, flags);
   2260}
   2261EXPORT_SYMBOL(ldc_unmap);
   2262
   2263int ldc_copy(struct ldc_channel *lp, int copy_dir,
   2264	     void *buf, unsigned int len, unsigned long offset,
   2265	     struct ldc_trans_cookie *cookies, int ncookies)
   2266{
   2267	unsigned int orig_len;
   2268	unsigned long ra;
   2269	int i;
   2270
   2271	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
   2272		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
   2273		       lp->id, copy_dir);
   2274		return -EINVAL;
   2275	}
   2276
   2277	ra = __pa(buf);
   2278	if ((ra | len | offset) & (8UL - 1)) {
   2279		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
   2280		       "ra[%lx] len[%x] offset[%lx]\n",
   2281		       lp->id, ra, len, offset);
   2282		return -EFAULT;
   2283	}
   2284
   2285	if (lp->hs_state != LDC_HS_COMPLETE ||
   2286	    (lp->flags & LDC_FLAG_RESET)) {
   2287		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
   2288		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
   2289		return -ECONNRESET;
   2290	}
   2291
   2292	orig_len = len;
   2293	for (i = 0; i < ncookies; i++) {
   2294		unsigned long cookie_raddr = cookies[i].cookie_addr;
   2295		unsigned long this_len = cookies[i].cookie_size;
   2296		unsigned long actual_len;
   2297
   2298		if (unlikely(offset)) {
   2299			unsigned long this_off = offset;
   2300
   2301			if (this_off > this_len)
   2302				this_off = this_len;
   2303
   2304			offset -= this_off;
   2305			this_len -= this_off;
   2306			if (!this_len)
   2307				continue;
   2308			cookie_raddr += this_off;
   2309		}
   2310
   2311		if (this_len > len)
   2312			this_len = len;
   2313
   2314		while (1) {
   2315			unsigned long hv_err;
   2316
   2317			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
   2318						cookie_raddr, ra,
   2319						this_len, &actual_len);
   2320			if (unlikely(hv_err)) {
   2321				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
   2322				       "HV error %lu\n",
   2323				       lp->id, hv_err);
   2324				if (lp->hs_state != LDC_HS_COMPLETE ||
   2325				    (lp->flags & LDC_FLAG_RESET))
   2326					return -ECONNRESET;
   2327				else
   2328					return -EFAULT;
   2329			}
   2330
   2331			cookie_raddr += actual_len;
   2332			ra += actual_len;
   2333			len -= actual_len;
   2334			if (actual_len == this_len)
   2335				break;
   2336
   2337			this_len -= actual_len;
   2338		}
   2339
   2340		if (!len)
   2341			break;
   2342	}
   2343
   2344	/* It is caller policy what to do about short copies.
   2345	 * For example, a networking driver can declare the
   2346	 * packet a runt and drop it.
   2347	 */
   2348
   2349	return orig_len - len;
   2350}
   2351EXPORT_SYMBOL(ldc_copy);
   2352
   2353void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
   2354			  struct ldc_trans_cookie *cookies, int *ncookies,
   2355			  unsigned int map_perm)
   2356{
   2357	void *buf;
   2358	int err;
   2359
   2360	if (len & (8UL - 1))
   2361		return ERR_PTR(-EINVAL);
   2362
   2363	buf = kzalloc(len, GFP_ATOMIC);
   2364	if (!buf)
   2365		return ERR_PTR(-ENOMEM);
   2366
   2367	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
   2368	if (err < 0) {
   2369		kfree(buf);
   2370		return ERR_PTR(err);
   2371	}
   2372	*ncookies = err;
   2373
   2374	return buf;
   2375}
   2376EXPORT_SYMBOL(ldc_alloc_exp_dring);
   2377
   /* Undo ldc_alloc_exp_dring(): unmap the cookies, then free the buffer.
    *
    * The mappings are removed with ldc_unmap() before @buf is handed to
    * kfree().  @len is accepted for symmetry with the allocation call but
    * is not used here.
    */
   void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
   			   struct ldc_trans_cookie *cookies, int ncookies)
   {
   	/* Drop the mappings first so nothing refers to freed memory. */
   	ldc_unmap(lp, cookies, ncookies);

   	kfree(buf);
   }
   EXPORT_SYMBOL(ldc_free_exp_dring);
   2385
   2386static int __init ldc_init(void)
   2387{
   2388	unsigned long major, minor;
   2389	struct mdesc_handle *hp;
   2390	const u64 *v;
   2391	int err;
   2392	u64 mp;
   2393
   2394	hp = mdesc_grab();
   2395	if (!hp)
   2396		return -ENODEV;
   2397
   2398	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
   2399	err = -ENODEV;
   2400	if (mp == MDESC_NODE_NULL)
   2401		goto out;
   2402
   2403	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
   2404	if (!v)
   2405		goto out;
   2406
   2407	major = 1;
   2408	minor = 0;
   2409	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
   2410		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
   2411		goto out;
   2412	}
   2413
   2414	printk(KERN_INFO "%s", version);
   2415
   2416	if (!*v) {
   2417		printk(KERN_INFO PFX "Domaining disabled.\n");
   2418		goto out;
   2419	}
   2420	ldom_domaining_enabled = 1;
   2421	err = 0;
   2422
   2423out:
   2424	mdesc_release(hp);
   2425	return err;
   2426}
   2427
   2428core_initcall(ldc_init);