cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

device.c (43674B)


      1/*
      2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
      3 *
      4 * This software is available to you under a choice of one of two
      5 * licenses.  You may choose to be licensed under the terms of the GNU
      6 * General Public License (GPL) Version 2, available from the file
      7 * COPYING in the main directory of this source tree, or the
      8 * OpenIB.org BSD license below:
      9 *
     10 *     Redistribution and use in source and binary forms, with or
     11 *     without modification, are permitted provided that the following
     12 *     conditions are met:
     13 *
     14 *      - Redistributions of source code must retain the above
     15 *	  copyright notice, this list of conditions and the following
     16 *	  disclaimer.
     17 *
     18 *      - Redistributions in binary form must reproduce the above
     19 *	  copyright notice, this list of conditions and the following
     20 *	  disclaimer in the documentation and/or other materials
     21 *	  provided with the distribution.
     22 *
     23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     30 * SOFTWARE.
     31 */
     32#include <linux/module.h>
     33#include <linux/moduleparam.h>
     34#include <linux/debugfs.h>
     35#include <linux/vmalloc.h>
     36#include <linux/math64.h>
     37
     38#include <rdma/ib_verbs.h>
     39
     40#include "iw_cxgb4.h"
     41
     42#define DRV_VERSION "0.1"
     43
     44MODULE_AUTHOR("Steve Wise");
     45MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
     46MODULE_LICENSE("Dual BSD/GPL");
     47
     48static int allow_db_fc_on_t5;
     49module_param(allow_db_fc_on_t5, int, 0644);
     50MODULE_PARM_DESC(allow_db_fc_on_t5,
     51		 "Allow DB Flow Control on T5 (default = 0)");
     52
     53static int allow_db_coalescing_on_t5;
     54module_param(allow_db_coalescing_on_t5, int, 0644);
     55MODULE_PARM_DESC(allow_db_coalescing_on_t5,
     56		 "Allow DB Coalescing on T5 (default = 0)");
     57
     58int c4iw_wr_log = 0;
     59module_param(c4iw_wr_log, int, 0444);
     60MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data.");
     61
     62static int c4iw_wr_log_size_order = 12;
     63module_param(c4iw_wr_log_size_order, int, 0444);
     64MODULE_PARM_DESC(c4iw_wr_log_size_order,
     65		 "Number of entries (log2) in the work request timing log.");
     66
     67static LIST_HEAD(uld_ctx_list);
     68static DEFINE_MUTEX(dev_mutex);
     69static struct workqueue_struct *reg_workq;
     70
     71#define DB_FC_RESUME_SIZE 64
     72#define DB_FC_RESUME_DELAY 1
     73#define DB_FC_DRAIN_THRESH 0
     74
     75static struct dentry *c4iw_debugfs_root;
     76
     77struct c4iw_debugfs_data {
     78	struct c4iw_dev *devp;
     79	char *buf;
     80	int bufsize;
     81	int pos;
     82};
     83
     84static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
     85			    loff_t *ppos)
     86{
     87	struct c4iw_debugfs_data *d = file->private_data;
     88
     89	return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
     90}
     91
     92void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
     93{
     94	struct wr_log_entry le;
     95	int idx;
     96
     97	if (!wq->rdev->wr_log)
     98		return;
     99
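	/*
	 * wr_log_size is a power of two (1 << c4iw_wr_log_size_order), so
	 * masking the incremented counter with (size - 1) wraps the ring
	 * index cheaply.
	 */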
    100	idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
    101		(wq->rdev->wr_log_size - 1);
    102	le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
    103	le.poll_host_time = ktime_get();
    104	le.valid = 1;
    105	le.cqe_sge_ts = CQE_TS(cqe);
    106	if (SQ_TYPE(cqe)) {
    107		le.qid = wq->sq.qid;
    108		le.opcode = CQE_OPCODE(cqe);
    109		le.post_host_time = wq->sq.sw_sq[wq->sq.cidx].host_time;
    110		le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
    111		le.wr_id = CQE_WRID_SQ_IDX(cqe);
    112	} else {
    113		le.qid = wq->rq.qid;
    114		le.opcode = FW_RI_RECEIVE;
    115		le.post_host_time = wq->rq.sw_rq[wq->rq.cidx].host_time;
    116		le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
    117		le.wr_id = CQE_WRID_MSN(cqe);
    118	}
    119	wq->rdev->wr_log[idx] = le;
    120}
    121
    122static int wr_log_show(struct seq_file *seq, void *v)
    123{
    124	struct c4iw_dev *dev = seq->private;
    125	ktime_t prev_time;
    126	struct wr_log_entry *lep;
    127	int prev_time_set = 0;
    128	int idx, end;
    129
    130#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
    131
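	/*
	 * Start at the current write index (the oldest slot) and walk the
	 * ring forward, skipping invalid slots, so valid entries print
	 * oldest to newest.
	 */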
    132	idx = atomic_read(&dev->rdev.wr_log_idx) &
    133		(dev->rdev.wr_log_size - 1);
    134	end = idx - 1;
    135	if (end < 0)
    136		end = dev->rdev.wr_log_size - 1;
    137	lep = &dev->rdev.wr_log[idx];
    138	while (idx != end) {
    139		if (lep->valid) {
    140			if (!prev_time_set) {
    141				prev_time_set = 1;
    142				prev_time = lep->poll_host_time;
    143			}
    144			seq_printf(seq, "%04u: nsec %llu qid %u opcode "
    145				   "%u %s 0x%x host_wr_delta nsec %llu "
    146				   "post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
    147				   "poll_sge_ts 0x%llx post_poll_delta_ns %llu "
    148				   "cqe_poll_delta_ns %llu\n",
    149				   idx,
    150				   ktime_to_ns(ktime_sub(lep->poll_host_time,
    151							 prev_time)),
    152				   lep->qid, lep->opcode,
    153				   lep->opcode == FW_RI_RECEIVE ?
    154							"msn" : "wrid",
    155				   lep->wr_id,
    156				   ktime_to_ns(ktime_sub(lep->poll_host_time,
    157							 lep->post_host_time)),
    158				   lep->post_sge_ts, lep->cqe_sge_ts,
    159				   lep->poll_sge_ts,
    160				   ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
    161				   ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
    162			prev_time = lep->poll_host_time;
    163		}
    164		idx++;
    165		if (idx > (dev->rdev.wr_log_size - 1))
    166			idx = 0;
    167		lep = &dev->rdev.wr_log[idx];
    168	}
    169#undef ts2ns
    170	return 0;
    171}
    172
    173static int wr_log_open(struct inode *inode, struct file *file)
    174{
    175	return single_open(file, wr_log_show, inode->i_private);
    176}
    177
    178static ssize_t wr_log_clear(struct file *file, const char __user *buf,
    179			    size_t count, loff_t *pos)
    180{
    181	struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
    182	int i;
    183
    184	if (dev->rdev.wr_log)
    185		for (i = 0; i < dev->rdev.wr_log_size; i++)
    186			dev->rdev.wr_log[i].valid = 0;
    187	return count;
    188}
    189
    190static const struct file_operations wr_log_debugfs_fops = {
    191	.owner   = THIS_MODULE,
    192	.open    = wr_log_open,
    193	.release = single_release,
    194	.read    = seq_read,
    195	.llseek  = seq_lseek,
    196	.write   = wr_log_clear,
    197};
    198
    199static struct sockaddr_in zero_sin = {
    200	.sin_family = AF_INET,
    201};
    202
    203static struct sockaddr_in6 zero_sin6 = {
    204	.sin6_family = AF_INET6,
    205};
    206
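/*
 * Hand back the sockaddrs stored in the cm_id (falling back to the all-zero
 * addresses above if the cm_id is already gone) together with the addresses
 * stored in the endpoint itself, so the debugfs dumpers can print both ports.
 */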
    207static void set_ep_sin_addrs(struct c4iw_ep *ep,
    208			     struct sockaddr_in **lsin,
    209			     struct sockaddr_in **rsin,
    210			     struct sockaddr_in **m_lsin,
    211			     struct sockaddr_in **m_rsin)
    212{
    213	struct iw_cm_id *id = ep->com.cm_id;
    214
    215	*m_lsin = (struct sockaddr_in *)&ep->com.local_addr;
    216	*m_rsin = (struct sockaddr_in *)&ep->com.remote_addr;
    217	if (id) {
    218		*lsin = (struct sockaddr_in *)&id->local_addr;
    219		*rsin = (struct sockaddr_in *)&id->remote_addr;
    220	} else {
    221		*lsin = &zero_sin;
    222		*rsin = &zero_sin;
    223	}
    224}
    225
    226static void set_ep_sin6_addrs(struct c4iw_ep *ep,
    227			      struct sockaddr_in6 **lsin6,
    228			      struct sockaddr_in6 **rsin6,
    229			      struct sockaddr_in6 **m_lsin6,
    230			      struct sockaddr_in6 **m_rsin6)
    231{
    232	struct iw_cm_id *id = ep->com.cm_id;
    233
    234	*m_lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
    235	*m_rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
    236	if (id) {
    237		*lsin6 = (struct sockaddr_in6 *)&id->local_addr;
    238		*rsin6 = (struct sockaddr_in6 *)&id->remote_addr;
    239	} else {
    240		*lsin6 = &zero_sin6;
    241		*rsin6 = &zero_sin6;
    242	}
    243}
    244
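/*
 * Format one QP into the debugfs buffer; returns 1 once the buffer is full.
 * QPs with a connected endpoint also get the endpoint addresses printed.
 */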
    245static int dump_qp(unsigned long id, struct c4iw_qp *qp,
    246		   struct c4iw_debugfs_data *qpd)
    247{
    248	int space;
    249	int cc;
    250	if (id != qp->wq.sq.qid)
    251		return 0;
    252
    253	space = qpd->bufsize - qpd->pos - 1;
    254	if (space == 0)
    255		return 1;
    256
    257	if (qp->ep) {
    258		struct c4iw_ep *ep = qp->ep;
    259
    260		if (ep->com.local_addr.ss_family == AF_INET) {
    261			struct sockaddr_in *lsin;
    262			struct sockaddr_in *rsin;
    263			struct sockaddr_in *m_lsin;
    264			struct sockaddr_in *m_rsin;
    265
    266			set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
    267			cc = snprintf(qpd->buf + qpd->pos, space,
    268				      "rc qp sq id %u %s id %u state %u "
    269				      "onchip %u ep tid %u state %u "
    270				      "%pI4:%u/%u->%pI4:%u/%u\n",
    271				      qp->wq.sq.qid, qp->srq ? "srq" : "rq",
    272				      qp->srq ? qp->srq->idx : qp->wq.rq.qid,
    273				      (int)qp->attr.state,
    274				      qp->wq.sq.flags & T4_SQ_ONCHIP,
    275				      ep->hwtid, (int)ep->com.state,
    276				      &lsin->sin_addr, ntohs(lsin->sin_port),
    277				      ntohs(m_lsin->sin_port),
    278				      &rsin->sin_addr, ntohs(rsin->sin_port),
    279				      ntohs(m_rsin->sin_port));
    280		} else {
    281			struct sockaddr_in6 *lsin6;
    282			struct sockaddr_in6 *rsin6;
    283			struct sockaddr_in6 *m_lsin6;
    284			struct sockaddr_in6 *m_rsin6;
    285
    286			set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6,
    287					  &m_rsin6);
    288			cc = snprintf(qpd->buf + qpd->pos, space,
    289				      "rc qp sq id %u rq id %u state %u "
    290				      "onchip %u ep tid %u state %u "
    291				      "%pI6:%u/%u->%pI6:%u/%u\n",
    292				      qp->wq.sq.qid, qp->wq.rq.qid,
    293				      (int)qp->attr.state,
    294				      qp->wq.sq.flags & T4_SQ_ONCHIP,
    295				      ep->hwtid, (int)ep->com.state,
    296				      &lsin6->sin6_addr,
    297				      ntohs(lsin6->sin6_port),
    298				      ntohs(m_lsin6->sin6_port),
    299				      &rsin6->sin6_addr,
    300				      ntohs(rsin6->sin6_port),
    301				      ntohs(m_rsin6->sin6_port));
    302		}
    303	} else
    304		cc = snprintf(qpd->buf + qpd->pos, space,
    305			     "qp sq id %u rq id %u state %u onchip %u\n",
    306			      qp->wq.sq.qid, qp->wq.rq.qid,
    307			      (int)qp->attr.state,
    308			      qp->wq.sq.flags & T4_SQ_ONCHIP);
    309	if (cc < space)
    310		qpd->pos += cc;
    311	return 0;
    312}
    313
    314static int qp_release(struct inode *inode, struct file *file)
    315{
    316	struct c4iw_debugfs_data *qpd = file->private_data;
    317	if (!qpd) {
    318		pr_info("%s null qpd?\n", __func__);
    319		return 0;
    320	}
    321	vfree(qpd->buf);
    322	kfree(qpd);
    323	return 0;
    324}
    325
    326static int qp_open(struct inode *inode, struct file *file)
    327{
    328	struct c4iw_qp *qp;
    329	struct c4iw_debugfs_data *qpd;
    330	unsigned long index;
    331	int count = 1;
    332
    333	qpd = kmalloc(sizeof(*qpd), GFP_KERNEL);
    334	if (!qpd)
    335		return -ENOMEM;
    336
    337	qpd->devp = inode->i_private;
    338	qpd->pos = 0;
    339
    340	/*
    341	 * No need to lock; we drop the lock to call vmalloc so it's racy
    342	 * anyway.  Someone who cares should switch this over to seq_file
    343	 */
    344	xa_for_each(&qpd->devp->qps, index, qp)
    345		count++;
    346
    347	qpd->bufsize = count * 180;
    348	qpd->buf = vmalloc(qpd->bufsize);
    349	if (!qpd->buf) {
    350		kfree(qpd);
    351		return -ENOMEM;
    352	}
    353
    354	xa_lock_irq(&qpd->devp->qps);
    355	xa_for_each(&qpd->devp->qps, index, qp)
    356		dump_qp(index, qp, qpd);
    357	xa_unlock_irq(&qpd->devp->qps);
    358
    359	qpd->buf[qpd->pos++] = 0;
    360	file->private_data = qpd;
    361	return 0;
    362}
    363
    364static const struct file_operations qp_debugfs_fops = {
    365	.owner   = THIS_MODULE,
    366	.open    = qp_open,
    367	.release = qp_release,
    368	.read    = debugfs_read,
    369	.llseek  = default_llseek,
    370};
    371
    372static int dump_stag(unsigned long id, struct c4iw_debugfs_data *stagd)
    373{
    374	int space;
    375	int cc;
    376	struct fw_ri_tpte tpte;
    377	int ret;
    378
    379	space = stagd->bufsize - stagd->pos - 1;
    380	if (space == 0)
    381		return 1;
    382
    383	ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8,
    384			      (__be32 *)&tpte);
    385	if (ret) {
    386		dev_err(&stagd->devp->rdev.lldi.pdev->dev,
    387			"%s cxgb4_read_tpte err %d\n", __func__, ret);
    388		return ret;
    389	}
    390	cc = snprintf(stagd->buf + stagd->pos, space,
    391		      "stag: idx 0x%x valid %d key 0x%x state %d pdid %d "
    392		      "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
    393		      (u32)id<<8,
    394		      FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
    395		      FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
    396		      FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
    397		      FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
    398		      FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
    399		      FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
    400		      ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
    401		      ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
    402	if (cc < space)
    403		stagd->pos += cc;
    404	return 0;
    405}
    406
    407static int stag_release(struct inode *inode, struct file *file)
    408{
    409	struct c4iw_debugfs_data *stagd = file->private_data;
    410	if (!stagd) {
    411		pr_info("%s null stagd?\n", __func__);
    412		return 0;
    413	}
    414	vfree(stagd->buf);
    415	kfree(stagd);
    416	return 0;
    417}
    418
    419static int stag_open(struct inode *inode, struct file *file)
    420{
    421	struct c4iw_debugfs_data *stagd;
    422	void *p;
    423	unsigned long index;
    424	int ret = 0;
    425	int count = 1;
    426
    427	stagd = kmalloc(sizeof(*stagd), GFP_KERNEL);
    428	if (!stagd) {
    429		ret = -ENOMEM;
    430		goto out;
    431	}
    432	stagd->devp = inode->i_private;
    433	stagd->pos = 0;
    434
    435	xa_for_each(&stagd->devp->mrs, index, p)
    436		count++;
    437
    438	stagd->bufsize = count * 256;
    439	stagd->buf = vmalloc(stagd->bufsize);
    440	if (!stagd->buf) {
    441		ret = -ENOMEM;
    442		goto err1;
    443	}
    444
    445	xa_lock_irq(&stagd->devp->mrs);
    446	xa_for_each(&stagd->devp->mrs, index, p)
    447		dump_stag(index, stagd);
    448	xa_unlock_irq(&stagd->devp->mrs);
    449
    450	stagd->buf[stagd->pos++] = 0;
    451	file->private_data = stagd;
    452	goto out;
    453err1:
    454	kfree(stagd);
    455out:
    456	return ret;
    457}
    458
    459static const struct file_operations stag_debugfs_fops = {
    460	.owner   = THIS_MODULE,
    461	.open    = stag_open,
    462	.release = stag_release,
    463	.read    = debugfs_read,
    464	.llseek  = default_llseek,
    465};
    466
    467static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
    468
    469static int stats_show(struct seq_file *seq, void *v)
    470{
    471	struct c4iw_dev *dev = seq->private;
    472
    473	seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
    474		   "Max", "Fail");
    475	seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
    476			dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
    477			dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
    478	seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
    479			dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
    480			dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
    481	seq_printf(seq, "     SRQS: %10llu %10llu %10llu %10llu\n",
    482		   dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur,
    483			dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail);
    484	seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
    485			dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
    486			dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
    487	seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
    488			dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
    489			dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
    490	seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
    491			dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
    492			dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
    493	seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
    494			dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
    495			dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
    496	seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
    497	seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
    498	seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
    499	seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
    500		   db_state_str[dev->db_state],
    501		   dev->rdev.stats.db_state_transitions,
    502		   dev->rdev.stats.db_fc_interruptions);
    503	seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
    504	seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
    505		   dev->rdev.stats.act_ofld_conn_fails);
    506	seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
    507		   dev->rdev.stats.pas_ofld_conn_fails);
    508	seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv);
    509	seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
    510	return 0;
    511}
    512
    513static int stats_open(struct inode *inode, struct file *file)
    514{
    515	return single_open(file, stats_show, inode->i_private);
    516}
    517
    518static ssize_t stats_clear(struct file *file, const char __user *buf,
    519		size_t count, loff_t *pos)
    520{
    521	struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
    522
    523	mutex_lock(&dev->rdev.stats.lock);
    524	dev->rdev.stats.pd.max = 0;
    525	dev->rdev.stats.pd.fail = 0;
    526	dev->rdev.stats.qid.max = 0;
    527	dev->rdev.stats.qid.fail = 0;
    528	dev->rdev.stats.stag.max = 0;
    529	dev->rdev.stats.stag.fail = 0;
    530	dev->rdev.stats.pbl.max = 0;
    531	dev->rdev.stats.pbl.fail = 0;
    532	dev->rdev.stats.rqt.max = 0;
    533	dev->rdev.stats.rqt.fail = 0;
     534	dev->rdev.stats.srqt.max = 0;
     535	dev->rdev.stats.srqt.fail = 0;
    536	dev->rdev.stats.ocqp.max = 0;
    537	dev->rdev.stats.ocqp.fail = 0;
    538	dev->rdev.stats.db_full = 0;
    539	dev->rdev.stats.db_empty = 0;
    540	dev->rdev.stats.db_drop = 0;
    541	dev->rdev.stats.db_state_transitions = 0;
    542	dev->rdev.stats.tcam_full = 0;
    543	dev->rdev.stats.act_ofld_conn_fails = 0;
    544	dev->rdev.stats.pas_ofld_conn_fails = 0;
    545	mutex_unlock(&dev->rdev.stats.lock);
    546	return count;
    547}
    548
    549static const struct file_operations stats_debugfs_fops = {
    550	.owner   = THIS_MODULE,
    551	.open    = stats_open,
    552	.release = single_release,
    553	.read    = seq_read,
    554	.llseek  = seq_lseek,
    555	.write   = stats_clear,
    556};
    557
    558static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd)
    559{
    560	int space;
    561	int cc;
    562
    563	space = epd->bufsize - epd->pos - 1;
    564	if (space == 0)
    565		return 1;
    566
    567	if (ep->com.local_addr.ss_family == AF_INET) {
    568		struct sockaddr_in *lsin;
    569		struct sockaddr_in *rsin;
    570		struct sockaddr_in *m_lsin;
    571		struct sockaddr_in *m_rsin;
    572
    573		set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
    574		cc = snprintf(epd->buf + epd->pos, space,
    575			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
    576			      "history 0x%lx hwtid %d atid %d "
    577			      "conn_na %u abort_na %u "
    578			      "%pI4:%d/%d <-> %pI4:%d/%d\n",
    579			      ep, ep->com.cm_id, ep->com.qp,
    580			      (int)ep->com.state, ep->com.flags,
    581			      ep->com.history, ep->hwtid, ep->atid,
    582			      ep->stats.connect_neg_adv,
    583			      ep->stats.abort_neg_adv,
    584			      &lsin->sin_addr, ntohs(lsin->sin_port),
    585			      ntohs(m_lsin->sin_port),
    586			      &rsin->sin_addr, ntohs(rsin->sin_port),
    587			      ntohs(m_rsin->sin_port));
    588	} else {
    589		struct sockaddr_in6 *lsin6;
    590		struct sockaddr_in6 *rsin6;
    591		struct sockaddr_in6 *m_lsin6;
    592		struct sockaddr_in6 *m_rsin6;
    593
    594		set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6, &m_rsin6);
    595		cc = snprintf(epd->buf + epd->pos, space,
    596			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
    597			      "history 0x%lx hwtid %d atid %d "
    598			      "conn_na %u abort_na %u "
    599			      "%pI6:%d/%d <-> %pI6:%d/%d\n",
    600			      ep, ep->com.cm_id, ep->com.qp,
    601			      (int)ep->com.state, ep->com.flags,
    602			      ep->com.history, ep->hwtid, ep->atid,
    603			      ep->stats.connect_neg_adv,
    604			      ep->stats.abort_neg_adv,
    605			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
    606			      ntohs(m_lsin6->sin6_port),
    607			      &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
    608			      ntohs(m_rsin6->sin6_port));
    609	}
    610	if (cc < space)
    611		epd->pos += cc;
    612	return 0;
    613}
    614
    615static
    616int dump_listen_ep(struct c4iw_listen_ep *ep, struct c4iw_debugfs_data *epd)
    617{
    618	int space;
    619	int cc;
    620
    621	space = epd->bufsize - epd->pos - 1;
    622	if (space == 0)
    623		return 1;
    624
    625	if (ep->com.local_addr.ss_family == AF_INET) {
    626		struct sockaddr_in *lsin = (struct sockaddr_in *)
    627			&ep->com.cm_id->local_addr;
    628		struct sockaddr_in *m_lsin = (struct sockaddr_in *)
    629			&ep->com.cm_id->m_local_addr;
    630
    631		cc = snprintf(epd->buf + epd->pos, space,
    632			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
    633			      "backlog %d %pI4:%d/%d\n",
    634			      ep, ep->com.cm_id, (int)ep->com.state,
    635			      ep->com.flags, ep->stid, ep->backlog,
    636			      &lsin->sin_addr, ntohs(lsin->sin_port),
    637			      ntohs(m_lsin->sin_port));
    638	} else {
    639		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
    640			&ep->com.cm_id->local_addr;
    641		struct sockaddr_in6 *m_lsin6 = (struct sockaddr_in6 *)
    642			&ep->com.cm_id->m_local_addr;
    643
    644		cc = snprintf(epd->buf + epd->pos, space,
    645			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
    646			      "backlog %d %pI6:%d/%d\n",
    647			      ep, ep->com.cm_id, (int)ep->com.state,
    648			      ep->com.flags, ep->stid, ep->backlog,
    649			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
    650			      ntohs(m_lsin6->sin6_port));
    651	}
    652	if (cc < space)
    653		epd->pos += cc;
    654	return 0;
    655}
    656
    657static int ep_release(struct inode *inode, struct file *file)
    658{
    659	struct c4iw_debugfs_data *epd = file->private_data;
    660	if (!epd) {
     661		pr_info("%s null epd?\n", __func__);
    662		return 0;
    663	}
    664	vfree(epd->buf);
    665	kfree(epd);
    666	return 0;
    667}
    668
    669static int ep_open(struct inode *inode, struct file *file)
    670{
    671	struct c4iw_ep *ep;
    672	struct c4iw_listen_ep *lep;
    673	unsigned long index;
    674	struct c4iw_debugfs_data *epd;
    675	int ret = 0;
    676	int count = 1;
    677
    678	epd = kmalloc(sizeof(*epd), GFP_KERNEL);
    679	if (!epd) {
    680		ret = -ENOMEM;
    681		goto out;
    682	}
    683	epd->devp = inode->i_private;
    684	epd->pos = 0;
    685
    686	xa_for_each(&epd->devp->hwtids, index, ep)
    687		count++;
    688	xa_for_each(&epd->devp->atids, index, ep)
    689		count++;
    690	xa_for_each(&epd->devp->stids, index, lep)
    691		count++;
    692
    693	epd->bufsize = count * 240;
    694	epd->buf = vmalloc(epd->bufsize);
    695	if (!epd->buf) {
    696		ret = -ENOMEM;
    697		goto err1;
    698	}
    699
    700	xa_lock_irq(&epd->devp->hwtids);
    701	xa_for_each(&epd->devp->hwtids, index, ep)
    702		dump_ep(ep, epd);
    703	xa_unlock_irq(&epd->devp->hwtids);
    704	xa_lock_irq(&epd->devp->atids);
    705	xa_for_each(&epd->devp->atids, index, ep)
    706		dump_ep(ep, epd);
    707	xa_unlock_irq(&epd->devp->atids);
    708	xa_lock_irq(&epd->devp->stids);
    709	xa_for_each(&epd->devp->stids, index, lep)
    710		dump_listen_ep(lep, epd);
    711	xa_unlock_irq(&epd->devp->stids);
    712
    713	file->private_data = epd;
    714	goto out;
    715err1:
    716	kfree(epd);
    717out:
    718	return ret;
    719}
    720
    721static const struct file_operations ep_debugfs_fops = {
    722	.owner   = THIS_MODULE,
    723	.open    = ep_open,
    724	.release = ep_release,
    725	.read    = debugfs_read,
    726};
    727
    728static void setup_debugfs(struct c4iw_dev *devp)
    729{
    730	debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
    731				 (void *)devp, &qp_debugfs_fops, 4096);
    732
    733	debugfs_create_file_size("stags", S_IWUSR, devp->debugfs_root,
    734				 (void *)devp, &stag_debugfs_fops, 4096);
    735
    736	debugfs_create_file_size("stats", S_IWUSR, devp->debugfs_root,
    737				 (void *)devp, &stats_debugfs_fops, 4096);
    738
    739	debugfs_create_file_size("eps", S_IWUSR, devp->debugfs_root,
    740				 (void *)devp, &ep_debugfs_fops, 4096);
    741
    742	if (c4iw_wr_log)
    743		debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
    744					 (void *)devp, &wr_log_debugfs_fops, 4096);
    745}
    746
    747void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
    748			       struct c4iw_dev_ucontext *uctx)
    749{
    750	struct list_head *pos, *nxt;
    751	struct c4iw_qid_list *entry;
    752
    753	mutex_lock(&uctx->lock);
    754	list_for_each_safe(pos, nxt, &uctx->qpids) {
    755		entry = list_entry(pos, struct c4iw_qid_list, entry);
    756		list_del_init(&entry->entry);
    757		if (!(entry->qid & rdev->qpmask)) {
    758			c4iw_put_resource(&rdev->resource.qid_table,
    759					  entry->qid);
    760			mutex_lock(&rdev->stats.lock);
    761			rdev->stats.qid.cur -= rdev->qpmask + 1;
    762			mutex_unlock(&rdev->stats.lock);
    763		}
    764		kfree(entry);
    765	}
    766
    767	list_for_each_safe(pos, nxt, &uctx->cqids) {
    768		entry = list_entry(pos, struct c4iw_qid_list, entry);
    769		list_del_init(&entry->entry);
    770		kfree(entry);
    771	}
    772	mutex_unlock(&uctx->lock);
    773}
    774
    775void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
    776			    struct c4iw_dev_ucontext *uctx)
    777{
    778	INIT_LIST_HEAD(&uctx->qpids);
    779	INIT_LIST_HEAD(&uctx->cqids);
    780	mutex_init(&uctx->lock);
    781}
    782
    783/* Caller takes care of locking if needed */
    784static int c4iw_rdev_open(struct c4iw_rdev *rdev)
    785{
    786	int err;
    787	unsigned int factor;
    788
    789	c4iw_init_dev_ucontext(rdev, &rdev->uctx);
    790
    791	/*
    792	 * This implementation assumes udb_density == ucq_density!  Eventually
    793	 * we might need to support this but for now fail the open. Also the
    794	 * cqid and qpid range must match for now.
    795	 */
    796	if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
    797		pr_err("%s: unsupported udb/ucq densities %u/%u\n",
    798		       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
    799		       rdev->lldi.ucq_density);
    800		return -EINVAL;
    801	}
    802	if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
    803	    rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
    804		pr_err("%s: unsupported qp and cq id ranges qp start %u size %u cq start %u size %u\n",
    805		       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
     806		       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start,
    807		       rdev->lldi.vr->cq.size);
    808		return -EINVAL;
    809	}
    810
    811	/* This implementation requires a sge_host_page_size <= PAGE_SIZE. */
    812	if (rdev->lldi.sge_host_page_size > PAGE_SIZE) {
    813		pr_err("%s: unsupported sge host page size %u\n",
    814		       pci_name(rdev->lldi.pdev),
    815		       rdev->lldi.sge_host_page_size);
    816		return -EINVAL;
    817	}
    818
    819	factor = PAGE_SIZE / rdev->lldi.sge_host_page_size;
    820	rdev->qpmask = (rdev->lldi.udb_density * factor) - 1;
    821	rdev->cqmask = (rdev->lldi.ucq_density * factor) - 1;
    822
    823	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
    824		 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
    825		 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
    826		 rdev->lldi.vr->pbl.start,
    827		 rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
    828		 rdev->lldi.vr->rq.size,
    829		 rdev->lldi.vr->qp.start,
    830		 rdev->lldi.vr->qp.size,
    831		 rdev->lldi.vr->cq.start,
    832		 rdev->lldi.vr->cq.size,
    833		 rdev->lldi.vr->srq.size);
    834	pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
    835		 &rdev->lldi.pdev->resource[2],
    836		 rdev->lldi.db_reg, rdev->lldi.gts_reg,
    837		 rdev->qpmask, rdev->cqmask);
    838
    839	if (c4iw_num_stags(rdev) == 0)
    840		return -EINVAL;
    841
    842	rdev->stats.pd.total = T4_MAX_NUM_PD;
    843	rdev->stats.stag.total = rdev->lldi.vr->stag.size;
    844	rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
    845	rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
    846	rdev->stats.srqt.total = rdev->lldi.vr->srq.size;
    847	rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
    848	rdev->stats.qid.total = rdev->lldi.vr->qp.size;
    849
    850	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev),
    851				 T4_MAX_NUM_PD, rdev->lldi.vr->srq.size);
    852	if (err) {
    853		pr_err("error %d initializing resources\n", err);
    854		return err;
    855	}
    856	err = c4iw_pblpool_create(rdev);
    857	if (err) {
    858		pr_err("error %d initializing pbl pool\n", err);
    859		goto destroy_resource;
    860	}
    861	err = c4iw_rqtpool_create(rdev);
    862	if (err) {
    863		pr_err("error %d initializing rqt pool\n", err);
    864		goto destroy_pblpool;
    865	}
    866	err = c4iw_ocqp_pool_create(rdev);
    867	if (err) {
    868		pr_err("error %d initializing ocqp pool\n", err);
    869		goto destroy_rqtpool;
    870	}
    871	rdev->status_page = (struct t4_dev_status_page *)
    872			    __get_free_page(GFP_KERNEL);
    873	if (!rdev->status_page) {
    874		err = -ENOMEM;
    875		goto destroy_ocqp_pool;
    876	}
    877	rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
    878	rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
    879	rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
    880	rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
    881	rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support;
    882
    883	if (c4iw_wr_log) {
    884		rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order,
    885				       sizeof(*rdev->wr_log),
    886				       GFP_KERNEL);
    887		if (rdev->wr_log) {
    888			rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
    889			atomic_set(&rdev->wr_log_idx, 0);
    890		}
    891	}
    892
    893	rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
    894	if (!rdev->free_workq) {
    895		err = -ENOMEM;
    896		goto err_free_status_page_and_wr_log;
    897	}
    898
    899	rdev->status_page->db_off = 0;
    900
    901	init_completion(&rdev->rqt_compl);
    902	init_completion(&rdev->pbl_compl);
    903	kref_init(&rdev->rqt_kref);
    904	kref_init(&rdev->pbl_kref);
    905
    906	return 0;
    907err_free_status_page_and_wr_log:
    908	if (c4iw_wr_log && rdev->wr_log)
    909		kfree(rdev->wr_log);
    910	free_page((unsigned long)rdev->status_page);
    911destroy_ocqp_pool:
    912	c4iw_ocqp_pool_destroy(rdev);
    913destroy_rqtpool:
    914	c4iw_rqtpool_destroy(rdev);
    915destroy_pblpool:
    916	c4iw_pblpool_destroy(rdev);
    917destroy_resource:
    918	c4iw_destroy_resource(&rdev->resource);
    919	return err;
    920}
    921
    922static void c4iw_rdev_close(struct c4iw_rdev *rdev)
    923{
    924	kfree(rdev->wr_log);
    925	c4iw_release_dev_ucontext(rdev, &rdev->uctx);
    926	free_page((unsigned long)rdev->status_page);
    927	c4iw_pblpool_destroy(rdev);
    928	c4iw_rqtpool_destroy(rdev);
    929	wait_for_completion(&rdev->pbl_compl);
    930	wait_for_completion(&rdev->rqt_compl);
    931	c4iw_ocqp_pool_destroy(rdev);
    932	destroy_workqueue(rdev->free_workq);
    933	c4iw_destroy_resource(&rdev->resource);
    934}
    935
    936void c4iw_dealloc(struct uld_ctx *ctx)
    937{
    938	c4iw_rdev_close(&ctx->dev->rdev);
    939	WARN_ON(!xa_empty(&ctx->dev->cqs));
    940	WARN_ON(!xa_empty(&ctx->dev->qps));
    941	WARN_ON(!xa_empty(&ctx->dev->mrs));
    942	wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids));
    943	WARN_ON(!xa_empty(&ctx->dev->stids));
    944	WARN_ON(!xa_empty(&ctx->dev->atids));
    945	if (ctx->dev->rdev.bar2_kva)
    946		iounmap(ctx->dev->rdev.bar2_kva);
    947	if (ctx->dev->rdev.oc_mw_kva)
    948		iounmap(ctx->dev->rdev.oc_mw_kva);
    949	ib_dealloc_device(&ctx->dev->ibdev);
    950	ctx->dev = NULL;
    951}
    952
    953static void c4iw_remove(struct uld_ctx *ctx)
    954{
    955	pr_debug("c4iw_dev %p\n", ctx->dev);
    956	debugfs_remove_recursive(ctx->dev->debugfs_root);
    957	c4iw_unregister_device(ctx->dev);
    958	c4iw_dealloc(ctx);
    959}
    960
    961static int rdma_supported(const struct cxgb4_lld_info *infop)
    962{
    963	return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
    964	       infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
    965	       infop->vr->cq.size > 0;
    966}
    967
    968static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
    969{
    970	struct c4iw_dev *devp;
    971	int ret;
    972
    973	if (!rdma_supported(infop)) {
    974		pr_info("%s: RDMA not supported on this device\n",
    975			pci_name(infop->pdev));
    976		return ERR_PTR(-ENOSYS);
    977	}
    978	if (!ocqp_supported(infop))
    979		pr_info("%s: On-Chip Queues not supported on this device\n",
    980			pci_name(infop->pdev));
    981
    982	devp = ib_alloc_device(c4iw_dev, ibdev);
    983	if (!devp) {
    984		pr_err("Cannot allocate ib device\n");
    985		return ERR_PTR(-ENOMEM);
    986	}
    987	devp->rdev.lldi = *infop;
    988
    989	/* init various hw-queue params based on lld info */
    990	pr_debug("Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
    991		 devp->rdev.lldi.sge_ingpadboundary,
    992		 devp->rdev.lldi.sge_egrstatuspagesize);
    993
    994	devp->rdev.hw_queue.t4_eq_status_entries =
    995		devp->rdev.lldi.sge_egrstatuspagesize / 64;
    996	devp->rdev.hw_queue.t4_max_eq_size = 65520;
    997	devp->rdev.hw_queue.t4_max_iq_size = 65520;
    998	devp->rdev.hw_queue.t4_max_rq_size = 8192 -
    999		devp->rdev.hw_queue.t4_eq_status_entries - 1;
   1000	devp->rdev.hw_queue.t4_max_sq_size =
   1001		devp->rdev.hw_queue.t4_max_eq_size -
   1002		devp->rdev.hw_queue.t4_eq_status_entries - 1;
   1003	devp->rdev.hw_queue.t4_max_qp_depth =
   1004		devp->rdev.hw_queue.t4_max_rq_size;
   1005	devp->rdev.hw_queue.t4_max_cq_depth =
   1006		devp->rdev.hw_queue.t4_max_iq_size - 2;
   1007	devp->rdev.hw_queue.t4_stat_len =
   1008		devp->rdev.lldi.sge_egrstatuspagesize;
   1009
   1010	/*
   1011	 * For T5/T6 devices, we map all of BAR2 with WC.
   1012	 * For T4 devices with onchip qp mem, we map only that part
   1013	 * of BAR2 with WC.
   1014	 */
   1015	devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
   1016	if (!is_t4(devp->rdev.lldi.adapter_type)) {
   1017		devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
   1018			pci_resource_len(devp->rdev.lldi.pdev, 2));
   1019		if (!devp->rdev.bar2_kva) {
   1020			pr_err("Unable to ioremap BAR2\n");
   1021			ib_dealloc_device(&devp->ibdev);
   1022			return ERR_PTR(-EINVAL);
   1023		}
   1024	} else if (ocqp_supported(infop)) {
   1025		devp->rdev.oc_mw_pa =
   1026			pci_resource_start(devp->rdev.lldi.pdev, 2) +
   1027			pci_resource_len(devp->rdev.lldi.pdev, 2) -
   1028			roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
   1029		devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
   1030			devp->rdev.lldi.vr->ocq.size);
   1031		if (!devp->rdev.oc_mw_kva) {
   1032			pr_err("Unable to ioremap onchip mem\n");
   1033			ib_dealloc_device(&devp->ibdev);
   1034			return ERR_PTR(-EINVAL);
   1035		}
   1036	}
   1037
   1038	pr_debug("ocq memory: hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
   1039		 devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
   1040		 devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
   1041
   1042	ret = c4iw_rdev_open(&devp->rdev);
   1043	if (ret) {
   1044		pr_err("Unable to open CXIO rdev err %d\n", ret);
   1045		ib_dealloc_device(&devp->ibdev);
   1046		return ERR_PTR(ret);
   1047	}
   1048
   1049	xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ);
   1050	xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ);
   1051	xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ);
   1052	xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ);
   1053	xa_init_flags(&devp->atids, XA_FLAGS_LOCK_IRQ);
   1054	xa_init_flags(&devp->stids, XA_FLAGS_LOCK_IRQ);
   1055	mutex_init(&devp->rdev.stats.lock);
   1056	mutex_init(&devp->db_mutex);
   1057	INIT_LIST_HEAD(&devp->db_fc_list);
   1058	init_waitqueue_head(&devp->wait);
   1059	devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
   1060
   1061	if (c4iw_debugfs_root) {
   1062		devp->debugfs_root = debugfs_create_dir(
   1063					pci_name(devp->rdev.lldi.pdev),
   1064					c4iw_debugfs_root);
   1065		setup_debugfs(devp);
   1066	}
   1067
   1068
   1069	return devp;
   1070}
   1071
   1072static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
   1073{
   1074	struct uld_ctx *ctx;
   1075	static int vers_printed;
   1076	int i;
   1077
   1078	if (!vers_printed++)
   1079		pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
   1080			DRV_VERSION);
   1081
   1082	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
   1083	if (!ctx) {
   1084		ctx = ERR_PTR(-ENOMEM);
   1085		goto out;
   1086	}
   1087	ctx->lldi = *infop;
   1088
   1089	pr_debug("found device %s nchan %u nrxq %u ntxq %u nports %u\n",
   1090		 pci_name(ctx->lldi.pdev),
   1091		 ctx->lldi.nchan, ctx->lldi.nrxq,
   1092		 ctx->lldi.ntxq, ctx->lldi.nports);
   1093
   1094	mutex_lock(&dev_mutex);
   1095	list_add_tail(&ctx->entry, &uld_ctx_list);
   1096	mutex_unlock(&dev_mutex);
   1097
   1098	for (i = 0; i < ctx->lldi.nrxq; i++)
   1099		pr_debug("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
   1100out:
   1101	return ctx;
   1102}
   1103
   1104static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
   1105						 const __be64 *rsp,
   1106						 u32 pktshift)
   1107{
   1108	struct sk_buff *skb;
   1109
   1110	/*
   1111	 * Allocate space for cpl_pass_accept_req which will be synthesized by
   1112	 * driver. Once the driver synthesizes the request the skb will go
   1113	 * through the regular cpl_pass_accept_req processing.
   1114	 * The math here assumes sizeof cpl_pass_accept_req >= sizeof
   1115	 * cpl_rx_pkt.
   1116	 */
   1117	skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
   1118			sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
   1119	if (unlikely(!skb))
   1120		return NULL;
   1121
   1122	__skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
   1123		  sizeof(struct rss_header) - pktshift);
   1124
   1125	/*
   1126	 * This skb will contain:
   1127	 *   rss_header from the rspq descriptor (1 flit)
   1128	 *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
   1129	 *   space for the difference between the size of an
   1130	 *      rx_pkt and pass_accept_req cpl (1 flit)
   1131	 *   the packet data from the gl
   1132	 */
   1133	skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
   1134				sizeof(struct rss_header));
   1135	skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
   1136				       sizeof(struct cpl_pass_accept_req),
   1137				       gl->va + pktshift,
   1138				       gl->tot_len - pktshift);
   1139	return skb;
   1140}
   1141
   1142static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
   1143			   const __be64 *rsp)
   1144{
   1145	unsigned int opcode = *(u8 *)rsp;
   1146	struct sk_buff *skb;
   1147
   1148	if (opcode != CPL_RX_PKT)
   1149		goto out;
   1150
    1151	skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
   1152	if (skb == NULL)
   1153		goto out;
   1154
   1155	if (c4iw_handlers[opcode] == NULL) {
   1156		pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
   1157		kfree_skb(skb);
   1158		goto out;
   1159	}
   1160	c4iw_handlers[opcode](dev, skb);
   1161	return 1;
   1162out:
   1163	return 0;
   1164}
   1165
   1166static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
   1167			const struct pkt_gl *gl)
   1168{
   1169	struct uld_ctx *ctx = handle;
   1170	struct c4iw_dev *dev = ctx->dev;
   1171	struct sk_buff *skb;
   1172	u8 opcode;
   1173
   1174	if (gl == NULL) {
   1175		/* omit RSS and rsp_ctrl at end of descriptor */
   1176		unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
   1177
   1178		skb = alloc_skb(256, GFP_ATOMIC);
   1179		if (!skb)
   1180			goto nomem;
   1181		__skb_put(skb, len);
   1182		skb_copy_to_linear_data(skb, &rsp[1], len);
   1183	} else if (gl == CXGB4_MSG_AN) {
   1184		const struct rsp_ctrl *rc = (void *)rsp;
   1185
   1186		u32 qid = be32_to_cpu(rc->pldbuflen_qid);
   1187		c4iw_ev_handler(dev, qid);
   1188		return 0;
   1189	} else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
   1190		if (recv_rx_pkt(dev, gl, rsp))
   1191			return 0;
   1192
   1193		pr_info("%s: unexpected FL contents at %p, RSS %#llx, FL %#llx, len %u\n",
   1194			pci_name(ctx->lldi.pdev), gl->va,
   1195			be64_to_cpu(*rsp),
   1196			be64_to_cpu(*(__force __be64 *)gl->va),
   1197			gl->tot_len);
   1198
   1199		return 0;
   1200	} else {
   1201		skb = cxgb4_pktgl_to_skb(gl, 128, 128);
   1202		if (unlikely(!skb))
   1203			goto nomem;
   1204	}
   1205
   1206	opcode = *(u8 *)rsp;
   1207	if (c4iw_handlers[opcode]) {
   1208		c4iw_handlers[opcode](dev, skb);
   1209	} else {
   1210		pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
   1211		kfree_skb(skb);
   1212	}
   1213
   1214	return 0;
   1215nomem:
   1216	return -1;
   1217}
   1218
   1219static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
   1220{
   1221	struct uld_ctx *ctx = handle;
   1222
   1223	pr_debug("new_state %u\n", new_state);
   1224	switch (new_state) {
   1225	case CXGB4_STATE_UP:
   1226		pr_info("%s: Up\n", pci_name(ctx->lldi.pdev));
   1227		if (!ctx->dev) {
   1228			ctx->dev = c4iw_alloc(&ctx->lldi);
   1229			if (IS_ERR(ctx->dev)) {
   1230				pr_err("%s: initialization failed: %ld\n",
   1231				       pci_name(ctx->lldi.pdev),
   1232				       PTR_ERR(ctx->dev));
   1233				ctx->dev = NULL;
   1234				break;
   1235			}
   1236
   1237			INIT_WORK(&ctx->reg_work, c4iw_register_device);
   1238			queue_work(reg_workq, &ctx->reg_work);
   1239		}
   1240		break;
   1241	case CXGB4_STATE_DOWN:
   1242		pr_info("%s: Down\n", pci_name(ctx->lldi.pdev));
   1243		if (ctx->dev)
   1244			c4iw_remove(ctx);
   1245		break;
   1246	case CXGB4_STATE_FATAL_ERROR:
   1247	case CXGB4_STATE_START_RECOVERY:
   1248		pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
   1249		if (ctx->dev) {
   1250			struct ib_event event = {};
   1251
   1252			ctx->dev->rdev.flags |= T4_FATAL_ERROR;
   1253			event.event  = IB_EVENT_DEVICE_FATAL;
   1254			event.device = &ctx->dev->ibdev;
   1255			ib_dispatch_event(&event);
   1256			c4iw_remove(ctx);
   1257		}
   1258		break;
   1259	case CXGB4_STATE_DETACH:
   1260		pr_info("%s: Detach\n", pci_name(ctx->lldi.pdev));
   1261		if (ctx->dev)
   1262			c4iw_remove(ctx);
   1263		break;
   1264	}
   1265	return 0;
   1266}
   1267
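/*
 * Doorbell flow control: a DB_FULL notification stops all queues (STOPPED);
 * on DB_EMPTY, resume_queues() drains the flow-control list in chunks
 * (FLOW_CONTROL) until the state returns to NORMAL; a DB_DROP triggers
 * recover_queues() to resync the queue indices (RECOVERY).  See
 * c4iw_uld_control() below for the dispatch.
 */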
   1268static void stop_queues(struct uld_ctx *ctx)
   1269{
   1270	struct c4iw_qp *qp;
   1271	unsigned long index, flags;
   1272
   1273	xa_lock_irqsave(&ctx->dev->qps, flags);
   1274	ctx->dev->rdev.stats.db_state_transitions++;
   1275	ctx->dev->db_state = STOPPED;
   1276	if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
   1277		xa_for_each(&ctx->dev->qps, index, qp)
   1278			t4_disable_wq_db(&qp->wq);
   1279	} else {
   1280		ctx->dev->rdev.status_page->db_off = 1;
   1281	}
   1282	xa_unlock_irqrestore(&ctx->dev->qps, flags);
   1283}
   1284
   1285static void resume_rc_qp(struct c4iw_qp *qp)
   1286{
   1287	spin_lock(&qp->lock);
   1288	t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
   1289	qp->wq.sq.wq_pidx_inc = 0;
   1290	t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
   1291	qp->wq.rq.wq_pidx_inc = 0;
   1292	spin_unlock(&qp->lock);
   1293}
   1294
   1295static void resume_a_chunk(struct uld_ctx *ctx)
   1296{
   1297	int i;
   1298	struct c4iw_qp *qp;
   1299
   1300	for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
   1301		qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
   1302				      db_fc_entry);
   1303		list_del_init(&qp->db_fc_entry);
   1304		resume_rc_qp(qp);
   1305		if (list_empty(&ctx->dev->db_fc_list))
   1306			break;
   1307	}
   1308}
   1309
   1310static void resume_queues(struct uld_ctx *ctx)
   1311{
   1312	xa_lock_irq(&ctx->dev->qps);
   1313	if (ctx->dev->db_state != STOPPED)
   1314		goto out;
   1315	ctx->dev->db_state = FLOW_CONTROL;
   1316	while (1) {
   1317		if (list_empty(&ctx->dev->db_fc_list)) {
   1318			struct c4iw_qp *qp;
   1319			unsigned long index;
   1320
   1321			WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
   1322			ctx->dev->db_state = NORMAL;
   1323			ctx->dev->rdev.stats.db_state_transitions++;
   1324			if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
   1325				xa_for_each(&ctx->dev->qps, index, qp)
   1326					t4_enable_wq_db(&qp->wq);
   1327			} else {
   1328				ctx->dev->rdev.status_page->db_off = 0;
   1329			}
   1330			break;
   1331		} else {
   1332			if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
   1333			    < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
   1334			       DB_FC_DRAIN_THRESH)) {
   1335				resume_a_chunk(ctx);
   1336			}
   1337			if (!list_empty(&ctx->dev->db_fc_list)) {
   1338				xa_unlock_irq(&ctx->dev->qps);
   1339				if (DB_FC_RESUME_DELAY) {
   1340					set_current_state(TASK_UNINTERRUPTIBLE);
   1341					schedule_timeout(DB_FC_RESUME_DELAY);
   1342				}
   1343				xa_lock_irq(&ctx->dev->qps);
   1344				if (ctx->dev->db_state != FLOW_CONTROL)
   1345					break;
   1346			}
   1347		}
   1348	}
   1349out:
   1350	if (ctx->dev->db_state != NORMAL)
   1351		ctx->dev->rdev.stats.db_fc_interruptions++;
   1352	xa_unlock_irq(&ctx->dev->qps);
   1353}
   1354
   1355struct qp_list {
   1356	unsigned idx;
   1357	struct c4iw_qp **qps;
   1358};
   1359
   1360static void deref_qps(struct qp_list *qp_list)
   1361{
   1362	int idx;
   1363
   1364	for (idx = 0; idx < qp_list->idx; idx++)
   1365		c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
   1366}
   1367
   1368static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
   1369{
   1370	int idx;
   1371	int ret;
   1372
   1373	for (idx = 0; idx < qp_list->idx; idx++) {
   1374		struct c4iw_qp *qp = qp_list->qps[idx];
   1375
   1376		xa_lock_irq(&qp->rhp->qps);
   1377		spin_lock(&qp->lock);
   1378		ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
   1379					  qp->wq.sq.qid,
   1380					  t4_sq_host_wq_pidx(&qp->wq),
   1381					  t4_sq_wq_size(&qp->wq));
   1382		if (ret) {
   1383			pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
   1384			       pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
   1385			spin_unlock(&qp->lock);
   1386			xa_unlock_irq(&qp->rhp->qps);
   1387			return;
   1388		}
   1389		qp->wq.sq.wq_pidx_inc = 0;
   1390
   1391		ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
   1392					  qp->wq.rq.qid,
   1393					  t4_rq_host_wq_pidx(&qp->wq),
   1394					  t4_rq_wq_size(&qp->wq));
   1395
   1396		if (ret) {
   1397			pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
   1398			       pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
   1399			spin_unlock(&qp->lock);
   1400			xa_unlock_irq(&qp->rhp->qps);
   1401			return;
   1402		}
   1403		qp->wq.rq.wq_pidx_inc = 0;
   1404		spin_unlock(&qp->lock);
   1405		xa_unlock_irq(&qp->rhp->qps);
   1406
   1407		/* Wait for the dbfifo to drain */
   1408		while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
   1409			set_current_state(TASK_UNINTERRUPTIBLE);
   1410			schedule_timeout(usecs_to_jiffies(10));
   1411		}
   1412	}
   1413}
   1414
   1415static void recover_queues(struct uld_ctx *ctx)
   1416{
   1417	struct c4iw_qp *qp;
   1418	unsigned long index;
   1419	int count = 0;
   1420	struct qp_list qp_list;
   1421	int ret;
   1422
   1423	/* slow everybody down */
   1424	set_current_state(TASK_UNINTERRUPTIBLE);
   1425	schedule_timeout(usecs_to_jiffies(1000));
   1426
   1427	/* flush the SGE contexts */
   1428	ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
   1429	if (ret) {
   1430		pr_err("%s: Fatal error - DB overflow recovery failed\n",
   1431		       pci_name(ctx->lldi.pdev));
   1432		return;
   1433	}
   1434
   1435	/* Count active queues so we can build a list of queues to recover */
   1436	xa_lock_irq(&ctx->dev->qps);
   1437	WARN_ON(ctx->dev->db_state != STOPPED);
   1438	ctx->dev->db_state = RECOVERY;
   1439	xa_for_each(&ctx->dev->qps, index, qp)
   1440		count++;
   1441
   1442	qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC);
   1443	if (!qp_list.qps) {
   1444		xa_unlock_irq(&ctx->dev->qps);
   1445		return;
   1446	}
   1447	qp_list.idx = 0;
   1448
   1449	/* add and ref each qp so it doesn't get freed */
   1450	xa_for_each(&ctx->dev->qps, index, qp) {
   1451		c4iw_qp_add_ref(&qp->ibqp);
   1452		qp_list.qps[qp_list.idx++] = qp;
   1453	}
   1454
   1455	xa_unlock_irq(&ctx->dev->qps);
   1456
   1457	/* now traverse the list in a safe context to recover the db state*/
   1458	recover_lost_dbs(ctx, &qp_list);
   1459
   1460	/* we're almost done!  deref the qps and clean up */
   1461	deref_qps(&qp_list);
   1462	kfree(qp_list.qps);
   1463
   1464	xa_lock_irq(&ctx->dev->qps);
   1465	WARN_ON(ctx->dev->db_state != RECOVERY);
   1466	ctx->dev->db_state = STOPPED;
   1467	xa_unlock_irq(&ctx->dev->qps);
   1468}
   1469
   1470static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
   1471{
   1472	struct uld_ctx *ctx = handle;
   1473
   1474	switch (control) {
   1475	case CXGB4_CONTROL_DB_FULL:
   1476		stop_queues(ctx);
   1477		ctx->dev->rdev.stats.db_full++;
   1478		break;
   1479	case CXGB4_CONTROL_DB_EMPTY:
   1480		resume_queues(ctx);
   1481		mutex_lock(&ctx->dev->rdev.stats.lock);
   1482		ctx->dev->rdev.stats.db_empty++;
   1483		mutex_unlock(&ctx->dev->rdev.stats.lock);
   1484		break;
   1485	case CXGB4_CONTROL_DB_DROP:
   1486		recover_queues(ctx);
   1487		mutex_lock(&ctx->dev->rdev.stats.lock);
   1488		ctx->dev->rdev.stats.db_drop++;
   1489		mutex_unlock(&ctx->dev->rdev.stats.lock);
   1490		break;
   1491	default:
   1492		pr_warn("%s: unknown control cmd %u\n",
   1493			pci_name(ctx->lldi.pdev), control);
   1494		break;
   1495	}
   1496	return 0;
   1497}
   1498
   1499static struct cxgb4_uld_info c4iw_uld_info = {
   1500	.name = DRV_NAME,
   1501	.nrxq = MAX_ULD_QSETS,
   1502	.ntxq = MAX_ULD_QSETS,
   1503	.rxq_size = 511,
   1504	.ciq = true,
   1505	.lro = false,
   1506	.add = c4iw_uld_add,
   1507	.rx_handler = c4iw_uld_rx_handler,
   1508	.state_change = c4iw_uld_state_change,
   1509	.control = c4iw_uld_control,
   1510};
   1511
   1512void _c4iw_free_wr_wait(struct kref *kref)
   1513{
   1514	struct c4iw_wr_wait *wr_waitp;
   1515
   1516	wr_waitp = container_of(kref, struct c4iw_wr_wait, kref);
   1517	pr_debug("Free wr_wait %p\n", wr_waitp);
   1518	kfree(wr_waitp);
   1519}
   1520
   1521struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp)
   1522{
   1523	struct c4iw_wr_wait *wr_waitp;
   1524
   1525	wr_waitp = kzalloc(sizeof(*wr_waitp), gfp);
   1526	if (wr_waitp) {
   1527		kref_init(&wr_waitp->kref);
   1528		pr_debug("wr_wait %p\n", wr_waitp);
   1529	}
   1530	return wr_waitp;
   1531}
   1532
   1533static int __init c4iw_init_module(void)
   1534{
   1535	int err;
   1536
   1537	err = c4iw_cm_init();
   1538	if (err)
   1539		return err;
   1540
   1541	c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
   1542
   1543	reg_workq = create_singlethread_workqueue("Register_iWARP_device");
   1544	if (!reg_workq) {
   1545		pr_err("Failed creating workqueue to register iwarp device\n");
   1546		return -ENOMEM;
   1547	}
   1548
   1549	cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
   1550
   1551	return 0;
   1552}
   1553
   1554static void __exit c4iw_exit_module(void)
   1555{
   1556	struct uld_ctx *ctx, *tmp;
   1557
   1558	mutex_lock(&dev_mutex);
   1559	list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
   1560		if (ctx->dev)
   1561			c4iw_remove(ctx);
   1562		kfree(ctx);
   1563	}
   1564	mutex_unlock(&dev_mutex);
   1565	destroy_workqueue(reg_workq);
   1566	cxgb4_unregister_uld(CXGB4_ULD_RDMA);
   1567	c4iw_cm_term();
   1568	debugfs_remove_recursive(c4iw_debugfs_root);
   1569}
   1570
   1571module_init(c4iw_init_module);
   1572module_exit(c4iw_exit_module);