ud.c (28695B)
// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2015 - 2019 Intel Corporation.
 */

#include <linux/net.h>
#include <rdma/ib_smi.h>

#include "hfi.h"
#include "mad.h"
#include "verbs_txreq.h"
#include "trace_ibhdrs.h"
#include "qp.h"

/* We support only two types - 9B and 16B for now */
static const hfi1_make_req hfi1_make_ud_req_tbl[2] = {
	[HFI1_PKT_TYPE_9B] = &hfi1_make_ud_req_9B,
	[HFI1_PKT_TYPE_16B] = &hfi1_make_ud_req_16B
};

/**
 * ud_loopback - handle send on loopback QPs
 * @sqp: the sending QP
 * @swqe: the send work request
 *
 * This is called from hfi1_make_ud_req() to forward a WQE addressed
 * to the same HFI.
 * Note that the receive interrupt handler may be calling hfi1_ud_rcv()
 * while this is being called.
 */
static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
{
	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct hfi1_pportdata *ppd;
	struct hfi1_qp_priv *priv = sqp->priv;
	struct rvt_qp *qp;
	struct rdma_ah_attr *ah_attr;
	unsigned long flags;
	struct rvt_sge_state ssge;
	struct rvt_sge *sge;
	struct ib_wc wc;
	u32 length;
	enum ib_qp_type sqptype, dqptype;

	rcu_read_lock();

	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
			    rvt_get_swqe_remote_qpn(swqe));
	if (!qp) {
		ibp->rvp.n_pkt_drops++;
		rcu_read_unlock();
		return;
	}

	sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
			IB_QPT_UD : sqp->ibqp.qp_type;
	dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
			IB_QPT_UD : qp->ibqp.qp_type;

	if (dqptype != sqptype ||
	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ibp->rvp.n_pkt_drops++;
		goto drop;
	}

	ah_attr = rvt_get_swqe_ah_attr(swqe);
	ppd = ppd_from_ibp(ibp);

	if (qp->ibqp.qp_num > 1) {
		u16 pkey;
		u32 slid;
		u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];

		pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
		slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
				   ((1 << ppd->lmc) - 1));
		if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
						qp->s_pkey_index,
						slid, false))) {
			hfi1_bad_pkey(ibp, pkey,
				      rdma_ah_get_sl(ah_attr),
				      sqp->ibqp.qp_num, qp->ibqp.qp_num,
				      slid, rdma_ah_get_dlid(ah_attr));
			goto drop;
		}
	}

	/*
	 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
	 * Qkeys with the high order bit set mean use the
	 * qkey from the QP context instead of the WR (see 10.2.5).
	 */
	if (qp->ibqp.qp_num) {
		u32 qkey;

		qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ?
			sqp->qkey : rvt_get_swqe_remote_qkey(swqe);
		if (unlikely(qkey != qp->qkey))
			goto drop; /* silently drop per IBTA spec */
	}

	/*
	 * A GRH is expected to precede the data even if not
	 * present on the wire.
	 */
	length = swqe->length;
	memset(&wc, 0, sizeof(wc));
	wc.byte_len = length + sizeof(struct ib_grh);

	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = swqe->wr.ex.imm_data;
	}

	spin_lock_irqsave(&qp->r_lock, flags);

	/*
	 * Get the next work request entry to find where to put the data.
	 */
	if (qp->r_flags & RVT_R_REUSE_SGE) {
		qp->r_flags &= ~RVT_R_REUSE_SGE;
	} else {
		int ret;

		ret = rvt_get_rwqe(qp, false);
		if (ret < 0) {
			rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
			goto bail_unlock;
		}
		if (!ret) {
			if (qp->ibqp.qp_num == 0)
				ibp->rvp.n_vl15_dropped++;
			goto bail_unlock;
		}
	}
	/* Silently drop packets which are too big. */
	if (unlikely(wc.byte_len > qp->r_len)) {
		qp->r_flags |= RVT_R_REUSE_SGE;
		ibp->rvp.n_pkt_drops++;
		goto bail_unlock;
	}

	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
		struct ib_grh grh;
		struct ib_global_route grd = *(rdma_ah_read_grh(ah_attr));

		/*
		 * For loopback packets with extended LIDs, the
		 * sgid_index in the GRH is 0 and the dgid is
		 * OPA GID of the sender. While creating a response
		 * to the loopback packet, IB core creates the new
		 * sgid_index from the DGID and that will be the
		 * OPA_GID_INDEX. The new dgid is from the sgid
		 * index and that will be in the IB GID format.
		 *
		 * We now have a case where the sent packet had a
		 * different sgid_index and dgid compared to the
		 * one that was received in response.
		 *
		 * Fix this inconsistency.
		 */
		if (priv->hdr_type == HFI1_PKT_TYPE_16B) {
			if (grd.sgid_index == 0)
				grd.sgid_index = OPA_GID_INDEX;

			if (ib_is_opa_gid(&grd.dgid))
				grd.dgid.global.interface_id =
					cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]);
		}

		hfi1_make_grh(ibp, &grh, &grd, 0, 0);
		rvt_copy_sge(qp, &qp->r_sge, &grh,
			     sizeof(grh), true, false);
		wc.wc_flags |= IB_WC_GRH;
	} else {
		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
	}
	ssge.sg_list = swqe->sg_list + 1;
	ssge.sge = *swqe->sg_list;
	ssge.num_sge = swqe->wr.num_sge;
	sge = &ssge.sge;
	while (length) {
		u32 len = rvt_get_sge_length(sge, length);

		WARN_ON_ONCE(len == 0);
		rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
		rvt_update_sge(&ssge, len, false);
		length -= len;
	}
	rvt_put_ss(&qp->r_sge);
	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		goto bail_unlock;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	wc.src_qp = sqp->ibqp.qp_num;
	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
		if (sqp->ibqp.qp_type == IB_QPT_GSI ||
		    sqp->ibqp.qp_type == IB_QPT_SMI)
			wc.pkey_index = rvt_get_swqe_pkey_index(swqe);
		else
			wc.pkey_index = sqp->s_pkey_index;
	} else {
		wc.pkey_index = 0;
	}
	wc.slid = (ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
			       ((1 << ppd->lmc) - 1))) & U16_MAX;
	/* Check for loopback when the port lid is not set */
	if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
		wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
	wc.sl = rdma_ah_get_sl(ah_attr);
	wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
	wc.port_num = qp->port_num;
	/* Signal completion event if the solicited bit is set. */
	rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED);
	ibp->rvp.n_loop_pkts++;
bail_unlock:
	spin_unlock_irqrestore(&qp->r_lock, flags);
drop:
	rcu_read_unlock();
}
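
/*
 * A worked example of the qkey selection rule used in ud_loopback()
 * above (the values are hypothetical): a WR qkey of 0x80010000 has the
 * high-order bit set, so the (int) cast makes it negative and the
 * sending QP's own qkey is used instead; a WR qkey of 0x00010000 is
 * compared against the destination QP's qkey as-is (see 10.2.5).
 */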

static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       struct ib_other_headers *ohdr,
			       u16 *pkey, u32 extra_bytes, bool bypass)
{
	u32 bth0;
	struct hfi1_ibport *ibp;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
	} else {
		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
	}

	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
		bth0 |= IB_BTH_SOLICITED;
	bth0 |= extra_bytes << 20;
	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
		*pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
	else
		*pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
	if (!bypass)
		bth0 |= *pkey;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(rvt_get_swqe_remote_qpn(wqe));
	ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
	/*
	 * Qkeys with the high order bit set mean use the
	 * qkey from the QP context instead of the WR (see 10.2.5).
	 */
	ohdr->u.ud.deth[0] =
		cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? qp->qkey :
			    rvt_get_swqe_remote_qkey(wqe));
	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
}
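
/*
 * Illustration of the BTH word 0 assembly in hfi1_make_bth_deth()
 * above, with hypothetical values: for IB_OPCODE_UD_SEND_ONLY (0x64),
 * one pad byte and a pkey of 0xffff,
 * bth0 = (0x64 << 24) | (1 << 20) | 0xffff = 0x6410ffff.
 * For bypass (16B) packets the pkey travels in the 16B LRH instead,
 * so the low 16 bits of bth0 are left clear.
 */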

void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			 struct rvt_swqe *wqe)
{
	u32 nwords, extra_bytes;
	u16 len, slid, dlid, pkey;
	u16 lrh0 = 0;
	u8 sc5;
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	struct rdma_ah_attr *ah_attr;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct ib_grh *grh;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	ah_attr = rvt_get_swqe_ah_attr(wqe);

	extra_bytes = -wqe->length & 3;
	nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC;
	/* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
	ps->s_txreq->hdr_dwords = 7;
	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
		ps->s_txreq->hdr_dwords++;

	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
		grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh;
		ps->s_txreq->hdr_dwords +=
			hfi1_make_grh(ibp, grh, rdma_ah_read_grh(ah_attr),
				      ps->s_txreq->hdr_dwords - LRH_9B_DWORDS,
				      nwords);
		lrh0 = HFI1_LRH_GRH;
		ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
	} else {
		lrh0 = HFI1_LRH_BTH;
		ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
	}

	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
	lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
	if (qp->ibqp.qp_type == IB_QPT_SMI) {
		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
		priv->s_sc = 0xf;
	} else {
		lrh0 |= (sc5 & 0xf) << 12;
		priv->s_sc = sc5;
	}

	dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
	if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
		slid = be16_to_cpu(IB_LID_PERMISSIVE);
	} else {
		u16 lid = (u16)ppd->lid;

		if (lid) {
			lid |= rdma_ah_get_path_bits(ah_attr) &
				((1 << ppd->lmc) - 1);
			slid = lid;
		} else {
			slid = be16_to_cpu(IB_LID_PERMISSIVE);
		}
	}
	hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, false);
	len = ps->s_txreq->hdr_dwords + nwords;

	/* Setup the packet */
	ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_9B;
	hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
			 lrh0, len, dlid, slid);
}
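
/*
 * Dword accounting for the 9B path above, worked through: LRH (8B) +
 * BTH (12B) + DETH (8B) = 28B = 7 dwords, plus one dword of immediate
 * data for IB_WR_SEND_WITH_IMM, plus whatever hfi1_make_grh() reports
 * when IB_AH_GRH is set (a GRH is 40B = 10 dwords). For a hypothetical
 * 33-byte payload, extra_bytes = -33 & 3 = 3, so the payload
 * contributes (33 + 3) >> 2 = 9 dwords plus SIZE_OF_CRC.
 */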

void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			  struct rvt_swqe *wqe)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	struct rdma_ah_attr *ah_attr;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	u32 dlid, slid, nwords, extra_bytes;
	u32 dest_qp = rvt_get_swqe_remote_qpn(wqe);
	u32 src_qp = qp->ibqp.qp_num;
	u16 len, pkey;
	u8 l4, sc5;
	bool is_mgmt = false;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	ah_attr = rvt_get_swqe_ah_attr(wqe);

	/*
	 * Build 16B Management Packet if either the destination
	 * or source queue pair number is 0 or 1.
	 */
	if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) {
		/* header size in dwords 16B LRH+L4_FM = (16+8)/4. */
		ps->s_txreq->hdr_dwords = 6;
		is_mgmt = true;
	} else {
		/* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
		ps->s_txreq->hdr_dwords = 9;
		if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
			ps->s_txreq->hdr_dwords++;
	}

	/* SW provides space for CRC and LT for bypass packets. */
	extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2),
					   wqe->length);
	nwords = ((wqe->length + extra_bytes + SIZE_OF_LT) >> 2) + SIZE_OF_CRC;

	if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) &&
	    hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) {
		struct ib_grh *grh;
		struct ib_global_route *grd = rdma_ah_retrieve_grh(ah_attr);
		/*
		 * Ensure OPA GIDs are transformed to IB gids
		 * before creating the GRH.
		 */
		if (grd->sgid_index == OPA_GID_INDEX) {
			dd_dev_warn(ppd->dd, "Bad sgid_index. sgid_index: %d\n",
				    grd->sgid_index);
			grd->sgid_index = 0;
		}
		grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh;
		ps->s_txreq->hdr_dwords += hfi1_make_grh(
			ibp, grh, grd,
			ps->s_txreq->hdr_dwords - LRH_16B_DWORDS,
			nwords);
		ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
		l4 = OPA_16B_L4_IB_GLOBAL;
	} else {
		ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
		l4 = OPA_16B_L4_IB_LOCAL;
	}

	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
	if (qp->ibqp.qp_type == IB_QPT_SMI)
		priv->s_sc = 0xf;
	else
		priv->s_sc = sc5;

	dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 16B);
	if (!ppd->lid)
		slid = be32_to_cpu(OPA_LID_PERMISSIVE);
	else
		slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
				   ((1 << ppd->lmc) - 1));

	if (is_mgmt) {
		l4 = OPA_16B_L4_FM;
		pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
		hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
				 dest_qp, src_qp);
	} else {
		hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
	}
	/* Convert dwords to flits */
	len = (ps->s_txreq->hdr_dwords + nwords) >> 1;

	/* Setup the packet */
	ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_16B;
	hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah,
			  slid, dlid, len, pkey, 0, 0, l4, priv->s_sc);
}
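
/*
 * Length math for the 16B path above: everything is first counted in
 * dwords (6 for the LRH+L4_FM management form, 9 for LRH+BTH+DETH),
 * and the final ">> 1" converts dwords to the 8-byte flits that the
 * 16B LRH length field counts; the pad, LT and CRC bytes are folded
 * into nwords before the conversion.
 */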

/**
 * hfi1_make_ud_req - construct a UD request packet
 * @qp: the QP
 * @ps: the current packet state
 *
 * Assume s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct rdma_ah_attr *ah_attr;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct rvt_swqe *wqe;
	int next_cur;
	u32 lid;

	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == READ_ONCE(qp->s_head))
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
			goto bail;
		}
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done_free_tx;
	}

	/* see post_one_send() */
	if (qp->s_cur == READ_ONCE(qp->s_head))
		goto bail;

	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	next_cur = qp->s_cur + 1;
	if (next_cur >= qp->s_size)
		next_cur = 0;

	/* Construct the header. */
	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	ah_attr = rvt_get_swqe_ah_attr(wqe);
	priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr);
	if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) ||
	    (rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) {
		lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
		if (unlikely(!loopback &&
			     ((lid == ppd->lid) ||
			      ((lid == be32_to_cpu(OPA_LID_PERMISSIVE)) &&
			       (qp->ibqp.qp_type == IB_QPT_GSI))))) {
			unsigned long tflags = ps->flags;
			/*
			 * If DMAs are in progress, we can't generate
			 * a completion for the loopback packet since
			 * it would be out of order.
			 * Instead of waiting, we could queue a
			 * zero length descriptor so we get a callback.
			 */
			if (iowait_sdma_pending(&priv->s_iowait)) {
				qp->s_flags |= RVT_S_WAIT_DMA;
				goto bail;
			}
			qp->s_cur = next_cur;
			spin_unlock_irqrestore(&qp->s_lock, tflags);
			ud_loopback(qp, wqe);
			spin_lock_irqsave(&qp->s_lock, tflags);
			ps->flags = tflags;
			rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
			goto done_free_tx;
		}
	}

	qp->s_cur = next_cur;
	ps->s_txreq->s_cur_size = wqe->length;
	ps->s_txreq->ss = &qp->s_sge;
	qp->s_srate = rdma_ah_get_static_rate(ah_attr);
	qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
	qp->s_wqe = wqe;
	qp->s_sge.sge = wqe->sg_list[0];
	qp->s_sge.sg_list = wqe->sg_list + 1;
	qp->s_sge.num_sge = wqe->wr.num_sge;
	qp->s_sge.total_len = wqe->length;

	/* Make the appropriate header */
	hfi1_make_ud_req_tbl[priv->hdr_type](qp, ps, qp->s_wqe);
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
	ps->s_txreq->sde = priv->s_sde;
	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
	ps->s_txreq->psc = priv->s_sendcontext;
	/* disarm any ahg */
	priv->s_ahg->ahgcount = 0;
	priv->s_ahg->ahgidx = 0;
	priv->s_ahg->tx_flags = 0;

	return 1;

done_free_tx:
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;
	return 1;

bail:
	hfi1_put_txreq(ps->s_txreq);

bail_no_tx:
	ps->s_txreq = NULL;
	qp->s_flags &= ~RVT_S_BUSY;
	return 0;
}
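
/*
 * Note on the loopback branch in hfi1_make_ud_req() above: s_lock is
 * dropped around the ud_loopback() call, presumably because
 * ud_loopback() takes the destination QP's r_lock and holding both
 * QPs' locks at once is being avoided; ps->flags is saved and
 * restored across the unlock/lock pair.
 */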

/*
 * Hardware can't check this so we do it here.
 *
 * This is a slightly different algorithm than the standard pkey check. It
 * special cases the management keys and allows for 0x7fff and 0xffff to be in
 * the table at the same time.
 *
 * @returns the index found or -1 if not found
 */
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	unsigned i;

	if (pkey == FULL_MGMT_P_KEY || pkey == LIM_MGMT_P_KEY) {
		unsigned lim_idx = -1;

		for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i) {
			/* here we look for an exact match */
			if (ppd->pkeys[i] == pkey)
				return i;
			if (ppd->pkeys[i] == LIM_MGMT_P_KEY)
				lim_idx = i;
		}

		/* did not find 0xffff return 0x7fff idx if found */
		if (pkey == FULL_MGMT_P_KEY)
			return lim_idx;

		/* no match...  */
		return -1;
	}

	pkey &= 0x7fff; /* remove limited/full membership bit */

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
		if ((ppd->pkeys[i] & 0x7fff) == pkey)
			return i;

	/*
	 * Should not get here, this means hardware failed to validate pkeys.
	 */
	return -1;
}
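
/*
 * Worked example for hfi1_lookup_pkey_idx() above, with a hypothetical
 * table ppd->pkeys[] = { 0x8001, 0x7fff, 0x0000, ... }: looking up
 * FULL_MGMT_P_KEY (0xffff) finds no exact match but returns index 1,
 * the LIM_MGMT_P_KEY slot; looking up 0x0001 masks off the membership
 * bit and matches 0x8001 at index 0, since ordinary pkeys compare on
 * the low 15 bits only.
 */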

void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
		    u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
		    u8 sc5, const struct ib_grh *old_grh)
{
	u64 pbc, pbc_flags = 0;
	u32 bth0, plen, vl, hwords = 7;
	u16 len;
	u8 l4;
	struct hfi1_opa_header hdr;
	struct ib_other_headers *ohdr;
	struct pio_buf *pbuf;
	struct send_context *ctxt = qp_to_send_context(qp, sc5);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u32 nwords;

	hdr.hdr_type = HFI1_PKT_TYPE_16B;
	/* Populate length */
	nwords = ((hfi1_get_16b_padding(hwords << 2, 0) +
		   SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
	if (old_grh) {
		struct ib_grh *grh = &hdr.opah.u.l.grh;

		grh->version_tclass_flow = old_grh->version_tclass_flow;
		grh->paylen = cpu_to_be16(
			(hwords - LRH_16B_DWORDS + nwords) << 2);
		grh->hop_limit = 0xff;
		grh->sgid = old_grh->dgid;
		grh->dgid = old_grh->sgid;
		ohdr = &hdr.opah.u.l.oth;
		l4 = OPA_16B_L4_IB_GLOBAL;
		hwords += sizeof(struct ib_grh) / sizeof(u32);
	} else {
		ohdr = &hdr.opah.u.oth;
		l4 = OPA_16B_L4_IB_LOCAL;
	}

	/* BIT 16 to 19 is TVER. Bit 20 to 22 is pad cnt */
	bth0 = (IB_OPCODE_CNP << 24) | (1 << 16) |
	       (hfi1_get_16b_padding(hwords << 2, 0) << 20);
	ohdr->bth[0] = cpu_to_be32(bth0);

	ohdr->bth[1] = cpu_to_be32(remote_qpn);
	ohdr->bth[2] = 0; /* PSN 0 */

	/* Convert dwords to flits */
	len = (hwords + nwords) >> 1;
	hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5);

	plen = 2 /* PBC */ + hwords + nwords;
	pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
	vl = sc_to_vlt(ppd->dd, sc5);
	pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	if (ctxt) {
		pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
		if (!IS_ERR_OR_NULL(pbuf)) {
			trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
			ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
						 &hdr, hwords);
		}
	}
}
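
/*
 * The 16B CNP above is sent as a bypass packet: PBC_PACKET_BYPASS
 * selects the non-9B send path and PBC_INSERT_BYPASS_ICRC has the
 * hardware append the ICRC. plen counts the 2-dword PBC plus the
 * header (hwords) and pad/LT/CRC (nwords) dwords, and the send itself
 * is a PIO inline copy of the prebuilt header.
 */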

void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
		u16 pkey, u32 slid, u32 dlid, u8 sc5,
		const struct ib_grh *old_grh)
{
	u64 pbc, pbc_flags = 0;
	u32 bth0, plen, vl, hwords = 5;
	u16 lrh0;
	u8 sl = ibp->sc_to_sl[sc5];
	struct hfi1_opa_header hdr;
	struct ib_other_headers *ohdr;
	struct pio_buf *pbuf;
	struct send_context *ctxt = qp_to_send_context(qp, sc5);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	hdr.hdr_type = HFI1_PKT_TYPE_9B;
	if (old_grh) {
		struct ib_grh *grh = &hdr.ibh.u.l.grh;

		grh->version_tclass_flow = old_grh->version_tclass_flow;
		grh->paylen = cpu_to_be16(
			(hwords - LRH_9B_DWORDS + SIZE_OF_CRC) << 2);
		grh->hop_limit = 0xff;
		grh->sgid = old_grh->dgid;
		grh->dgid = old_grh->sgid;
		ohdr = &hdr.ibh.u.l.oth;
		lrh0 = HFI1_LRH_GRH;
		hwords += sizeof(struct ib_grh) / sizeof(u32);
	} else {
		ohdr = &hdr.ibh.u.oth;
		lrh0 = HFI1_LRH_BTH;
	}

	lrh0 |= (sc5 & 0xf) << 12 | sl << 4;

	bth0 = pkey | (IB_OPCODE_CNP << 24);
	ohdr->bth[0] = cpu_to_be32(bth0);

	ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
	ohdr->bth[2] = 0; /* PSN 0 */

	hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
	plen = 2 /* PBC */ + hwords;
	pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
	vl = sc_to_vlt(ppd->dd, sc5);
	pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	if (ctxt) {
		pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
		if (!IS_ERR_OR_NULL(pbuf)) {
			trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
			ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
						 &hdr, hwords);
		}
	}
}
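
/*
 * Contrast between the two CNP builders above: the 9B return_cnp()
 * marks congestion by setting the BECN bit directly in bth[1]
 * (1 << IB_BECN_SHIFT), while return_cnp_16B() passes what appears to
 * be a becn flag of 1 into hfi1_make_16b_hdr() and lets the 16B
 * header carry it. Neither variant uses a PSN; bth[2] is always 0.
 */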

/*
 * opa_smp_check() - Do the regular pkey checking, and the additional
 * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section
 * 9.10.25 ("SMA Packet Checks").
 *
 * Note that:
 * - Checks are done using the pkey directly from the packet's BTH,
 *   and specifically _not_ the pkey that we attach to the completion,
 *   which may be different.
 * - These checks are specifically for "non-local" SMPs (i.e., SMPs
 *   which originated on another node). SMPs which are sent from, and
 *   destined to this node are checked in opa_local_smp_check().
 *
 * At the point where opa_smp_check() is called, we know:
 * - destination QP is QP0
 *
 * opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
 */
static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
			 struct rvt_qp *qp, u16 slid, struct opa_smp *smp)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	/*
	 * I don't think it's possible for us to get here with sc != 0xf,
	 * but check it to be certain.
	 */
	if (sc5 != 0xf)
		return 1;

	if (rcv_pkey_check(ppd, pkey, sc5, slid))
		return 1;

	/*
	 * At this point we know (and so don't need to check again) that
	 * the pkey is either LIM_MGMT_P_KEY, or FULL_MGMT_P_KEY
	 * (see ingress_pkey_check).
	 */
	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE &&
	    smp->mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED) {
		ingress_pkey_table_fail(ppd, pkey, slid);
		return 1;
	}

	/*
	 * SMPs fall into one of four (disjoint) categories:
	 * SMA request, SMA response, SMA trap, or SMA trap repress.
	 * Our response depends, in part, on which type of SMP we're
	 * processing.
	 *
	 * If this is an SMA response, skip the check here.
	 *
	 * If this is an SMA request or SMA trap repress:
	 *   - pkey != FULL_MGMT_P_KEY =>
	 *       increment port recv constraint errors, drop MAD
	 *
	 * Otherwise:
	 *   - accept if the port is running an SM
	 *   - drop MAD if it's an SMA trap
	 *   - pkey == FULL_MGMT_P_KEY =>
	 *       reply with unsupported method
	 *   - pkey != FULL_MGMT_P_KEY =>
	 *       increment port recv constraint errors, drop MAD
	 */
	switch (smp->method) {
	case IB_MGMT_METHOD_GET_RESP:
	case IB_MGMT_METHOD_REPORT_RESP:
		break;
	case IB_MGMT_METHOD_GET:
	case IB_MGMT_METHOD_SET:
	case IB_MGMT_METHOD_REPORT:
	case IB_MGMT_METHOD_TRAP_REPRESS:
		if (pkey != FULL_MGMT_P_KEY) {
			ingress_pkey_table_fail(ppd, pkey, slid);
			return 1;
		}
		break;
	default:
		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
			return 0;
		if (smp->method == IB_MGMT_METHOD_TRAP)
			return 1;
		if (pkey == FULL_MGMT_P_KEY) {
			smp->status |= IB_SMP_UNSUP_METHOD;
			return 0;
		}
		ingress_pkey_table_fail(ppd, pkey, slid);
		return 1;
	}
	return 0;
}
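
/*
 * Example trace through opa_smp_check() above (hypothetical packet):
 * an SMA Get (IB_MGMT_METHOD_GET) arriving with LIM_MGMT_P_KEY fails
 * the pkey != FULL_MGMT_P_KEY test in the request/trap-repress arm,
 * so the port's recv constraint error counter is bumped via
 * ingress_pkey_table_fail() and the MAD is dropped.
 */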

/**
 * hfi1_ud_rcv - receive an incoming UD packet
 * @packet: the packet structure
 *
 * This is called from qp_rcv() to process an incoming UD packet
 * for the given QP.
 * Called at interrupt level.
 */
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
	u32 hdrsize = packet->hlen;
	struct ib_wc wc;
	u32 src_qp;
	u16 pkey;
	int mgmt_pkey_idx = -1;
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	u8 sc5 = packet->sc;
	u8 sl_from_sc;
	u8 opcode = packet->opcode;
	u8 sl = packet->sl;
	u32 dlid = packet->dlid;
	u32 slid = packet->slid;
	u8 extra_bytes;
	u8 l4 = 0;
	bool dlid_is_permissive;
	bool slid_is_permissive;
	bool solicited = false;

	extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2);

	if (packet->etype == RHF_RCV_TYPE_BYPASS) {
		u32 permissive_lid =
			opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B);

		l4 = hfi1_16B_get_l4(packet->hdr);
		pkey = hfi1_16B_get_pkey(packet->hdr);
		dlid_is_permissive = (dlid == permissive_lid);
		slid_is_permissive = (slid == permissive_lid);
	} else {
		pkey = ib_bth_get_pkey(packet->ohdr);
		dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE));
		slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE));
	}
	sl_from_sc = ibp->sc_to_sl[sc5];

	if (likely(l4 != OPA_16B_L4_FM)) {
		src_qp = ib_get_sqpn(packet->ohdr);
		solicited = ib_bth_is_solicited(packet->ohdr);
	} else {
		src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
	}

	process_ecn(qp, packet);
	/*
	 * Get the number of bytes the message was padded by
	 * and drop incomplete packets.
	 */
	if (unlikely(tlen < (hdrsize + extra_bytes)))
		goto drop;

	tlen -= hdrsize + extra_bytes;

	/*
	 * Check that the permissive LID is only used on QP0
	 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
	 */
	if (qp->ibqp.qp_num) {
		if (unlikely(dlid_is_permissive || slid_is_permissive))
			goto drop;
		if (qp->ibqp.qp_num > 1) {
			if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
				/*
				 * Traps will not be sent for packets dropped
				 * by the HW. This is fine, as sending trap
				 * for invalid pkeys is optional according to
				 * IB spec (release 1.3, section 10.9.4)
				 */
				hfi1_bad_pkey(ibp,
					      pkey, sl,
					      src_qp, qp->ibqp.qp_num,
					      slid, dlid);
				return;
			}
		} else {
			/* GSI packet */
			mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
			if (mgmt_pkey_idx < 0)
				goto drop;
		}
		if (unlikely(l4 != OPA_16B_L4_FM &&
			     ib_get_qkey(packet->ohdr) != qp->qkey))
			return; /* Silent drop */

		/* Drop invalid MAD packets (see 13.5.3.1). */
		if (unlikely(qp->ibqp.qp_num == 1 &&
			     (tlen > 2048 || (sc5 == 0xF))))
			goto drop;
	} else {
		/* Received on QP0, and so by definition, this is an SMP */
		struct opa_smp *smp = (struct opa_smp *)data;

		if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
			goto drop;

		if (tlen > 2048)
			goto drop;
		if ((dlid_is_permissive || slid_is_permissive) &&
		    smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
			goto drop;

		/* look up SMI pkey */
		mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
		if (mgmt_pkey_idx < 0)
			goto drop;
	}

	if (qp->ibqp.qp_num > 1 &&
	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
		wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
		wc.wc_flags = IB_WC_WITH_IMM;
	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
		wc.ex.imm_data = 0;
		wc.wc_flags = 0;
	} else {
		goto drop;
	}

	/*
	 * A GRH is expected to precede the data even if not
	 * present on the wire.
	 */
	wc.byte_len = tlen + sizeof(struct ib_grh);

	/*
	 * Get the next work request entry to find where to put the data.
	 */
	if (qp->r_flags & RVT_R_REUSE_SGE) {
		qp->r_flags &= ~RVT_R_REUSE_SGE;
	} else {
		int ret;

		ret = rvt_get_rwqe(qp, false);
		if (ret < 0) {
			rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
			return;
		}
		if (!ret) {
			if (qp->ibqp.qp_num == 0)
				ibp->rvp.n_vl15_dropped++;
			return;
		}
	}
	/* Silently drop packets which are too big. */
	if (unlikely(wc.byte_len > qp->r_len)) {
		qp->r_flags |= RVT_R_REUSE_SGE;
		goto drop;
	}
	if (packet->grh) {
		rvt_copy_sge(qp, &qp->r_sge, packet->grh,
			     sizeof(struct ib_grh), true, false);
		wc.wc_flags |= IB_WC_GRH;
	} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
		struct ib_grh grh;
		/*
		 * Assuming the send side only built 16B packets when large
		 * LIDs were in use: the GRH was stripped out when the 16B
		 * header was created, so add the GRH back here.
		 */
		hfi1_make_ext_grh(packet, &grh, slid, dlid);
		rvt_copy_sge(qp, &qp->r_sge, &grh,
			     sizeof(struct ib_grh), true, false);
		wc.wc_flags |= IB_WC_GRH;
	} else {
		rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
	}
	rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
		     true, false);
	rvt_put_ss(&qp->r_sge);
	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		return;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.opcode = IB_WC_RECV;
	wc.vendor_err = 0;
	wc.qp = &qp->ibqp;
	wc.src_qp = src_qp;

	if (qp->ibqp.qp_type == IB_QPT_GSI ||
	    qp->ibqp.qp_type == IB_QPT_SMI) {
		if (mgmt_pkey_idx < 0) {
			if (net_ratelimit()) {
				struct hfi1_devdata *dd = ppd->dd;

				dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n",
					   qp->ibqp.qp_type);
				mgmt_pkey_idx = 0;
			}
		}
		wc.pkey_index = (unsigned)mgmt_pkey_idx;
	} else {
		wc.pkey_index = 0;
	}
	if (slid_is_permissive)
		slid = be32_to_cpu(OPA_LID_PERMISSIVE);
	wc.slid = slid & U16_MAX;
	wc.sl = sl_from_sc;

	/*
	 * Save the LMC lower bits if the destination LID is a unicast LID.
	 */
	wc.dlid_path_bits = hfi1_check_mcast(dlid) ? 0 :
		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
	wc.port_num = qp->port_num;
	/* Signal completion event if the solicited bit is set. */
	rvt_recv_cq(qp, &wc, solicited);
	return;

drop:
	ibp->rvp.n_pkt_drops++;
}