pio.c (57904B)
1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 2/* 3 * Copyright(c) 2015-2018 Intel Corporation. 4 */ 5 6#include <linux/delay.h> 7#include "hfi.h" 8#include "qp.h" 9#include "trace.h" 10 11#define SC(name) SEND_CTXT_##name 12/* 13 * Send Context functions 14 */ 15static void sc_wait_for_packet_egress(struct send_context *sc, int pause); 16 17/* 18 * Set the CM reset bit and wait for it to clear. Use the provided 19 * sendctrl register. This routine has no locking. 20 */ 21void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl) 22{ 23 write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK); 24 while (1) { 25 udelay(1); 26 sendctrl = read_csr(dd, SEND_CTRL); 27 if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0) 28 break; 29 } 30} 31 32/* global control of PIO send */ 33void pio_send_control(struct hfi1_devdata *dd, int op) 34{ 35 u64 reg, mask; 36 unsigned long flags; 37 int write = 1; /* write sendctrl back */ 38 int flush = 0; /* re-read sendctrl to make sure it is flushed */ 39 int i; 40 41 spin_lock_irqsave(&dd->sendctrl_lock, flags); 42 43 reg = read_csr(dd, SEND_CTRL); 44 switch (op) { 45 case PSC_GLOBAL_ENABLE: 46 reg |= SEND_CTRL_SEND_ENABLE_SMASK; 47 fallthrough; 48 case PSC_DATA_VL_ENABLE: 49 mask = 0; 50 for (i = 0; i < ARRAY_SIZE(dd->vld); i++) 51 if (!dd->vld[i].mtu) 52 mask |= BIT_ULL(i); 53 /* Disallow sending on VLs not enabled */ 54 mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << 55 SEND_CTRL_UNSUPPORTED_VL_SHIFT; 56 reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; 57 break; 58 case PSC_GLOBAL_DISABLE: 59 reg &= ~SEND_CTRL_SEND_ENABLE_SMASK; 60 break; 61 case PSC_GLOBAL_VLARB_ENABLE: 62 reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 63 break; 64 case PSC_GLOBAL_VLARB_DISABLE: 65 reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK; 66 break; 67 case PSC_CM_RESET: 68 __cm_reset(dd, reg); 69 write = 0; /* CSR already written (and flushed) */ 70 break; 71 case PSC_DATA_VL_DISABLE: 72 reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK; 73 flush = 1; 74 break; 75 default: 76 dd_dev_err(dd, "%s: invalid control %d\n", __func__, op); 77 break; 78 } 79 80 if (write) { 81 write_csr(dd, SEND_CTRL, reg); 82 if (flush) 83 (void)read_csr(dd, SEND_CTRL); /* flush write */ 84 } 85 86 spin_unlock_irqrestore(&dd->sendctrl_lock, flags); 87} 88 89/* number of send context memory pools */ 90#define NUM_SC_POOLS 2 91 92/* Send Context Size (SCS) wildcards */ 93#define SCS_POOL_0 -1 94#define SCS_POOL_1 -2 95 96/* Send Context Count (SCC) wildcards */ 97#define SCC_PER_VL -1 98#define SCC_PER_CPU -2 99#define SCC_PER_KRCVQ -3 100 101/* Send Context Size (SCS) constants */ 102#define SCS_ACK_CREDITS 32 103#define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */ 104 105#define PIO_THRESHOLD_CEILING 4096 106 107#define PIO_WAIT_BATCH_SIZE 5 108 109/* default send context sizes */ 110static struct sc_config_sizes sc_config_sizes[SC_MAX] = { 111 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 112 .count = SCC_PER_VL }, /* one per NUMA */ 113 [SC_ACK] = { .size = SCS_ACK_CREDITS, 114 .count = SCC_PER_KRCVQ }, 115 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 116 .count = SCC_PER_CPU }, /* one per CPU */ 117 [SC_VL15] = { .size = SCS_VL15_CREDITS, 118 .count = 1 }, 119 120}; 121 122/* send context memory pool configuration */ 123struct mem_pool_config { 124 int centipercent; /* % of memory, in 100ths of 1% */ 125 int absolute_blocks; /* absolute block count */ 126}; 127 128/* default memory pool configuration: 100% in pool 0 */ 129static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = { 130 /* centi%, abs blocks */ 131 { 10000, -1 }, /* pool 0 */ 132 { 0, -1 }, /* pool 1 */ 133}; 134 135/* memory pool information, used when calculating final sizes */ 136struct mem_pool_info { 137 int centipercent; /* 138 * 100th of 1% of memory to use, -1 if blocks 139 * already set 140 */ 141 int count; /* count of contexts in the pool */ 142 int blocks; /* block size of the pool */ 143 int size; /* context size, in blocks */ 144}; 145 146/* 147 * Convert a pool wildcard to a valid pool index. The wildcards 148 * start at -1 and increase negatively. Map them as: 149 * -1 => 0 150 * -2 => 1 151 * etc. 152 * 153 * Return -1 on non-wildcard input, otherwise convert to a pool number. 154 */ 155static int wildcard_to_pool(int wc) 156{ 157 if (wc >= 0) 158 return -1; /* non-wildcard */ 159 return -wc - 1; 160} 161 162static const char *sc_type_names[SC_MAX] = { 163 "kernel", 164 "ack", 165 "user", 166 "vl15" 167}; 168 169static const char *sc_type_name(int index) 170{ 171 if (index < 0 || index >= SC_MAX) 172 return "unknown"; 173 return sc_type_names[index]; 174} 175 176/* 177 * Read the send context memory pool configuration and send context 178 * size configuration. Replace any wildcards and come up with final 179 * counts and sizes for the send context types. 180 */ 181int init_sc_pools_and_sizes(struct hfi1_devdata *dd) 182{ 183 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; 184 int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; 185 int total_contexts = 0; 186 int fixed_blocks; 187 int pool_blocks; 188 int used_blocks; 189 int cp_total; /* centipercent total */ 190 int ab_total; /* absolute block total */ 191 int extra; 192 int i; 193 194 /* 195 * When SDMA is enabled, kernel context pio packet size is capped by 196 * "piothreshold". Reduce pio buffer allocation for kernel context by 197 * setting it to a fixed size. The allocation allows 3-deep buffering 198 * of the largest pio packets plus up to 128 bytes header, sufficient 199 * to maintain verbs performance. 200 * 201 * When SDMA is disabled, keep the default pooling allocation. 202 */ 203 if (HFI1_CAP_IS_KSET(SDMA)) { 204 u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ? 205 piothreshold : PIO_THRESHOLD_CEILING; 206 sc_config_sizes[SC_KERNEL].size = 207 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE; 208 } 209 210 /* 211 * Step 0: 212 * - copy the centipercents/absolute sizes from the pool config 213 * - sanity check these values 214 * - add up centipercents, then later check for full value 215 * - add up absolute blocks, then later check for over-commit 216 */ 217 cp_total = 0; 218 ab_total = 0; 219 for (i = 0; i < NUM_SC_POOLS; i++) { 220 int cp = sc_mem_pool_config[i].centipercent; 221 int ab = sc_mem_pool_config[i].absolute_blocks; 222 223 /* 224 * A negative value is "unused" or "invalid". Both *can* 225 * be valid, but centipercent wins, so check that first 226 */ 227 if (cp >= 0) { /* centipercent valid */ 228 cp_total += cp; 229 } else if (ab >= 0) { /* absolute blocks valid */ 230 ab_total += ab; 231 } else { /* neither valid */ 232 dd_dev_err( 233 dd, 234 "Send context memory pool %d: both the block count and centipercent are invalid\n", 235 i); 236 return -EINVAL; 237 } 238 239 mem_pool_info[i].centipercent = cp; 240 mem_pool_info[i].blocks = ab; 241 } 242 243 /* do not use both % and absolute blocks for different pools */ 244 if (cp_total != 0 && ab_total != 0) { 245 dd_dev_err( 246 dd, 247 "All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n"); 248 return -EINVAL; 249 } 250 251 /* if any percentages are present, they must add up to 100% x 100 */ 252 if (cp_total != 0 && cp_total != 10000) { 253 dd_dev_err( 254 dd, 255 "Send context memory pool centipercent is %d, expecting 10000\n", 256 cp_total); 257 return -EINVAL; 258 } 259 260 /* the absolute pool total cannot be more than the mem total */ 261 if (ab_total > total_blocks) { 262 dd_dev_err( 263 dd, 264 "Send context memory pool absolute block count %d is larger than the memory size %d\n", 265 ab_total, total_blocks); 266 return -EINVAL; 267 } 268 269 /* 270 * Step 2: 271 * - copy from the context size config 272 * - replace context type wildcard counts with real values 273 * - add up non-memory pool block sizes 274 * - add up memory pool user counts 275 */ 276 fixed_blocks = 0; 277 for (i = 0; i < SC_MAX; i++) { 278 int count = sc_config_sizes[i].count; 279 int size = sc_config_sizes[i].size; 280 int pool; 281 282 /* 283 * Sanity check count: Either a positive value or 284 * one of the expected wildcards is valid. The positive 285 * value is checked later when we compare against total 286 * memory available. 287 */ 288 if (i == SC_ACK) { 289 count = dd->n_krcv_queues; 290 } else if (i == SC_KERNEL) { 291 count = INIT_SC_PER_VL * num_vls; 292 } else if (count == SCC_PER_CPU) { 293 count = dd->num_rcv_contexts - dd->n_krcv_queues; 294 } else if (count < 0) { 295 dd_dev_err( 296 dd, 297 "%s send context invalid count wildcard %d\n", 298 sc_type_name(i), count); 299 return -EINVAL; 300 } 301 if (total_contexts + count > chip_send_contexts(dd)) 302 count = chip_send_contexts(dd) - total_contexts; 303 304 total_contexts += count; 305 306 /* 307 * Sanity check pool: The conversion will return a pool 308 * number or -1 if a fixed (non-negative) value. The fixed 309 * value is checked later when we compare against 310 * total memory available. 311 */ 312 pool = wildcard_to_pool(size); 313 if (pool == -1) { /* non-wildcard */ 314 fixed_blocks += size * count; 315 } else if (pool < NUM_SC_POOLS) { /* valid wildcard */ 316 mem_pool_info[pool].count += count; 317 } else { /* invalid wildcard */ 318 dd_dev_err( 319 dd, 320 "%s send context invalid pool wildcard %d\n", 321 sc_type_name(i), size); 322 return -EINVAL; 323 } 324 325 dd->sc_sizes[i].count = count; 326 dd->sc_sizes[i].size = size; 327 } 328 if (fixed_blocks > total_blocks) { 329 dd_dev_err( 330 dd, 331 "Send context fixed block count, %u, larger than total block count %u\n", 332 fixed_blocks, total_blocks); 333 return -EINVAL; 334 } 335 336 /* step 3: calculate the blocks in the pools, and pool context sizes */ 337 pool_blocks = total_blocks - fixed_blocks; 338 if (ab_total > pool_blocks) { 339 dd_dev_err( 340 dd, 341 "Send context fixed pool sizes, %u, larger than pool block count %u\n", 342 ab_total, pool_blocks); 343 return -EINVAL; 344 } 345 /* subtract off the fixed pool blocks */ 346 pool_blocks -= ab_total; 347 348 for (i = 0; i < NUM_SC_POOLS; i++) { 349 struct mem_pool_info *pi = &mem_pool_info[i]; 350 351 /* % beats absolute blocks */ 352 if (pi->centipercent >= 0) 353 pi->blocks = (pool_blocks * pi->centipercent) / 10000; 354 355 if (pi->blocks == 0 && pi->count != 0) { 356 dd_dev_err( 357 dd, 358 "Send context memory pool %d has %u contexts, but no blocks\n", 359 i, pi->count); 360 return -EINVAL; 361 } 362 if (pi->count == 0) { 363 /* warn about wasted blocks */ 364 if (pi->blocks != 0) 365 dd_dev_err( 366 dd, 367 "Send context memory pool %d has %u blocks, but zero contexts\n", 368 i, pi->blocks); 369 pi->size = 0; 370 } else { 371 pi->size = pi->blocks / pi->count; 372 } 373 } 374 375 /* step 4: fill in the context type sizes from the pool sizes */ 376 used_blocks = 0; 377 for (i = 0; i < SC_MAX; i++) { 378 if (dd->sc_sizes[i].size < 0) { 379 unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size); 380 381 WARN_ON_ONCE(pool >= NUM_SC_POOLS); 382 dd->sc_sizes[i].size = mem_pool_info[pool].size; 383 } 384 /* make sure we are not larger than what is allowed by the HW */ 385#define PIO_MAX_BLOCKS 1024 386 if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS) 387 dd->sc_sizes[i].size = PIO_MAX_BLOCKS; 388 389 /* calculate our total usage */ 390 used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count; 391 } 392 extra = total_blocks - used_blocks; 393 if (extra != 0) 394 dd_dev_info(dd, "unused send context blocks: %d\n", extra); 395 396 return total_contexts; 397} 398 399int init_send_contexts(struct hfi1_devdata *dd) 400{ 401 u16 base; 402 int ret, i, j, context; 403 404 ret = init_credit_return(dd); 405 if (ret) 406 return ret; 407 408 dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8), 409 GFP_KERNEL); 410 dd->send_contexts = kcalloc(dd->num_send_contexts, 411 sizeof(struct send_context_info), 412 GFP_KERNEL); 413 if (!dd->send_contexts || !dd->hw_to_sw) { 414 kfree(dd->hw_to_sw); 415 kfree(dd->send_contexts); 416 free_credit_return(dd); 417 return -ENOMEM; 418 } 419 420 /* hardware context map starts with invalid send context indices */ 421 for (i = 0; i < TXE_NUM_CONTEXTS; i++) 422 dd->hw_to_sw[i] = INVALID_SCI; 423 424 /* 425 * All send contexts have their credit sizes. Allocate credits 426 * for each context one after another from the global space. 427 */ 428 context = 0; 429 base = 1; 430 for (i = 0; i < SC_MAX; i++) { 431 struct sc_config_sizes *scs = &dd->sc_sizes[i]; 432 433 for (j = 0; j < scs->count; j++) { 434 struct send_context_info *sci = 435 &dd->send_contexts[context]; 436 sci->type = i; 437 sci->base = base; 438 sci->credits = scs->size; 439 440 context++; 441 base += scs->size; 442 } 443 } 444 445 return 0; 446} 447 448/* 449 * Allocate a software index and hardware context of the given type. 450 * 451 * Must be called with dd->sc_lock held. 452 */ 453static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, 454 u32 *hw_context) 455{ 456 struct send_context_info *sci; 457 u32 index; 458 u32 context; 459 460 for (index = 0, sci = &dd->send_contexts[0]; 461 index < dd->num_send_contexts; index++, sci++) { 462 if (sci->type == type && sci->allocated == 0) { 463 sci->allocated = 1; 464 /* use a 1:1 mapping, but make them non-equal */ 465 context = chip_send_contexts(dd) - index - 1; 466 dd->hw_to_sw[context] = index; 467 *sw_index = index; 468 *hw_context = context; 469 return 0; /* success */ 470 } 471 } 472 dd_dev_err(dd, "Unable to locate a free type %d send context\n", type); 473 return -ENOSPC; 474} 475 476/* 477 * Free the send context given by its software index. 478 * 479 * Must be called with dd->sc_lock held. 480 */ 481static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context) 482{ 483 struct send_context_info *sci; 484 485 sci = &dd->send_contexts[sw_index]; 486 if (!sci->allocated) { 487 dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n", 488 __func__, sw_index, hw_context); 489 } 490 sci->allocated = 0; 491 dd->hw_to_sw[hw_context] = INVALID_SCI; 492} 493 494/* return the base context of a context in a group */ 495static inline u32 group_context(u32 context, u32 group) 496{ 497 return (context >> group) << group; 498} 499 500/* return the size of a group */ 501static inline u32 group_size(u32 group) 502{ 503 return 1 << group; 504} 505 506/* 507 * Obtain the credit return addresses, kernel virtual and bus, for the 508 * given sc. 509 * 510 * To understand this routine: 511 * o va and dma are arrays of struct credit_return. One for each physical 512 * send context, per NUMA. 513 * o Each send context always looks in its relative location in a struct 514 * credit_return for its credit return. 515 * o Each send context in a group must have its return address CSR programmed 516 * with the same value. Use the address of the first send context in the 517 * group. 518 */ 519static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) 520{ 521 u32 gc = group_context(sc->hw_context, sc->group); 522 u32 index = sc->hw_context & 0x7; 523 524 sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; 525 *dma = (unsigned long) 526 &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; 527} 528 529/* 530 * Work queue function triggered in error interrupt routine for 531 * kernel contexts. 532 */ 533static void sc_halted(struct work_struct *work) 534{ 535 struct send_context *sc; 536 537 sc = container_of(work, struct send_context, halt_work); 538 sc_restart(sc); 539} 540 541/* 542 * Calculate PIO block threshold for this send context using the given MTU. 543 * Trigger a return when one MTU plus optional header of credits remain. 544 * 545 * Parameter mtu is in bytes. 546 * Parameter hdrqentsize is in DWORDs. 547 * 548 * Return value is what to write into the CSR: trigger return when 549 * unreturned credits pass this count. 550 */ 551u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize) 552{ 553 u32 release_credits; 554 u32 threshold; 555 556 /* add in the header size, then divide by the PIO block size */ 557 mtu += hdrqentsize << 2; 558 release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE); 559 560 /* check against this context's credits */ 561 if (sc->credits <= release_credits) 562 threshold = 1; 563 else 564 threshold = sc->credits - release_credits; 565 566 return threshold; 567} 568 569/* 570 * Calculate credit threshold in terms of percent of the allocated credits. 571 * Trigger when unreturned credits equal or exceed the percentage of the whole. 572 * 573 * Return value is what to write into the CSR: trigger return when 574 * unreturned credits pass this count. 575 */ 576u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 577{ 578 return (sc->credits * percent) / 100; 579} 580 581/* 582 * Set the credit return threshold. 583 */ 584void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) 585{ 586 unsigned long flags; 587 u32 old_threshold; 588 int force_return = 0; 589 590 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 591 592 old_threshold = (sc->credit_ctrl >> 593 SC(CREDIT_CTRL_THRESHOLD_SHIFT)) 594 & SC(CREDIT_CTRL_THRESHOLD_MASK); 595 596 if (new_threshold != old_threshold) { 597 sc->credit_ctrl = 598 (sc->credit_ctrl 599 & ~SC(CREDIT_CTRL_THRESHOLD_SMASK)) 600 | ((new_threshold 601 & SC(CREDIT_CTRL_THRESHOLD_MASK)) 602 << SC(CREDIT_CTRL_THRESHOLD_SHIFT)); 603 write_kctxt_csr(sc->dd, sc->hw_context, 604 SC(CREDIT_CTRL), sc->credit_ctrl); 605 606 /* force a credit return on change to avoid a possible stall */ 607 force_return = 1; 608 } 609 610 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 611 612 if (force_return) 613 sc_return_credits(sc); 614} 615 616/* 617 * set_pio_integrity 618 * 619 * Set the CHECK_ENABLE register for the send context 'sc'. 620 */ 621void set_pio_integrity(struct send_context *sc) 622{ 623 struct hfi1_devdata *dd = sc->dd; 624 u32 hw_context = sc->hw_context; 625 int type = sc->type; 626 627 write_kctxt_csr(dd, hw_context, 628 SC(CHECK_ENABLE), 629 hfi1_pkt_default_send_ctxt_mask(dd, type)); 630} 631 632static u32 get_buffers_allocated(struct send_context *sc) 633{ 634 int cpu; 635 u32 ret = 0; 636 637 for_each_possible_cpu(cpu) 638 ret += *per_cpu_ptr(sc->buffers_allocated, cpu); 639 return ret; 640} 641 642static void reset_buffers_allocated(struct send_context *sc) 643{ 644 int cpu; 645 646 for_each_possible_cpu(cpu) 647 (*per_cpu_ptr(sc->buffers_allocated, cpu)) = 0; 648} 649 650/* 651 * Allocate a NUMA relative send context structure of the given type along 652 * with a HW context. 653 */ 654struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, 655 uint hdrqentsize, int numa) 656{ 657 struct send_context_info *sci; 658 struct send_context *sc = NULL; 659 dma_addr_t dma; 660 unsigned long flags; 661 u64 reg; 662 u32 thresh; 663 u32 sw_index; 664 u32 hw_context; 665 int ret; 666 u8 opval, opmask; 667 668 /* do not allocate while frozen */ 669 if (dd->flags & HFI1_FROZEN) 670 return NULL; 671 672 sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa); 673 if (!sc) 674 return NULL; 675 676 sc->buffers_allocated = alloc_percpu(u32); 677 if (!sc->buffers_allocated) { 678 kfree(sc); 679 dd_dev_err(dd, 680 "Cannot allocate buffers_allocated per cpu counters\n" 681 ); 682 return NULL; 683 } 684 685 spin_lock_irqsave(&dd->sc_lock, flags); 686 ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); 687 if (ret) { 688 spin_unlock_irqrestore(&dd->sc_lock, flags); 689 free_percpu(sc->buffers_allocated); 690 kfree(sc); 691 return NULL; 692 } 693 694 sci = &dd->send_contexts[sw_index]; 695 sci->sc = sc; 696 697 sc->dd = dd; 698 sc->node = numa; 699 sc->type = type; 700 spin_lock_init(&sc->alloc_lock); 701 spin_lock_init(&sc->release_lock); 702 spin_lock_init(&sc->credit_ctrl_lock); 703 seqlock_init(&sc->waitlock); 704 INIT_LIST_HEAD(&sc->piowait); 705 INIT_WORK(&sc->halt_work, sc_halted); 706 init_waitqueue_head(&sc->halt_wait); 707 708 /* grouping is always single context for now */ 709 sc->group = 0; 710 711 sc->sw_index = sw_index; 712 sc->hw_context = hw_context; 713 cr_group_addresses(sc, &dma); 714 sc->credits = sci->credits; 715 sc->size = sc->credits * PIO_BLOCK_SIZE; 716 717/* PIO Send Memory Address details */ 718#define PIO_ADDR_CONTEXT_MASK 0xfful 719#define PIO_ADDR_CONTEXT_SHIFT 16 720 sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK) 721 << PIO_ADDR_CONTEXT_SHIFT); 722 723 /* set base and credits */ 724 reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK)) 725 << SC(CTRL_CTXT_DEPTH_SHIFT)) 726 | ((sci->base & SC(CTRL_CTXT_BASE_MASK)) 727 << SC(CTRL_CTXT_BASE_SHIFT)); 728 write_kctxt_csr(dd, hw_context, SC(CTRL), reg); 729 730 set_pio_integrity(sc); 731 732 /* unmask all errors */ 733 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1); 734 735 /* set the default partition key */ 736 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 737 (SC(CHECK_PARTITION_KEY_VALUE_MASK) & 738 DEFAULT_PKEY) << 739 SC(CHECK_PARTITION_KEY_VALUE_SHIFT)); 740 741 /* per context type checks */ 742 if (type == SC_USER) { 743 opval = USER_OPCODE_CHECK_VAL; 744 opmask = USER_OPCODE_CHECK_MASK; 745 } else { 746 opval = OPCODE_CHECK_VAL_DISABLED; 747 opmask = OPCODE_CHECK_MASK_DISABLED; 748 } 749 750 /* set the send context check opcode mask and value */ 751 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 752 ((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) | 753 ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); 754 755 /* set up credit return */ 756 reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); 757 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); 758 759 /* 760 * Calculate the initial credit return threshold. 761 * 762 * For Ack contexts, set a threshold for half the credits. 763 * For User contexts use the given percentage. This has been 764 * sanitized on driver start-up. 765 * For Kernel contexts, use the default MTU plus a header 766 * or half the credits, whichever is smaller. This should 767 * work for both the 3-deep buffering allocation and the 768 * pooling allocation. 769 */ 770 if (type == SC_ACK) { 771 thresh = sc_percent_to_threshold(sc, 50); 772 } else if (type == SC_USER) { 773 thresh = sc_percent_to_threshold(sc, 774 user_credit_return_threshold); 775 } else { /* kernel */ 776 thresh = min(sc_percent_to_threshold(sc, 50), 777 sc_mtu_to_threshold(sc, hfi1_max_mtu, 778 hdrqentsize)); 779 } 780 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT); 781 /* add in early return */ 782 if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN)) 783 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 784 else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN)) /* kernel, ack */ 785 reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK); 786 787 /* set up write-through credit_ctrl */ 788 sc->credit_ctrl = reg; 789 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg); 790 791 /* User send contexts should not allow sending on VL15 */ 792 if (type == SC_USER) { 793 reg = 1ULL << 15; 794 write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg); 795 } 796 797 spin_unlock_irqrestore(&dd->sc_lock, flags); 798 799 /* 800 * Allocate shadow ring to track outstanding PIO buffers _after_ 801 * unlocking. We don't know the size until the lock is held and 802 * we can't allocate while the lock is held. No one is using 803 * the context yet, so allocate it now. 804 * 805 * User contexts do not get a shadow ring. 806 */ 807 if (type != SC_USER) { 808 /* 809 * Size the shadow ring 1 larger than the number of credits 810 * so head == tail can mean empty. 811 */ 812 sc->sr_size = sci->credits + 1; 813 sc->sr = kcalloc_node(sc->sr_size, 814 sizeof(union pio_shadow_ring), 815 GFP_KERNEL, numa); 816 if (!sc->sr) { 817 sc_free(sc); 818 return NULL; 819 } 820 } 821 822 hfi1_cdbg(PIO, 823 "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n", 824 sw_index, 825 hw_context, 826 sc_type_name(type), 827 sc->group, 828 sc->credits, 829 sc->credit_ctrl, 830 thresh); 831 832 return sc; 833} 834 835/* free a per-NUMA send context structure */ 836void sc_free(struct send_context *sc) 837{ 838 struct hfi1_devdata *dd; 839 unsigned long flags; 840 u32 sw_index; 841 u32 hw_context; 842 843 if (!sc) 844 return; 845 846 sc->flags |= SCF_IN_FREE; /* ensure no restarts */ 847 dd = sc->dd; 848 if (!list_empty(&sc->piowait)) 849 dd_dev_err(dd, "piowait list not empty!\n"); 850 sw_index = sc->sw_index; 851 hw_context = sc->hw_context; 852 sc_disable(sc); /* make sure the HW is disabled */ 853 flush_work(&sc->halt_work); 854 855 spin_lock_irqsave(&dd->sc_lock, flags); 856 dd->send_contexts[sw_index].sc = NULL; 857 858 /* clear/disable all registers set in sc_alloc */ 859 write_kctxt_csr(dd, hw_context, SC(CTRL), 0); 860 write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0); 861 write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0); 862 write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0); 863 write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0); 864 write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0); 865 write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0); 866 867 /* release the index and context for re-use */ 868 sc_hw_free(dd, sw_index, hw_context); 869 spin_unlock_irqrestore(&dd->sc_lock, flags); 870 871 kfree(sc->sr); 872 free_percpu(sc->buffers_allocated); 873 kfree(sc); 874} 875 876/* disable the context */ 877void sc_disable(struct send_context *sc) 878{ 879 u64 reg; 880 struct pio_buf *pbuf; 881 LIST_HEAD(wake_list); 882 883 if (!sc) 884 return; 885 886 /* do all steps, even if already disabled */ 887 spin_lock_irq(&sc->alloc_lock); 888 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL)); 889 reg &= ~SC(CTRL_CTXT_ENABLE_SMASK); 890 sc->flags &= ~SCF_ENABLED; 891 sc_wait_for_packet_egress(sc, 1); 892 write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg); 893 894 /* 895 * Flush any waiters. Once the context is disabled, 896 * credit return interrupts are stopped (although there 897 * could be one in-process when the context is disabled). 898 * Wait one microsecond for any lingering interrupts, then 899 * proceed with the flush. 900 */ 901 udelay(1); 902 spin_lock(&sc->release_lock); 903 if (sc->sr) { /* this context has a shadow ring */ 904 while (sc->sr_tail != sc->sr_head) { 905 pbuf = &sc->sr[sc->sr_tail].pbuf; 906 if (pbuf->cb) 907 (*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE); 908 sc->sr_tail++; 909 if (sc->sr_tail >= sc->sr_size) 910 sc->sr_tail = 0; 911 } 912 } 913 spin_unlock(&sc->release_lock); 914 915 write_seqlock(&sc->waitlock); 916 if (!list_empty(&sc->piowait)) 917 list_move(&sc->piowait, &wake_list); 918 write_sequnlock(&sc->waitlock); 919 while (!list_empty(&wake_list)) { 920 struct iowait *wait; 921 struct rvt_qp *qp; 922 struct hfi1_qp_priv *priv; 923 924 wait = list_first_entry(&wake_list, struct iowait, list); 925 qp = iowait_to_qp(wait); 926 priv = qp->priv; 927 list_del_init(&priv->s_iowait.list); 928 priv->s_iowait.lock = NULL; 929 hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 930 } 931 932 spin_unlock_irq(&sc->alloc_lock); 933} 934 935/* return SendEgressCtxtStatus.PacketOccupancy */ 936static u64 packet_occupancy(u64 reg) 937{ 938 return (reg & 939 SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK) 940 >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT; 941} 942 943/* is egress halted on the context? */ 944static bool egress_halted(u64 reg) 945{ 946 return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK); 947} 948 949/* is the send context halted? */ 950static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context) 951{ 952 return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) & 953 SC(STATUS_CTXT_HALTED_SMASK)); 954} 955 956/** 957 * sc_wait_for_packet_egress - wait for packet 958 * @sc: valid send context 959 * @pause: wait for credit return 960 * 961 * Wait for packet egress, optionally pause for credit return 962 * 963 * Egress halt and Context halt are not necessarily the same thing, so 964 * check for both. 965 * 966 * NOTE: The context halt bit may not be set immediately. Because of this, 967 * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW 968 * context bit to determine if the context is halted. 969 */ 970static void sc_wait_for_packet_egress(struct send_context *sc, int pause) 971{ 972 struct hfi1_devdata *dd = sc->dd; 973 u64 reg = 0; 974 u64 reg_prev; 975 u32 loop = 0; 976 977 while (1) { 978 reg_prev = reg; 979 reg = read_csr(dd, sc->hw_context * 8 + 980 SEND_EGRESS_CTXT_STATUS); 981 /* done if any halt bits, SW or HW are set */ 982 if (sc->flags & SCF_HALTED || 983 is_sc_halted(dd, sc->hw_context) || egress_halted(reg)) 984 break; 985 reg = packet_occupancy(reg); 986 if (reg == 0) 987 break; 988 /* counter is reset if occupancy count changes */ 989 if (reg != reg_prev) 990 loop = 0; 991 if (loop > 50000) { 992 /* timed out - bounce the link */ 993 dd_dev_err(dd, 994 "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", 995 __func__, sc->sw_index, 996 sc->hw_context, (u32)reg); 997 queue_work(dd->pport->link_wq, 998 &dd->pport->link_bounce_work); 999 break; 1000 } 1001 loop++; 1002 udelay(1); 1003 } 1004 1005 if (pause) 1006 /* Add additional delay to ensure chip returns all credits */ 1007 pause_for_credit_return(dd); 1008} 1009 1010void sc_wait(struct hfi1_devdata *dd) 1011{ 1012 int i; 1013 1014 for (i = 0; i < dd->num_send_contexts; i++) { 1015 struct send_context *sc = dd->send_contexts[i].sc; 1016 1017 if (!sc) 1018 continue; 1019 sc_wait_for_packet_egress(sc, 0); 1020 } 1021} 1022 1023/* 1024 * Restart a context after it has been halted due to error. 1025 * 1026 * If the first step fails - wait for the halt to be asserted, return early. 1027 * Otherwise complain about timeouts but keep going. 1028 * 1029 * It is expected that allocations (enabled flag bit) have been shut off 1030 * already (only applies to kernel contexts). 1031 */ 1032int sc_restart(struct send_context *sc) 1033{ 1034 struct hfi1_devdata *dd = sc->dd; 1035 u64 reg; 1036 u32 loop; 1037 int count; 1038 1039 /* bounce off if not halted, or being free'd */ 1040 if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE)) 1041 return -EINVAL; 1042 1043 dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index, 1044 sc->hw_context); 1045 1046 /* 1047 * Step 1: Wait for the context to actually halt. 1048 * 1049 * The error interrupt is asynchronous to actually setting halt 1050 * on the context. 1051 */ 1052 loop = 0; 1053 while (1) { 1054 reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS)); 1055 if (reg & SC(STATUS_CTXT_HALTED_SMASK)) 1056 break; 1057 if (loop > 100) { 1058 dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n", 1059 __func__, sc->sw_index, sc->hw_context); 1060 return -ETIME; 1061 } 1062 loop++; 1063 udelay(1); 1064 } 1065 1066 /* 1067 * Step 2: Ensure no users are still trying to write to PIO. 1068 * 1069 * For kernel contexts, we have already turned off buffer allocation. 1070 * Now wait for the buffer count to go to zero. 1071 * 1072 * For user contexts, the user handling code has cut off write access 1073 * to the context's PIO pages before calling this routine and will 1074 * restore write access after this routine returns. 1075 */ 1076 if (sc->type != SC_USER) { 1077 /* kernel context */ 1078 loop = 0; 1079 while (1) { 1080 count = get_buffers_allocated(sc); 1081 if (count == 0) 1082 break; 1083 if (loop > 100) { 1084 dd_dev_err(dd, 1085 "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n", 1086 __func__, sc->sw_index, 1087 sc->hw_context, count); 1088 } 1089 loop++; 1090 udelay(1); 1091 } 1092 } 1093 1094 /* 1095 * Step 3: Wait for all packets to egress. 1096 * This is done while disabling the send context 1097 * 1098 * Step 4: Disable the context 1099 * 1100 * This is a superset of the halt. After the disable, the 1101 * errors can be cleared. 1102 */ 1103 sc_disable(sc); 1104 1105 /* 1106 * Step 5: Enable the context 1107 * 1108 * This enable will clear the halted flag and per-send context 1109 * error flags. 1110 */ 1111 return sc_enable(sc); 1112} 1113 1114/* 1115 * PIO freeze processing. To be called after the TXE block is fully frozen. 1116 * Go through all frozen send contexts and disable them. The contexts are 1117 * already stopped by the freeze. 1118 */ 1119void pio_freeze(struct hfi1_devdata *dd) 1120{ 1121 struct send_context *sc; 1122 int i; 1123 1124 for (i = 0; i < dd->num_send_contexts; i++) { 1125 sc = dd->send_contexts[i].sc; 1126 /* 1127 * Don't disable unallocated, unfrozen, or user send contexts. 1128 * User send contexts will be disabled when the process 1129 * calls into the driver to reset its context. 1130 */ 1131 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1132 continue; 1133 1134 /* only need to disable, the context is already stopped */ 1135 sc_disable(sc); 1136 } 1137} 1138 1139/* 1140 * Unfreeze PIO for kernel send contexts. The precondition for calling this 1141 * is that all PIO send contexts have been disabled and the SPC freeze has 1142 * been cleared. Now perform the last step and re-enable each kernel context. 1143 * User (PSM) processing will occur when PSM calls into the kernel to 1144 * acknowledge the freeze. 1145 */ 1146void pio_kernel_unfreeze(struct hfi1_devdata *dd) 1147{ 1148 struct send_context *sc; 1149 int i; 1150 1151 for (i = 0; i < dd->num_send_contexts; i++) { 1152 sc = dd->send_contexts[i].sc; 1153 if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) 1154 continue; 1155 if (sc->flags & SCF_LINK_DOWN) 1156 continue; 1157 1158 sc_enable(sc); /* will clear the sc frozen flag */ 1159 } 1160} 1161 1162/** 1163 * pio_kernel_linkup() - Re-enable send contexts after linkup event 1164 * @dd: valid devive data 1165 * 1166 * When the link goes down, the freeze path is taken. However, a link down 1167 * event is different from a freeze because if the send context is re-enabled 1168 * whowever is sending data will start sending data again, which will hang 1169 * any QP that is sending data. 1170 * 1171 * The freeze path now looks at the type of event that occurs and takes this 1172 * path for link down event. 1173 */ 1174void pio_kernel_linkup(struct hfi1_devdata *dd) 1175{ 1176 struct send_context *sc; 1177 int i; 1178 1179 for (i = 0; i < dd->num_send_contexts; i++) { 1180 sc = dd->send_contexts[i].sc; 1181 if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER) 1182 continue; 1183 1184 sc_enable(sc); /* will clear the sc link down flag */ 1185 } 1186} 1187 1188/* 1189 * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear. 1190 * Returns: 1191 * -ETIMEDOUT - if we wait too long 1192 * -EIO - if there was an error 1193 */ 1194static int pio_init_wait_progress(struct hfi1_devdata *dd) 1195{ 1196 u64 reg; 1197 int max, count = 0; 1198 1199 /* max is the longest possible HW init time / delay */ 1200 max = (dd->icode == ICODE_FPGA_EMULATION) ? 120 : 5; 1201 while (1) { 1202 reg = read_csr(dd, SEND_PIO_INIT_CTXT); 1203 if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK)) 1204 break; 1205 if (count >= max) 1206 return -ETIMEDOUT; 1207 udelay(5); 1208 count++; 1209 } 1210 1211 return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0; 1212} 1213 1214/* 1215 * Reset all of the send contexts to their power-on state. Used 1216 * only during manual init - no lock against sc_enable needed. 1217 */ 1218void pio_reset_all(struct hfi1_devdata *dd) 1219{ 1220 int ret; 1221 1222 /* make sure the init engine is not busy */ 1223 ret = pio_init_wait_progress(dd); 1224 /* ignore any timeout */ 1225 if (ret == -EIO) { 1226 /* clear the error */ 1227 write_csr(dd, SEND_PIO_ERR_CLEAR, 1228 SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK); 1229 } 1230 1231 /* reset init all */ 1232 write_csr(dd, SEND_PIO_INIT_CTXT, 1233 SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK); 1234 udelay(2); 1235 ret = pio_init_wait_progress(dd); 1236 if (ret < 0) { 1237 dd_dev_err(dd, 1238 "PIO send context init %s while initializing all PIO blocks\n", 1239 ret == -ETIMEDOUT ? "is stuck" : "had an error"); 1240 } 1241} 1242 1243/* enable the context */ 1244int sc_enable(struct send_context *sc) 1245{ 1246 u64 sc_ctrl, reg, pio; 1247 struct hfi1_devdata *dd; 1248 unsigned long flags; 1249 int ret = 0; 1250 1251 if (!sc) 1252 return -EINVAL; 1253 dd = sc->dd; 1254 1255 /* 1256 * Obtain the allocator lock to guard against any allocation 1257 * attempts (which should not happen prior to context being 1258 * enabled). On the release/disable side we don't need to 1259 * worry about locking since the releaser will not do anything 1260 * if the context accounting values have not changed. 1261 */ 1262 spin_lock_irqsave(&sc->alloc_lock, flags); 1263 sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1264 if ((sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK))) 1265 goto unlock; /* already enabled */ 1266 1267 /* IMPORTANT: only clear free and fill if transitioning 0 -> 1 */ 1268 1269 *sc->hw_free = 0; 1270 sc->free = 0; 1271 sc->alloc_free = 0; 1272 sc->fill = 0; 1273 sc->fill_wrap = 0; 1274 sc->sr_head = 0; 1275 sc->sr_tail = 0; 1276 sc->flags = 0; 1277 /* the alloc lock insures no fast path allocation */ 1278 reset_buffers_allocated(sc); 1279 1280 /* 1281 * Clear all per-context errors. Some of these will be set when 1282 * we are re-enabling after a context halt. Now that the context 1283 * is disabled, the halt will not clear until after the PIO init 1284 * engine runs below. 1285 */ 1286 reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS)); 1287 if (reg) 1288 write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg); 1289 1290 /* 1291 * The HW PIO initialization engine can handle only one init 1292 * request at a time. Serialize access to each device's engine. 1293 */ 1294 spin_lock(&dd->sc_init_lock); 1295 /* 1296 * Since access to this code block is serialized and 1297 * each access waits for the initialization to complete 1298 * before releasing the lock, the PIO initialization engine 1299 * should not be in use, so we don't have to wait for the 1300 * InProgress bit to go down. 1301 */ 1302 pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) << 1303 SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) | 1304 SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK; 1305 write_csr(dd, SEND_PIO_INIT_CTXT, pio); 1306 /* 1307 * Wait until the engine is done. Give the chip the required time 1308 * so, hopefully, we read the register just once. 1309 */ 1310 udelay(2); 1311 ret = pio_init_wait_progress(dd); 1312 spin_unlock(&dd->sc_init_lock); 1313 if (ret) { 1314 dd_dev_err(dd, 1315 "sctxt%u(%u): Context not enabled due to init failure %d\n", 1316 sc->sw_index, sc->hw_context, ret); 1317 goto unlock; 1318 } 1319 1320 /* 1321 * All is well. Enable the context. 1322 */ 1323 sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK); 1324 write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl); 1325 /* 1326 * Read SendCtxtCtrl to force the write out and prevent a timing 1327 * hazard where a PIO write may reach the context before the enable. 1328 */ 1329 read_kctxt_csr(dd, sc->hw_context, SC(CTRL)); 1330 sc->flags |= SCF_ENABLED; 1331 1332unlock: 1333 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1334 1335 return ret; 1336} 1337 1338/* force a credit return on the context */ 1339void sc_return_credits(struct send_context *sc) 1340{ 1341 if (!sc) 1342 return; 1343 1344 /* a 0->1 transition schedules a credit return */ 1345 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 1346 SC(CREDIT_FORCE_FORCE_RETURN_SMASK)); 1347 /* 1348 * Ensure that the write is flushed and the credit return is 1349 * scheduled. We care more about the 0 -> 1 transition. 1350 */ 1351 read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE)); 1352 /* set back to 0 for next time */ 1353 write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0); 1354} 1355 1356/* allow all in-flight packets to drain on the context */ 1357void sc_flush(struct send_context *sc) 1358{ 1359 if (!sc) 1360 return; 1361 1362 sc_wait_for_packet_egress(sc, 1); 1363} 1364 1365/* drop all packets on the context, no waiting until they are sent */ 1366void sc_drop(struct send_context *sc) 1367{ 1368 if (!sc) 1369 return; 1370 1371 dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n", 1372 __func__, sc->sw_index, sc->hw_context); 1373} 1374 1375/* 1376 * Start the software reaction to a context halt or SPC freeze: 1377 * - mark the context as halted or frozen 1378 * - stop buffer allocations 1379 * 1380 * Called from the error interrupt. Other work is deferred until 1381 * out of the interrupt. 1382 */ 1383void sc_stop(struct send_context *sc, int flag) 1384{ 1385 unsigned long flags; 1386 1387 /* stop buffer allocations */ 1388 spin_lock_irqsave(&sc->alloc_lock, flags); 1389 /* mark the context */ 1390 sc->flags |= flag; 1391 sc->flags &= ~SCF_ENABLED; 1392 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1393 wake_up(&sc->halt_wait); 1394} 1395 1396#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32)) 1397#define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS) 1398 1399/* 1400 * The send context buffer "allocator". 1401 * 1402 * @sc: the PIO send context we are allocating from 1403 * @len: length of whole packet - including PBC - in dwords 1404 * @cb: optional callback to call when the buffer is finished sending 1405 * @arg: argument for cb 1406 * 1407 * Return a pointer to a PIO buffer, NULL if not enough room, -ECOMM 1408 * when link is down. 1409 */ 1410struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, 1411 pio_release_cb cb, void *arg) 1412{ 1413 struct pio_buf *pbuf = NULL; 1414 unsigned long flags; 1415 unsigned long avail; 1416 unsigned long blocks = dwords_to_blocks(dw_len); 1417 u32 fill_wrap; 1418 int trycount = 0; 1419 u32 head, next; 1420 1421 spin_lock_irqsave(&sc->alloc_lock, flags); 1422 if (!(sc->flags & SCF_ENABLED)) { 1423 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1424 return ERR_PTR(-ECOMM); 1425 } 1426 1427retry: 1428 avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free); 1429 if (blocks > avail) { 1430 /* not enough room */ 1431 if (unlikely(trycount)) { /* already tried to get more room */ 1432 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1433 goto done; 1434 } 1435 /* copy from receiver cache line and recalculate */ 1436 sc->alloc_free = READ_ONCE(sc->free); 1437 avail = 1438 (unsigned long)sc->credits - 1439 (sc->fill - sc->alloc_free); 1440 if (blocks > avail) { 1441 /* still no room, actively update */ 1442 sc_release_update(sc); 1443 sc->alloc_free = READ_ONCE(sc->free); 1444 trycount++; 1445 goto retry; 1446 } 1447 } 1448 1449 /* there is enough room */ 1450 1451 preempt_disable(); 1452 this_cpu_inc(*sc->buffers_allocated); 1453 1454 /* read this once */ 1455 head = sc->sr_head; 1456 1457 /* "allocate" the buffer */ 1458 sc->fill += blocks; 1459 fill_wrap = sc->fill_wrap; 1460 sc->fill_wrap += blocks; 1461 if (sc->fill_wrap >= sc->credits) 1462 sc->fill_wrap = sc->fill_wrap - sc->credits; 1463 1464 /* 1465 * Fill the parts that the releaser looks at before moving the head. 1466 * The only necessary piece is the sent_at field. The credits 1467 * we have just allocated cannot have been returned yet, so the 1468 * cb and arg will not be looked at for a "while". Put them 1469 * on this side of the memory barrier anyway. 1470 */ 1471 pbuf = &sc->sr[head].pbuf; 1472 pbuf->sent_at = sc->fill; 1473 pbuf->cb = cb; 1474 pbuf->arg = arg; 1475 pbuf->sc = sc; /* could be filled in at sc->sr init time */ 1476 /* make sure this is in memory before updating the head */ 1477 1478 /* calculate next head index, do not store */ 1479 next = head + 1; 1480 if (next >= sc->sr_size) 1481 next = 0; 1482 /* 1483 * update the head - must be last! - the releaser can look at fields 1484 * in pbuf once we move the head 1485 */ 1486 smp_wmb(); 1487 sc->sr_head = next; 1488 spin_unlock_irqrestore(&sc->alloc_lock, flags); 1489 1490 /* finish filling in the buffer outside the lock */ 1491 pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; 1492 pbuf->end = sc->base_addr + sc->size; 1493 pbuf->qw_written = 0; 1494 pbuf->carry_bytes = 0; 1495 pbuf->carry.val64 = 0; 1496done: 1497 return pbuf; 1498} 1499 1500/* 1501 * There are at least two entities that can turn on credit return 1502 * interrupts and they can overlap. Avoid problems by implementing 1503 * a count scheme that is enforced by a lock. The lock is needed because 1504 * the count and CSR write must be paired. 1505 */ 1506 1507/* 1508 * Start credit return interrupts. This is managed by a count. If already 1509 * on, just increment the count. 1510 */ 1511void sc_add_credit_return_intr(struct send_context *sc) 1512{ 1513 unsigned long flags; 1514 1515 /* lock must surround both the count change and the CSR update */ 1516 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1517 if (sc->credit_intr_count == 0) { 1518 sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1519 write_kctxt_csr(sc->dd, sc->hw_context, 1520 SC(CREDIT_CTRL), sc->credit_ctrl); 1521 } 1522 sc->credit_intr_count++; 1523 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1524} 1525 1526/* 1527 * Stop credit return interrupts. This is managed by a count. Decrement the 1528 * count, if the last user, then turn the credit interrupts off. 1529 */ 1530void sc_del_credit_return_intr(struct send_context *sc) 1531{ 1532 unsigned long flags; 1533 1534 WARN_ON(sc->credit_intr_count == 0); 1535 1536 /* lock must surround both the count change and the CSR update */ 1537 spin_lock_irqsave(&sc->credit_ctrl_lock, flags); 1538 sc->credit_intr_count--; 1539 if (sc->credit_intr_count == 0) { 1540 sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK); 1541 write_kctxt_csr(sc->dd, sc->hw_context, 1542 SC(CREDIT_CTRL), sc->credit_ctrl); 1543 } 1544 spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags); 1545} 1546 1547/* 1548 * The caller must be careful when calling this. All needint calls 1549 * must be paired with !needint. 1550 */ 1551void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) 1552{ 1553 if (needint) 1554 sc_add_credit_return_intr(sc); 1555 else 1556 sc_del_credit_return_intr(sc); 1557 trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); 1558 if (needint) 1559 sc_return_credits(sc); 1560} 1561 1562/** 1563 * sc_piobufavail - callback when a PIO buffer is available 1564 * @sc: the send context 1565 * 1566 * This is called from the interrupt handler when a PIO buffer is 1567 * available after hfi1_verbs_send() returned an error that no buffers were 1568 * available. Disable the interrupt if there are no more QPs waiting. 1569 */ 1570static void sc_piobufavail(struct send_context *sc) 1571{ 1572 struct hfi1_devdata *dd = sc->dd; 1573 struct list_head *list; 1574 struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; 1575 struct rvt_qp *qp; 1576 struct hfi1_qp_priv *priv; 1577 unsigned long flags; 1578 uint i, n = 0, top_idx = 0; 1579 1580 if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && 1581 dd->send_contexts[sc->sw_index].type != SC_VL15) 1582 return; 1583 list = &sc->piowait; 1584 /* 1585 * Note: checking that the piowait list is empty and clearing 1586 * the buffer available interrupt needs to be atomic or we 1587 * could end up with QPs on the wait list with the interrupt 1588 * disabled. 1589 */ 1590 write_seqlock_irqsave(&sc->waitlock, flags); 1591 while (!list_empty(list)) { 1592 struct iowait *wait; 1593 1594 if (n == ARRAY_SIZE(qps)) 1595 break; 1596 wait = list_first_entry(list, struct iowait, list); 1597 iowait_get_priority(wait); 1598 qp = iowait_to_qp(wait); 1599 priv = qp->priv; 1600 list_del_init(&priv->s_iowait.list); 1601 priv->s_iowait.lock = NULL; 1602 if (n) { 1603 priv = qps[top_idx]->priv; 1604 top_idx = iowait_priority_update_top(wait, 1605 &priv->s_iowait, 1606 n, top_idx); 1607 } 1608 1609 /* refcount held until actual wake up */ 1610 qps[n++] = qp; 1611 } 1612 /* 1613 * If there had been waiters and there are more 1614 * insure that we redo the force to avoid a potential hang. 1615 */ 1616 if (n) { 1617 hfi1_sc_wantpiobuf_intr(sc, 0); 1618 if (!list_empty(list)) 1619 hfi1_sc_wantpiobuf_intr(sc, 1); 1620 } 1621 write_sequnlock_irqrestore(&sc->waitlock, flags); 1622 1623 /* Wake up the top-priority one first */ 1624 if (n) 1625 hfi1_qp_wakeup(qps[top_idx], 1626 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1627 for (i = 0; i < n; i++) 1628 if (i != top_idx) 1629 hfi1_qp_wakeup(qps[i], 1630 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); 1631} 1632 1633/* translate a send credit update to a bit code of reasons */ 1634static inline int fill_code(u64 hw_free) 1635{ 1636 int code = 0; 1637 1638 if (hw_free & CR_STATUS_SMASK) 1639 code |= PRC_STATUS_ERR; 1640 if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK) 1641 code |= PRC_PBC; 1642 if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK) 1643 code |= PRC_THRESHOLD; 1644 if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK) 1645 code |= PRC_FILL_ERR; 1646 if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK) 1647 code |= PRC_SC_DISABLE; 1648 return code; 1649} 1650 1651/* use the jiffies compare to get the wrap right */ 1652#define sent_before(a, b) time_before(a, b) /* a < b */ 1653 1654/* 1655 * The send context buffer "releaser". 1656 */ 1657void sc_release_update(struct send_context *sc) 1658{ 1659 struct pio_buf *pbuf; 1660 u64 hw_free; 1661 u32 head, tail; 1662 unsigned long old_free; 1663 unsigned long free; 1664 unsigned long extra; 1665 unsigned long flags; 1666 int code; 1667 1668 if (!sc) 1669 return; 1670 1671 spin_lock_irqsave(&sc->release_lock, flags); 1672 /* update free */ 1673 hw_free = le64_to_cpu(*sc->hw_free); /* volatile read */ 1674 old_free = sc->free; 1675 extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT) 1676 - (old_free & CR_COUNTER_MASK)) 1677 & CR_COUNTER_MASK; 1678 free = old_free + extra; 1679 trace_hfi1_piofree(sc, extra); 1680 1681 /* call sent buffer callbacks */ 1682 code = -1; /* code not yet set */ 1683 head = READ_ONCE(sc->sr_head); /* snapshot the head */ 1684 tail = sc->sr_tail; 1685 while (head != tail) { 1686 pbuf = &sc->sr[tail].pbuf; 1687 1688 if (sent_before(free, pbuf->sent_at)) { 1689 /* not sent yet */ 1690 break; 1691 } 1692 if (pbuf->cb) { 1693 if (code < 0) /* fill in code on first user */ 1694 code = fill_code(hw_free); 1695 (*pbuf->cb)(pbuf->arg, code); 1696 } 1697 1698 tail++; 1699 if (tail >= sc->sr_size) 1700 tail = 0; 1701 } 1702 sc->sr_tail = tail; 1703 /* make sure tail is updated before free */ 1704 smp_wmb(); 1705 sc->free = free; 1706 spin_unlock_irqrestore(&sc->release_lock, flags); 1707 sc_piobufavail(sc); 1708} 1709 1710/* 1711 * Send context group releaser. Argument is the send context that caused 1712 * the interrupt. Called from the send context interrupt handler. 1713 * 1714 * Call release on all contexts in the group. 1715 * 1716 * This routine takes the sc_lock without an irqsave because it is only 1717 * called from an interrupt handler. Adjust if that changes. 1718 */ 1719void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context) 1720{ 1721 struct send_context *sc; 1722 u32 sw_index; 1723 u32 gc, gc_end; 1724 1725 spin_lock(&dd->sc_lock); 1726 sw_index = dd->hw_to_sw[hw_context]; 1727 if (unlikely(sw_index >= dd->num_send_contexts)) { 1728 dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n", 1729 __func__, hw_context, sw_index); 1730 goto done; 1731 } 1732 sc = dd->send_contexts[sw_index].sc; 1733 if (unlikely(!sc)) 1734 goto done; 1735 1736 gc = group_context(hw_context, sc->group); 1737 gc_end = gc + group_size(sc->group); 1738 for (; gc < gc_end; gc++) { 1739 sw_index = dd->hw_to_sw[gc]; 1740 if (unlikely(sw_index >= dd->num_send_contexts)) { 1741 dd_dev_err(dd, 1742 "%s: invalid hw (%u) to sw (%u) mapping\n", 1743 __func__, hw_context, sw_index); 1744 continue; 1745 } 1746 sc_release_update(dd->send_contexts[sw_index].sc); 1747 } 1748done: 1749 spin_unlock(&dd->sc_lock); 1750} 1751 1752/* 1753 * pio_select_send_context_vl() - select send context 1754 * @dd: devdata 1755 * @selector: a spreading factor 1756 * @vl: this vl 1757 * 1758 * This function returns a send context based on the selector and a vl. 1759 * The mapping fields are protected by RCU 1760 */ 1761struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd, 1762 u32 selector, u8 vl) 1763{ 1764 struct pio_vl_map *m; 1765 struct pio_map_elem *e; 1766 struct send_context *rval; 1767 1768 /* 1769 * NOTE This should only happen if SC->VL changed after the initial 1770 * checks on the QP/AH 1771 * Default will return VL0's send context below 1772 */ 1773 if (unlikely(vl >= num_vls)) { 1774 rval = NULL; 1775 goto done; 1776 } 1777 1778 rcu_read_lock(); 1779 m = rcu_dereference(dd->pio_map); 1780 if (unlikely(!m)) { 1781 rcu_read_unlock(); 1782 return dd->vld[0].sc; 1783 } 1784 e = m->map[vl & m->mask]; 1785 rval = e->ksc[selector & e->mask]; 1786 rcu_read_unlock(); 1787 1788done: 1789 rval = !rval ? dd->vld[0].sc : rval; 1790 return rval; 1791} 1792 1793/* 1794 * pio_select_send_context_sc() - select send context 1795 * @dd: devdata 1796 * @selector: a spreading factor 1797 * @sc5: the 5 bit sc 1798 * 1799 * This function returns an send context based on the selector and an sc 1800 */ 1801struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd, 1802 u32 selector, u8 sc5) 1803{ 1804 u8 vl = sc_to_vlt(dd, sc5); 1805 1806 return pio_select_send_context_vl(dd, selector, vl); 1807} 1808 1809/* 1810 * Free the indicated map struct 1811 */ 1812static void pio_map_free(struct pio_vl_map *m) 1813{ 1814 int i; 1815 1816 for (i = 0; m && i < m->actual_vls; i++) 1817 kfree(m->map[i]); 1818 kfree(m); 1819} 1820 1821/* 1822 * Handle RCU callback 1823 */ 1824static void pio_map_rcu_callback(struct rcu_head *list) 1825{ 1826 struct pio_vl_map *m = container_of(list, struct pio_vl_map, list); 1827 1828 pio_map_free(m); 1829} 1830 1831/* 1832 * Set credit return threshold for the kernel send context 1833 */ 1834static void set_threshold(struct hfi1_devdata *dd, int scontext, int i) 1835{ 1836 u32 thres; 1837 1838 thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext], 1839 50), 1840 sc_mtu_to_threshold(dd->kernel_send_context[scontext], 1841 dd->vld[i].mtu, 1842 dd->rcd[0]->rcvhdrqentsize)); 1843 sc_set_cr_threshold(dd->kernel_send_context[scontext], thres); 1844} 1845 1846/* 1847 * pio_map_init - called when #vls change 1848 * @dd: hfi1_devdata 1849 * @port: port number 1850 * @num_vls: number of vls 1851 * @vl_scontexts: per vl send context mapping (optional) 1852 * 1853 * This routine changes the mapping based on the number of vls. 1854 * 1855 * vl_scontexts is used to specify a non-uniform vl/send context 1856 * loading. NULL implies auto computing the loading and giving each 1857 * VL an uniform distribution of send contexts per VL. 1858 * 1859 * The auto algorithm computers the sc_per_vl and the number of extra 1860 * send contexts. Any extra send contexts are added from the last VL 1861 * on down 1862 * 1863 * rcu locking is used here to control access to the mapping fields. 1864 * 1865 * If either the num_vls or num_send_contexts are non-power of 2, the 1866 * array sizes in the struct pio_vl_map and the struct pio_map_elem are 1867 * rounded up to the next highest power of 2 and the first entry is 1868 * reused in a round robin fashion. 1869 * 1870 * If an error occurs the map change is not done and the mapping is not 1871 * chaged. 1872 * 1873 */ 1874int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) 1875{ 1876 int i, j; 1877 int extra, sc_per_vl; 1878 int scontext = 1; 1879 int num_kernel_send_contexts = 0; 1880 u8 lvl_scontexts[OPA_MAX_VLS]; 1881 struct pio_vl_map *oldmap, *newmap; 1882 1883 if (!vl_scontexts) { 1884 for (i = 0; i < dd->num_send_contexts; i++) 1885 if (dd->send_contexts[i].type == SC_KERNEL) 1886 num_kernel_send_contexts++; 1887 /* truncate divide */ 1888 sc_per_vl = num_kernel_send_contexts / num_vls; 1889 /* extras */ 1890 extra = num_kernel_send_contexts % num_vls; 1891 vl_scontexts = lvl_scontexts; 1892 /* add extras from last vl down */ 1893 for (i = num_vls - 1; i >= 0; i--, extra--) 1894 vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0); 1895 } 1896 /* build new map */ 1897 newmap = kzalloc(sizeof(*newmap) + 1898 roundup_pow_of_two(num_vls) * 1899 sizeof(struct pio_map_elem *), 1900 GFP_KERNEL); 1901 if (!newmap) 1902 goto bail; 1903 newmap->actual_vls = num_vls; 1904 newmap->vls = roundup_pow_of_two(num_vls); 1905 newmap->mask = (1 << ilog2(newmap->vls)) - 1; 1906 for (i = 0; i < newmap->vls; i++) { 1907 /* save for wrap around */ 1908 int first_scontext = scontext; 1909 1910 if (i < newmap->actual_vls) { 1911 int sz = roundup_pow_of_two(vl_scontexts[i]); 1912 1913 /* only allocate once */ 1914 newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) + 1915 sz * sizeof(struct 1916 send_context *), 1917 GFP_KERNEL); 1918 if (!newmap->map[i]) 1919 goto bail; 1920 newmap->map[i]->mask = (1 << ilog2(sz)) - 1; 1921 /* 1922 * assign send contexts and 1923 * adjust credit return threshold 1924 */ 1925 for (j = 0; j < sz; j++) { 1926 if (dd->kernel_send_context[scontext]) { 1927 newmap->map[i]->ksc[j] = 1928 dd->kernel_send_context[scontext]; 1929 set_threshold(dd, scontext, i); 1930 } 1931 if (++scontext >= first_scontext + 1932 vl_scontexts[i]) 1933 /* wrap back to first send context */ 1934 scontext = first_scontext; 1935 } 1936 } else { 1937 /* just re-use entry without allocating */ 1938 newmap->map[i] = newmap->map[i % num_vls]; 1939 } 1940 scontext = first_scontext + vl_scontexts[i]; 1941 } 1942 /* newmap in hand, save old map */ 1943 spin_lock_irq(&dd->pio_map_lock); 1944 oldmap = rcu_dereference_protected(dd->pio_map, 1945 lockdep_is_held(&dd->pio_map_lock)); 1946 1947 /* publish newmap */ 1948 rcu_assign_pointer(dd->pio_map, newmap); 1949 1950 spin_unlock_irq(&dd->pio_map_lock); 1951 /* success, free any old map after grace period */ 1952 if (oldmap) 1953 call_rcu(&oldmap->list, pio_map_rcu_callback); 1954 return 0; 1955bail: 1956 /* free any partial allocation */ 1957 pio_map_free(newmap); 1958 return -ENOMEM; 1959} 1960 1961void free_pio_map(struct hfi1_devdata *dd) 1962{ 1963 /* Free PIO map if allocated */ 1964 if (rcu_access_pointer(dd->pio_map)) { 1965 spin_lock_irq(&dd->pio_map_lock); 1966 pio_map_free(rcu_access_pointer(dd->pio_map)); 1967 RCU_INIT_POINTER(dd->pio_map, NULL); 1968 spin_unlock_irq(&dd->pio_map_lock); 1969 synchronize_rcu(); 1970 } 1971 kfree(dd->kernel_send_context); 1972 dd->kernel_send_context = NULL; 1973} 1974 1975int init_pervl_scs(struct hfi1_devdata *dd) 1976{ 1977 int i; 1978 u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */ 1979 u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */ 1980 u32 ctxt; 1981 struct hfi1_pportdata *ppd = dd->pport; 1982 1983 dd->vld[15].sc = sc_alloc(dd, SC_VL15, 1984 dd->rcd[0]->rcvhdrqentsize, dd->node); 1985 if (!dd->vld[15].sc) 1986 return -ENOMEM; 1987 1988 hfi1_init_ctxt(dd->vld[15].sc); 1989 dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); 1990 1991 dd->kernel_send_context = kcalloc_node(dd->num_send_contexts, 1992 sizeof(struct send_context *), 1993 GFP_KERNEL, dd->node); 1994 if (!dd->kernel_send_context) 1995 goto freesc15; 1996 1997 dd->kernel_send_context[0] = dd->vld[15].sc; 1998 1999 for (i = 0; i < num_vls; i++) { 2000 /* 2001 * Since this function does not deal with a specific 2002 * receive context but we need the RcvHdrQ entry size, 2003 * use the size from rcd[0]. It is guaranteed to be 2004 * valid at this point and will remain the same for all 2005 * receive contexts. 2006 */ 2007 dd->vld[i].sc = sc_alloc(dd, SC_KERNEL, 2008 dd->rcd[0]->rcvhdrqentsize, dd->node); 2009 if (!dd->vld[i].sc) 2010 goto nomem; 2011 dd->kernel_send_context[i + 1] = dd->vld[i].sc; 2012 hfi1_init_ctxt(dd->vld[i].sc); 2013 /* non VL15 start with the max MTU */ 2014 dd->vld[i].mtu = hfi1_max_mtu; 2015 } 2016 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2017 dd->kernel_send_context[i + 1] = 2018 sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node); 2019 if (!dd->kernel_send_context[i + 1]) 2020 goto nomem; 2021 hfi1_init_ctxt(dd->kernel_send_context[i + 1]); 2022 } 2023 2024 sc_enable(dd->vld[15].sc); 2025 ctxt = dd->vld[15].sc->hw_context; 2026 mask = all_vl_mask & ~(1LL << 15); 2027 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2028 dd_dev_info(dd, 2029 "Using send context %u(%u) for VL15\n", 2030 dd->vld[15].sc->sw_index, ctxt); 2031 2032 for (i = 0; i < num_vls; i++) { 2033 sc_enable(dd->vld[i].sc); 2034 ctxt = dd->vld[i].sc->hw_context; 2035 mask = all_vl_mask & ~(data_vls_mask); 2036 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2037 } 2038 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) { 2039 sc_enable(dd->kernel_send_context[i + 1]); 2040 ctxt = dd->kernel_send_context[i + 1]->hw_context; 2041 mask = all_vl_mask & ~(data_vls_mask); 2042 write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask); 2043 } 2044 2045 if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) 2046 goto nomem; 2047 return 0; 2048 2049nomem: 2050 for (i = 0; i < num_vls; i++) { 2051 sc_free(dd->vld[i].sc); 2052 dd->vld[i].sc = NULL; 2053 } 2054 2055 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) 2056 sc_free(dd->kernel_send_context[i + 1]); 2057 2058 kfree(dd->kernel_send_context); 2059 dd->kernel_send_context = NULL; 2060 2061freesc15: 2062 sc_free(dd->vld[15].sc); 2063 return -ENOMEM; 2064} 2065 2066int init_credit_return(struct hfi1_devdata *dd) 2067{ 2068 int ret; 2069 int i; 2070 2071 dd->cr_base = kcalloc( 2072 node_affinity.num_possible_nodes, 2073 sizeof(struct credit_return_base), 2074 GFP_KERNEL); 2075 if (!dd->cr_base) { 2076 ret = -ENOMEM; 2077 goto done; 2078 } 2079 for_each_node_with_cpus(i) { 2080 int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); 2081 2082 set_dev_node(&dd->pcidev->dev, i); 2083 dd->cr_base[i].va = dma_alloc_coherent(&dd->pcidev->dev, 2084 bytes, 2085 &dd->cr_base[i].dma, 2086 GFP_KERNEL); 2087 if (!dd->cr_base[i].va) { 2088 set_dev_node(&dd->pcidev->dev, dd->node); 2089 dd_dev_err(dd, 2090 "Unable to allocate credit return DMA range for NUMA %d\n", 2091 i); 2092 ret = -ENOMEM; 2093 goto done; 2094 } 2095 } 2096 set_dev_node(&dd->pcidev->dev, dd->node); 2097 2098 ret = 0; 2099done: 2100 return ret; 2101} 2102 2103void free_credit_return(struct hfi1_devdata *dd) 2104{ 2105 int i; 2106 2107 if (!dd->cr_base) 2108 return; 2109 for (i = 0; i < node_affinity.num_possible_nodes; i++) { 2110 if (dd->cr_base[i].va) { 2111 dma_free_coherent(&dd->pcidev->dev, 2112 TXE_NUM_CONTEXTS * 2113 sizeof(struct credit_return), 2114 dd->cr_base[i].va, 2115 dd->cr_base[i].dma); 2116 } 2117 } 2118 kfree(dd->cr_base); 2119 dd->cr_base = NULL; 2120} 2121 2122void seqfile_dump_sci(struct seq_file *s, u32 i, 2123 struct send_context_info *sci) 2124{ 2125 struct send_context *sc = sci->sc; 2126 u64 reg; 2127 2128 seq_printf(s, "SCI %u: type %u base %u credits %u\n", 2129 i, sci->type, sci->base, sci->credits); 2130 seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", 2131 sc->flags, sc->sw_index, sc->hw_context, sc->group); 2132 seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", 2133 sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); 2134 seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", 2135 sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); 2136 seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", 2137 sc->credit_intr_count, sc->credit_ctrl); 2138 reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); 2139 seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n", 2140 (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> 2141 CR_COUNTER_SHIFT, 2142 (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & 2143 SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), 2144 reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); 2145}