pio_copy.c (18467B)
1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 2/* 3 * Copyright(c) 2015, 2016 Intel Corporation. 4 */ 5 6#include "hfi.h" 7 8/* additive distance between non-SOP and SOP space */ 9#define SOP_DISTANCE (TXE_PIO_SIZE / 2) 10#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1) 11/* number of QUADWORDs in a block */ 12#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64)) 13 14/** 15 * pio_copy - copy data block to MMIO space 16 * @dd: hfi1 dev data 17 * @pbuf: a number of blocks allocated within a PIO send context 18 * @pbc: PBC to send 19 * @from: source, must be 8 byte aligned 20 * @count: number of DWORD (32-bit) quantities to copy from source 21 * 22 * Copy data from source to PIO Send Buffer memory, 8 bytes at a time. 23 * Must always write full BLOCK_SIZE bytes blocks. The first block must 24 * be written to the corresponding SOP=1 address. 25 * 26 * Known: 27 * o pbuf->start always starts on a block boundary 28 * o pbuf can wrap only at a block boundary 29 */ 30void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, 31 const void *from, size_t count) 32{ 33 void __iomem *dest = pbuf->start + SOP_DISTANCE; 34 void __iomem *send = dest + PIO_BLOCK_SIZE; 35 void __iomem *dend; /* 8-byte data end */ 36 37 /* write the PBC */ 38 writeq(pbc, dest); 39 dest += sizeof(u64); 40 41 /* calculate where the QWORD data ends - in SOP=1 space */ 42 dend = dest + ((count >> 1) * sizeof(u64)); 43 44 if (dend < send) { 45 /* 46 * all QWORD data is within the SOP block, does *not* 47 * reach the end of the SOP block 48 */ 49 50 while (dest < dend) { 51 writeq(*(u64 *)from, dest); 52 from += sizeof(u64); 53 dest += sizeof(u64); 54 } 55 /* 56 * No boundary checks are needed here: 57 * 0. We're not on the SOP block boundary 58 * 1. The possible DWORD dangle will still be within 59 * the SOP block 60 * 2. We cannot wrap except on a block boundary. 61 */ 62 } else { 63 /* QWORD data extends _to_ or beyond the SOP block */ 64 65 /* write 8-byte SOP chunk data */ 66 while (dest < send) { 67 writeq(*(u64 *)from, dest); 68 from += sizeof(u64); 69 dest += sizeof(u64); 70 } 71 /* drop out of the SOP range */ 72 dest -= SOP_DISTANCE; 73 dend -= SOP_DISTANCE; 74 75 /* 76 * If the wrap comes before or matches the data end, 77 * copy until until the wrap, then wrap. 78 * 79 * If the data ends at the end of the SOP above and 80 * the buffer wraps, then pbuf->end == dend == dest 81 * and nothing will get written, but we will wrap in 82 * case there is a dangling DWORD. 83 */ 84 if (pbuf->end <= dend) { 85 while (dest < pbuf->end) { 86 writeq(*(u64 *)from, dest); 87 from += sizeof(u64); 88 dest += sizeof(u64); 89 } 90 91 dest -= pbuf->sc->size; 92 dend -= pbuf->sc->size; 93 } 94 95 /* write 8-byte non-SOP, non-wrap chunk data */ 96 while (dest < dend) { 97 writeq(*(u64 *)from, dest); 98 from += sizeof(u64); 99 dest += sizeof(u64); 100 } 101 } 102 /* at this point we have wrapped if we are going to wrap */ 103 104 /* write dangling u32, if any */ 105 if (count & 1) { 106 union mix val; 107 108 val.val64 = 0; 109 val.val32[0] = *(u32 *)from; 110 writeq(val.val64, dest); 111 dest += sizeof(u64); 112 } 113 /* 114 * fill in rest of block, no need to check pbuf->end 115 * as we only wrap on a block boundary 116 */ 117 while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) { 118 writeq(0, dest); 119 dest += sizeof(u64); 120 } 121 122 /* finished with this buffer */ 123 this_cpu_dec(*pbuf->sc->buffers_allocated); 124 preempt_enable(); 125} 126 127/* 128 * Handle carry bytes using shifts and masks. 129 * 130 * NOTE: the value the unused portion of carry is expected to always be zero. 131 */ 132 133/* 134 * "zero" shift - bit shift used to zero out upper bytes. Input is 135 * the count of LSB bytes to preserve. 136 */ 137#define zshift(x) (8 * (8 - (x))) 138 139/* 140 * "merge" shift - bit shift used to merge with carry bytes. Input is 141 * the LSB byte count to move beyond. 142 */ 143#define mshift(x) (8 * (x)) 144 145/* 146 * Jump copy - no-loop copy for < 8 bytes. 147 */ 148static inline void jcopy(u8 *dest, const u8 *src, u32 n) 149{ 150 switch (n) { 151 case 7: 152 *dest++ = *src++; 153 fallthrough; 154 case 6: 155 *dest++ = *src++; 156 fallthrough; 157 case 5: 158 *dest++ = *src++; 159 fallthrough; 160 case 4: 161 *dest++ = *src++; 162 fallthrough; 163 case 3: 164 *dest++ = *src++; 165 fallthrough; 166 case 2: 167 *dest++ = *src++; 168 fallthrough; 169 case 1: 170 *dest++ = *src++; 171 } 172} 173 174/* 175 * Read nbytes from "from" and and place them in the low bytes 176 * of pbuf->carry. Other bytes are left as-is. Any previous 177 * value in pbuf->carry is lost. 178 * 179 * NOTES: 180 * o do not read from from if nbytes is zero 181 * o from may _not_ be u64 aligned. 182 */ 183static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, 184 unsigned int nbytes) 185{ 186 pbuf->carry.val64 = 0; 187 jcopy(&pbuf->carry.val8[0], from, nbytes); 188 pbuf->carry_bytes = nbytes; 189} 190 191/* 192 * Read nbytes bytes from "from" and put them at the end of pbuf->carry. 193 * It is expected that the extra read does not overfill carry. 194 * 195 * NOTES: 196 * o from may _not_ be u64 aligned 197 * o nbytes may span a QW boundary 198 */ 199static inline void read_extra_bytes(struct pio_buf *pbuf, 200 const void *from, unsigned int nbytes) 201{ 202 jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes); 203 pbuf->carry_bytes += nbytes; 204} 205 206/* 207 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. 208 * Put the unused part of the next 8 bytes of src into the LSB bytes of 209 * pbuf->carry with the upper bytes zeroed.. 210 * 211 * NOTES: 212 * o result must keep unused bytes zeroed 213 * o src must be u64 aligned 214 */ 215static inline void merge_write8( 216 struct pio_buf *pbuf, 217 void __iomem *dest, 218 const void *src) 219{ 220 u64 new, temp; 221 222 new = *(u64 *)src; 223 temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); 224 writeq(temp, dest); 225 pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); 226} 227 228/* 229 * Write a quad word using all bytes of carry. 230 */ 231static inline void carry8_write8(union mix carry, void __iomem *dest) 232{ 233 writeq(carry.val64, dest); 234} 235 236/* 237 * Write a quad word using all the valid bytes of carry. If carry 238 * has zero valid bytes, nothing is written. 239 * Returns 0 on nothing written, non-zero on quad word written. 240 */ 241static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) 242{ 243 if (pbuf->carry_bytes) { 244 /* unused bytes are always kept zeroed, so just write */ 245 writeq(pbuf->carry.val64, dest); 246 return 1; 247 } 248 249 return 0; 250} 251 252/* 253 * Segmented PIO Copy - start 254 * 255 * Start a PIO copy. 256 * 257 * @pbuf: destination buffer 258 * @pbc: the PBC for the PIO buffer 259 * @from: data source, QWORD aligned 260 * @nbytes: bytes to copy 261 */ 262void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, 263 const void *from, size_t nbytes) 264{ 265 void __iomem *dest = pbuf->start + SOP_DISTANCE; 266 void __iomem *send = dest + PIO_BLOCK_SIZE; 267 void __iomem *dend; /* 8-byte data end */ 268 269 writeq(pbc, dest); 270 dest += sizeof(u64); 271 272 /* calculate where the QWORD data ends - in SOP=1 space */ 273 dend = dest + ((nbytes >> 3) * sizeof(u64)); 274 275 if (dend < send) { 276 /* 277 * all QWORD data is within the SOP block, does *not* 278 * reach the end of the SOP block 279 */ 280 281 while (dest < dend) { 282 writeq(*(u64 *)from, dest); 283 from += sizeof(u64); 284 dest += sizeof(u64); 285 } 286 /* 287 * No boundary checks are needed here: 288 * 0. We're not on the SOP block boundary 289 * 1. The possible DWORD dangle will still be within 290 * the SOP block 291 * 2. We cannot wrap except on a block boundary. 292 */ 293 } else { 294 /* QWORD data extends _to_ or beyond the SOP block */ 295 296 /* write 8-byte SOP chunk data */ 297 while (dest < send) { 298 writeq(*(u64 *)from, dest); 299 from += sizeof(u64); 300 dest += sizeof(u64); 301 } 302 /* drop out of the SOP range */ 303 dest -= SOP_DISTANCE; 304 dend -= SOP_DISTANCE; 305 306 /* 307 * If the wrap comes before or matches the data end, 308 * copy until until the wrap, then wrap. 309 * 310 * If the data ends at the end of the SOP above and 311 * the buffer wraps, then pbuf->end == dend == dest 312 * and nothing will get written, but we will wrap in 313 * case there is a dangling DWORD. 314 */ 315 if (pbuf->end <= dend) { 316 while (dest < pbuf->end) { 317 writeq(*(u64 *)from, dest); 318 from += sizeof(u64); 319 dest += sizeof(u64); 320 } 321 322 dest -= pbuf->sc->size; 323 dend -= pbuf->sc->size; 324 } 325 326 /* write 8-byte non-SOP, non-wrap chunk data */ 327 while (dest < dend) { 328 writeq(*(u64 *)from, dest); 329 from += sizeof(u64); 330 dest += sizeof(u64); 331 } 332 } 333 /* at this point we have wrapped if we are going to wrap */ 334 335 /* ...but it doesn't matter as we're done writing */ 336 337 /* save dangling bytes, if any */ 338 read_low_bytes(pbuf, from, nbytes & 0x7); 339 340 pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3); 341} 342 343/* 344 * Mid copy helper, "mixed case" - source is 64-bit aligned but carry 345 * bytes are non-zero. 346 * 347 * Whole u64s must be written to the chip, so bytes must be manually merged. 348 * 349 * @pbuf: destination buffer 350 * @from: data source, is QWORD aligned. 351 * @nbytes: bytes to copy 352 * 353 * Must handle nbytes < 8. 354 */ 355static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) 356{ 357 void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 358 void __iomem *dend; /* 8-byte data end */ 359 unsigned long qw_to_write = nbytes >> 3; 360 unsigned long bytes_left = nbytes & 0x7; 361 362 /* calculate 8-byte data end */ 363 dend = dest + (qw_to_write * sizeof(u64)); 364 365 if (pbuf->qw_written < PIO_BLOCK_QWS) { 366 /* 367 * Still within SOP block. We don't need to check for 368 * wrap because we are still in the first block and 369 * can only wrap on block boundaries. 370 */ 371 void __iomem *send; /* SOP end */ 372 void __iomem *xend; 373 374 /* 375 * calculate the end of data or end of block, whichever 376 * comes first 377 */ 378 send = pbuf->start + PIO_BLOCK_SIZE; 379 xend = min(send, dend); 380 381 /* shift up to SOP=1 space */ 382 dest += SOP_DISTANCE; 383 xend += SOP_DISTANCE; 384 385 /* write 8-byte chunk data */ 386 while (dest < xend) { 387 merge_write8(pbuf, dest, from); 388 from += sizeof(u64); 389 dest += sizeof(u64); 390 } 391 392 /* shift down to SOP=0 space */ 393 dest -= SOP_DISTANCE; 394 } 395 /* 396 * At this point dest could be (either, both, or neither): 397 * - at dend 398 * - at the wrap 399 */ 400 401 /* 402 * If the wrap comes before or matches the data end, 403 * copy until until the wrap, then wrap. 404 * 405 * If dest is at the wrap, we will fall into the if, 406 * not do the loop, when wrap. 407 * 408 * If the data ends at the end of the SOP above and 409 * the buffer wraps, then pbuf->end == dend == dest 410 * and nothing will get written. 411 */ 412 if (pbuf->end <= dend) { 413 while (dest < pbuf->end) { 414 merge_write8(pbuf, dest, from); 415 from += sizeof(u64); 416 dest += sizeof(u64); 417 } 418 419 dest -= pbuf->sc->size; 420 dend -= pbuf->sc->size; 421 } 422 423 /* write 8-byte non-SOP, non-wrap chunk data */ 424 while (dest < dend) { 425 merge_write8(pbuf, dest, from); 426 from += sizeof(u64); 427 dest += sizeof(u64); 428 } 429 430 pbuf->qw_written += qw_to_write; 431 432 /* handle carry and left-over bytes */ 433 if (pbuf->carry_bytes + bytes_left >= 8) { 434 unsigned long nread; 435 436 /* there is enough to fill another qw - fill carry */ 437 nread = 8 - pbuf->carry_bytes; 438 read_extra_bytes(pbuf, from, nread); 439 440 /* 441 * One more write - but need to make sure dest is correct. 442 * Check for wrap and the possibility the write 443 * should be in SOP space. 444 * 445 * The two checks immediately below cannot both be true, hence 446 * the else. If we have wrapped, we cannot still be within the 447 * first block. Conversely, if we are still in the first block, 448 * we cannot have wrapped. We do the wrap check first as that 449 * is more likely. 450 */ 451 /* adjust if we have wrapped */ 452 if (dest >= pbuf->end) 453 dest -= pbuf->sc->size; 454 /* jump to the SOP range if within the first block */ 455 else if (pbuf->qw_written < PIO_BLOCK_QWS) 456 dest += SOP_DISTANCE; 457 458 /* flush out full carry */ 459 carry8_write8(pbuf->carry, dest); 460 pbuf->qw_written++; 461 462 /* now adjust and read the rest of the bytes into carry */ 463 bytes_left -= nread; 464 from += nread; /* from is now not aligned */ 465 read_low_bytes(pbuf, from, bytes_left); 466 } else { 467 /* not enough to fill another qw, append the rest to carry */ 468 read_extra_bytes(pbuf, from, bytes_left); 469 } 470} 471 472/* 473 * Mid copy helper, "straight case" - source pointer is 64-bit aligned 474 * with no carry bytes. 475 * 476 * @pbuf: destination buffer 477 * @from: data source, is QWORD aligned 478 * @nbytes: bytes to copy 479 * 480 * Must handle nbytes < 8. 481 */ 482static void mid_copy_straight(struct pio_buf *pbuf, 483 const void *from, size_t nbytes) 484{ 485 void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 486 void __iomem *dend; /* 8-byte data end */ 487 488 /* calculate 8-byte data end */ 489 dend = dest + ((nbytes >> 3) * sizeof(u64)); 490 491 if (pbuf->qw_written < PIO_BLOCK_QWS) { 492 /* 493 * Still within SOP block. We don't need to check for 494 * wrap because we are still in the first block and 495 * can only wrap on block boundaries. 496 */ 497 void __iomem *send; /* SOP end */ 498 void __iomem *xend; 499 500 /* 501 * calculate the end of data or end of block, whichever 502 * comes first 503 */ 504 send = pbuf->start + PIO_BLOCK_SIZE; 505 xend = min(send, dend); 506 507 /* shift up to SOP=1 space */ 508 dest += SOP_DISTANCE; 509 xend += SOP_DISTANCE; 510 511 /* write 8-byte chunk data */ 512 while (dest < xend) { 513 writeq(*(u64 *)from, dest); 514 from += sizeof(u64); 515 dest += sizeof(u64); 516 } 517 518 /* shift down to SOP=0 space */ 519 dest -= SOP_DISTANCE; 520 } 521 /* 522 * At this point dest could be (either, both, or neither): 523 * - at dend 524 * - at the wrap 525 */ 526 527 /* 528 * If the wrap comes before or matches the data end, 529 * copy until until the wrap, then wrap. 530 * 531 * If dest is at the wrap, we will fall into the if, 532 * not do the loop, when wrap. 533 * 534 * If the data ends at the end of the SOP above and 535 * the buffer wraps, then pbuf->end == dend == dest 536 * and nothing will get written. 537 */ 538 if (pbuf->end <= dend) { 539 while (dest < pbuf->end) { 540 writeq(*(u64 *)from, dest); 541 from += sizeof(u64); 542 dest += sizeof(u64); 543 } 544 545 dest -= pbuf->sc->size; 546 dend -= pbuf->sc->size; 547 } 548 549 /* write 8-byte non-SOP, non-wrap chunk data */ 550 while (dest < dend) { 551 writeq(*(u64 *)from, dest); 552 from += sizeof(u64); 553 dest += sizeof(u64); 554 } 555 556 /* we know carry_bytes was zero on entry to this routine */ 557 read_low_bytes(pbuf, from, nbytes & 0x7); 558 559 pbuf->qw_written += nbytes >> 3; 560} 561 562/* 563 * Segmented PIO Copy - middle 564 * 565 * Must handle any aligned tail and any aligned source with any byte count. 566 * 567 * @pbuf: a number of blocks allocated within a PIO send context 568 * @from: data source 569 * @nbytes: number of bytes to copy 570 */ 571void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) 572{ 573 unsigned long from_align = (unsigned long)from & 0x7; 574 575 if (pbuf->carry_bytes + nbytes < 8) { 576 /* not enough bytes to fill a QW */ 577 read_extra_bytes(pbuf, from, nbytes); 578 return; 579 } 580 581 if (from_align) { 582 /* misaligned source pointer - align it */ 583 unsigned long to_align; 584 585 /* bytes to read to align "from" */ 586 to_align = 8 - from_align; 587 588 /* 589 * In the advance-to-alignment logic below, we do not need 590 * to check if we are using more than nbytes. This is because 591 * if we are here, we already know that carry+nbytes will 592 * fill at least one QW. 593 */ 594 if (pbuf->carry_bytes + to_align < 8) { 595 /* not enough align bytes to fill a QW */ 596 read_extra_bytes(pbuf, from, to_align); 597 from += to_align; 598 nbytes -= to_align; 599 } else { 600 /* bytes to fill carry */ 601 unsigned long to_fill = 8 - pbuf->carry_bytes; 602 /* bytes left over to be read */ 603 unsigned long extra = to_align - to_fill; 604 void __iomem *dest; 605 606 /* fill carry... */ 607 read_extra_bytes(pbuf, from, to_fill); 608 from += to_fill; 609 nbytes -= to_fill; 610 /* may not be enough valid bytes left to align */ 611 if (extra > nbytes) 612 extra = nbytes; 613 614 /* ...now write carry */ 615 dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 616 617 /* 618 * The two checks immediately below cannot both be 619 * true, hence the else. If we have wrapped, we 620 * cannot still be within the first block. 621 * Conversely, if we are still in the first block, we 622 * cannot have wrapped. We do the wrap check first 623 * as that is more likely. 624 */ 625 /* adjust if we've wrapped */ 626 if (dest >= pbuf->end) 627 dest -= pbuf->sc->size; 628 /* jump to SOP range if within the first block */ 629 else if (pbuf->qw_written < PIO_BLOCK_QWS) 630 dest += SOP_DISTANCE; 631 632 carry8_write8(pbuf->carry, dest); 633 pbuf->qw_written++; 634 635 /* read any extra bytes to do final alignment */ 636 /* this will overwrite anything in pbuf->carry */ 637 read_low_bytes(pbuf, from, extra); 638 from += extra; 639 nbytes -= extra; 640 /* 641 * If no bytes are left, return early - we are done. 642 * NOTE: This short-circuit is *required* because 643 * "extra" may have been reduced in size and "from" 644 * is not aligned, as required when leaving this 645 * if block. 646 */ 647 if (nbytes == 0) 648 return; 649 } 650 651 /* at this point, from is QW aligned */ 652 } 653 654 if (pbuf->carry_bytes) 655 mid_copy_mix(pbuf, from, nbytes); 656 else 657 mid_copy_straight(pbuf, from, nbytes); 658} 659 660/* 661 * Segmented PIO Copy - end 662 * 663 * Write any remainder (in pbuf->carry) and finish writing the whole block. 664 * 665 * @pbuf: a number of blocks allocated within a PIO send context 666 */ 667void seg_pio_copy_end(struct pio_buf *pbuf) 668{ 669 void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 670 671 /* 672 * The two checks immediately below cannot both be true, hence the 673 * else. If we have wrapped, we cannot still be within the first 674 * block. Conversely, if we are still in the first block, we 675 * cannot have wrapped. We do the wrap check first as that is 676 * more likely. 677 */ 678 /* adjust if we have wrapped */ 679 if (dest >= pbuf->end) 680 dest -= pbuf->sc->size; 681 /* jump to the SOP range if within the first block */ 682 else if (pbuf->qw_written < PIO_BLOCK_QWS) 683 dest += SOP_DISTANCE; 684 685 /* write final bytes, if any */ 686 if (carry_write8(pbuf, dest)) { 687 dest += sizeof(u64); 688 /* 689 * NOTE: We do not need to recalculate whether dest needs 690 * SOP_DISTANCE or not. 691 * 692 * If we are in the first block and the dangle write 693 * keeps us in the same block, dest will need 694 * to retain SOP_DISTANCE in the loop below. 695 * 696 * If we are in the first block and the dangle write pushes 697 * us to the next block, then loop below will not run 698 * and dest is not used. Hence we do not need to update 699 * it. 700 * 701 * If we are past the first block, then SOP_DISTANCE 702 * was never added, so there is nothing to do. 703 */ 704 } 705 706 /* fill in rest of block */ 707 while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) { 708 writeq(0, dest); 709 dest += sizeof(u64); 710 } 711 712 /* finished with this buffer */ 713 this_cpu_dec(*pbuf->sc->buffers_allocated); 714 preempt_enable(); 715}