virtio_ring.c (66048B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* Virtio ring implementation. 3 * 4 * Copyright 2007 Rusty Russell IBM Corporation 5 */ 6#include <linux/virtio.h> 7#include <linux/virtio_ring.h> 8#include <linux/virtio_config.h> 9#include <linux/device.h> 10#include <linux/slab.h> 11#include <linux/module.h> 12#include <linux/hrtimer.h> 13#include <linux/dma-mapping.h> 14#include <linux/spinlock.h> 15#include <xen/xen.h> 16 17#ifdef DEBUG 18/* For development, we want to crash whenever the ring is screwed. */ 19#define BAD_RING(_vq, fmt, args...) \ 20 do { \ 21 dev_err(&(_vq)->vq.vdev->dev, \ 22 "%s:"fmt, (_vq)->vq.name, ##args); \ 23 BUG(); \ 24 } while (0) 25/* Caller is supposed to guarantee no reentry. */ 26#define START_USE(_vq) \ 27 do { \ 28 if ((_vq)->in_use) \ 29 panic("%s:in_use = %i\n", \ 30 (_vq)->vq.name, (_vq)->in_use); \ 31 (_vq)->in_use = __LINE__; \ 32 } while (0) 33#define END_USE(_vq) \ 34 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 35#define LAST_ADD_TIME_UPDATE(_vq) \ 36 do { \ 37 ktime_t now = ktime_get(); \ 38 \ 39 /* No kick or get, with .1 second between? Warn. */ \ 40 if ((_vq)->last_add_time_valid) \ 41 WARN_ON(ktime_to_ms(ktime_sub(now, \ 42 (_vq)->last_add_time)) > 100); \ 43 (_vq)->last_add_time = now; \ 44 (_vq)->last_add_time_valid = true; \ 45 } while (0) 46#define LAST_ADD_TIME_CHECK(_vq) \ 47 do { \ 48 if ((_vq)->last_add_time_valid) { \ 49 WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 50 (_vq)->last_add_time)) > 100); \ 51 } \ 52 } while (0) 53#define LAST_ADD_TIME_INVALID(_vq) \ 54 ((_vq)->last_add_time_valid = false) 55#else 56#define BAD_RING(_vq, fmt, args...) \ 57 do { \ 58 dev_err(&_vq->vq.vdev->dev, \ 59 "%s:"fmt, (_vq)->vq.name, ##args); \ 60 (_vq)->broken = true; \ 61 } while (0) 62#define START_USE(vq) 63#define END_USE(vq) 64#define LAST_ADD_TIME_UPDATE(vq) 65#define LAST_ADD_TIME_CHECK(vq) 66#define LAST_ADD_TIME_INVALID(vq) 67#endif 68 69struct vring_desc_state_split { 70 void *data; /* Data for callback. */ 71 struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 72}; 73 74struct vring_desc_state_packed { 75 void *data; /* Data for callback. */ 76 struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 77 u16 num; /* Descriptor list length. */ 78 u16 last; /* The last desc state in a list. */ 79}; 80 81struct vring_desc_extra { 82 dma_addr_t addr; /* Descriptor DMA addr. */ 83 u32 len; /* Descriptor length. */ 84 u16 flags; /* Descriptor flags. */ 85 u16 next; /* The next desc state in a list. */ 86}; 87 88struct vring_virtqueue { 89 struct virtqueue vq; 90 91 /* Is this a packed ring? */ 92 bool packed_ring; 93 94 /* Is DMA API used? */ 95 bool use_dma_api; 96 97 /* Can we use weak barriers? */ 98 bool weak_barriers; 99 100 /* Other side has made a mess, don't try any more. */ 101 bool broken; 102 103 /* Host supports indirect buffers */ 104 bool indirect; 105 106 /* Host publishes avail event idx */ 107 bool event; 108 109 /* Head of free buffer list. */ 110 unsigned int free_head; 111 /* Number we've added since last sync. */ 112 unsigned int num_added; 113 114 /* Last used index we've seen. 115 * for split ring, it just contains last used index 116 * for packed ring: 117 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. 118 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. 119 */ 120 u16 last_used_idx; 121 122 /* Hint for event idx: already triggered no need to disable. 
*/ 123 bool event_triggered; 124 125 union { 126 /* Available for split ring */ 127 struct { 128 /* Actual memory layout for this queue. */ 129 struct vring vring; 130 131 /* Last written value to avail->flags */ 132 u16 avail_flags_shadow; 133 134 /* 135 * Last written value to avail->idx in 136 * guest byte order. 137 */ 138 u16 avail_idx_shadow; 139 140 /* Per-descriptor state. */ 141 struct vring_desc_state_split *desc_state; 142 struct vring_desc_extra *desc_extra; 143 144 /* DMA address and size information */ 145 dma_addr_t queue_dma_addr; 146 size_t queue_size_in_bytes; 147 } split; 148 149 /* Available for packed ring */ 150 struct { 151 /* Actual memory layout for this queue. */ 152 struct { 153 unsigned int num; 154 struct vring_packed_desc *desc; 155 struct vring_packed_desc_event *driver; 156 struct vring_packed_desc_event *device; 157 } vring; 158 159 /* Driver ring wrap counter. */ 160 bool avail_wrap_counter; 161 162 /* Avail used flags. */ 163 u16 avail_used_flags; 164 165 /* Index of the next avail descriptor. */ 166 u16 next_avail_idx; 167 168 /* 169 * Last written value to driver->flags in 170 * guest byte order. 171 */ 172 u16 event_flags_shadow; 173 174 /* Per-descriptor state. */ 175 struct vring_desc_state_packed *desc_state; 176 struct vring_desc_extra *desc_extra; 177 178 /* DMA address and size information */ 179 dma_addr_t ring_dma_addr; 180 dma_addr_t driver_event_dma_addr; 181 dma_addr_t device_event_dma_addr; 182 size_t ring_size_in_bytes; 183 size_t event_size_in_bytes; 184 } packed; 185 }; 186 187 /* How to notify other side. FIXME: commonalize hcalls! */ 188 bool (*notify)(struct virtqueue *vq); 189 190 /* DMA, allocation, and size information */ 191 bool we_own_ring; 192 193#ifdef DEBUG 194 /* They're supposed to lock for us. */ 195 unsigned int in_use; 196 197 /* Figure out if their kicks are too delayed. */ 198 bool last_add_time_valid; 199 ktime_t last_add_time; 200#endif 201}; 202 203 204/* 205 * Helpers. 206 */ 207 208#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 209 210static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq, 211 unsigned int total_sg) 212{ 213 /* 214 * If the host supports indirect descriptor tables, and we have multiple 215 * buffers, then go indirect. FIXME: tune this threshold 216 */ 217 return (vq->indirect && total_sg > 1 && vq->vq.num_free); 218} 219 220/* 221 * Modern virtio devices have feature bits to specify whether they need a 222 * quirk and bypass the IOMMU. If not there, just use the DMA API. 223 * 224 * If there, the interaction between virtio and DMA API is messy. 225 * 226 * On most systems with virtio, physical addresses match bus addresses, 227 * and it doesn't particularly matter whether we use the DMA API. 228 * 229 * On some systems, including Xen and any system with a physical device 230 * that speaks virtio behind a physical IOMMU, we must use the DMA API 231 * for virtio DMA to work at all. 232 * 233 * On other systems, including SPARC and PPC64, virtio-pci devices are 234 * enumerated as though they are behind an IOMMU, but the virtio host 235 * ignores the IOMMU, so we must either pretend that the IOMMU isn't 236 * there or somehow map everything as the identity. 237 * 238 * For the time being, we preserve historic behavior and bypass the DMA 239 * API. 240 * 241 * TODO: install a per-device DMA ops structure that does the right thing 242 * taking into account all the above quirks, and use the DMA API 243 * unconditionally on data path. 
244 */ 245 246static bool vring_use_dma_api(struct virtio_device *vdev) 247{ 248 if (!virtio_has_dma_quirk(vdev)) 249 return true; 250 251 /* Otherwise, we are left to guess. */ 252 /* 253 * In theory, it's possible to have a buggy QEMU-supposed 254 * emulated Q35 IOMMU and Xen enabled at the same time. On 255 * such a configuration, virtio has never worked and will 256 * not work without an even larger kludge. Instead, enable 257 * the DMA API if we're a Xen guest, which at least allows 258 * all of the sensible Xen configurations to work correctly. 259 */ 260 if (xen_domain()) 261 return true; 262 263 return false; 264} 265 266size_t virtio_max_dma_size(struct virtio_device *vdev) 267{ 268 size_t max_segment_size = SIZE_MAX; 269 270 if (vring_use_dma_api(vdev)) 271 max_segment_size = dma_max_mapping_size(vdev->dev.parent); 272 273 return max_segment_size; 274} 275EXPORT_SYMBOL_GPL(virtio_max_dma_size); 276 277static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 278 dma_addr_t *dma_handle, gfp_t flag) 279{ 280 if (vring_use_dma_api(vdev)) { 281 return dma_alloc_coherent(vdev->dev.parent, size, 282 dma_handle, flag); 283 } else { 284 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 285 286 if (queue) { 287 phys_addr_t phys_addr = virt_to_phys(queue); 288 *dma_handle = (dma_addr_t)phys_addr; 289 290 /* 291 * Sanity check: make sure we dind't truncate 292 * the address. The only arches I can find that 293 * have 64-bit phys_addr_t but 32-bit dma_addr_t 294 * are certain non-highmem MIPS and x86 295 * configurations, but these configurations 296 * should never allocate physical pages above 32 297 * bits, so this is fine. Just in case, throw a 298 * warning and abort if we end up with an 299 * unrepresentable address. 300 */ 301 if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 302 free_pages_exact(queue, PAGE_ALIGN(size)); 303 return NULL; 304 } 305 } 306 return queue; 307 } 308} 309 310static void vring_free_queue(struct virtio_device *vdev, size_t size, 311 void *queue, dma_addr_t dma_handle) 312{ 313 if (vring_use_dma_api(vdev)) 314 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 315 else 316 free_pages_exact(queue, PAGE_ALIGN(size)); 317} 318 319/* 320 * The DMA ops on various arches are rather gnarly right now, and 321 * making all of the arch DMA ops work on the vring device itself 322 * is a mess. For now, we use the parent device for DMA ops. 323 */ 324static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 325{ 326 return vq->vq.vdev->dev.parent; 327} 328 329/* Map one sg entry. */ 330static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 331 struct scatterlist *sg, 332 enum dma_data_direction direction) 333{ 334 if (!vq->use_dma_api) 335 return (dma_addr_t)sg_phys(sg); 336 337 /* 338 * We can't use dma_map_sg, because we don't use scatterlists in 339 * the way it expects (we don't guarantee that the scatterlist 340 * will exist for the lifetime of the mapping). 
341 */ 342 return dma_map_page(vring_dma_dev(vq), 343 sg_page(sg), sg->offset, sg->length, 344 direction); 345} 346 347static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 348 void *cpu_addr, size_t size, 349 enum dma_data_direction direction) 350{ 351 if (!vq->use_dma_api) 352 return (dma_addr_t)virt_to_phys(cpu_addr); 353 354 return dma_map_single(vring_dma_dev(vq), 355 cpu_addr, size, direction); 356} 357 358static int vring_mapping_error(const struct vring_virtqueue *vq, 359 dma_addr_t addr) 360{ 361 if (!vq->use_dma_api) 362 return 0; 363 364 return dma_mapping_error(vring_dma_dev(vq), addr); 365} 366 367 368/* 369 * Split ring specific functions - *_split(). 370 */ 371 372static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 373 struct vring_desc *desc) 374{ 375 u16 flags; 376 377 if (!vq->use_dma_api) 378 return; 379 380 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 381 382 dma_unmap_page(vring_dma_dev(vq), 383 virtio64_to_cpu(vq->vq.vdev, desc->addr), 384 virtio32_to_cpu(vq->vq.vdev, desc->len), 385 (flags & VRING_DESC_F_WRITE) ? 386 DMA_FROM_DEVICE : DMA_TO_DEVICE); 387} 388 389static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 390 unsigned int i) 391{ 392 struct vring_desc_extra *extra = vq->split.desc_extra; 393 u16 flags; 394 395 if (!vq->use_dma_api) 396 goto out; 397 398 flags = extra[i].flags; 399 400 if (flags & VRING_DESC_F_INDIRECT) { 401 dma_unmap_single(vring_dma_dev(vq), 402 extra[i].addr, 403 extra[i].len, 404 (flags & VRING_DESC_F_WRITE) ? 405 DMA_FROM_DEVICE : DMA_TO_DEVICE); 406 } else { 407 dma_unmap_page(vring_dma_dev(vq), 408 extra[i].addr, 409 extra[i].len, 410 (flags & VRING_DESC_F_WRITE) ? 411 DMA_FROM_DEVICE : DMA_TO_DEVICE); 412 } 413 414out: 415 return extra[i].next; 416} 417 418static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 419 unsigned int total_sg, 420 gfp_t gfp) 421{ 422 struct vring_desc *desc; 423 unsigned int i; 424 425 /* 426 * We require lowmem mappings for the descriptors because 427 * otherwise virt_to_phys will give us bogus addresses in the 428 * virtqueue. 
429 */ 430 gfp &= ~__GFP_HIGHMEM; 431 432 desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 433 if (!desc) 434 return NULL; 435 436 for (i = 0; i < total_sg; i++) 437 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 438 return desc; 439} 440 441static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, 442 struct vring_desc *desc, 443 unsigned int i, 444 dma_addr_t addr, 445 unsigned int len, 446 u16 flags, 447 bool indirect) 448{ 449 struct vring_virtqueue *vring = to_vvq(vq); 450 struct vring_desc_extra *extra = vring->split.desc_extra; 451 u16 next; 452 453 desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 454 desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 455 desc[i].len = cpu_to_virtio32(vq->vdev, len); 456 457 if (!indirect) { 458 next = extra[i].next; 459 desc[i].next = cpu_to_virtio16(vq->vdev, next); 460 461 extra[i].addr = addr; 462 extra[i].len = len; 463 extra[i].flags = flags; 464 } else 465 next = virtio16_to_cpu(vq->vdev, desc[i].next); 466 467 return next; 468} 469 470static inline int virtqueue_add_split(struct virtqueue *_vq, 471 struct scatterlist *sgs[], 472 unsigned int total_sg, 473 unsigned int out_sgs, 474 unsigned int in_sgs, 475 void *data, 476 void *ctx, 477 gfp_t gfp) 478{ 479 struct vring_virtqueue *vq = to_vvq(_vq); 480 struct scatterlist *sg; 481 struct vring_desc *desc; 482 unsigned int i, n, avail, descs_used, prev, err_idx; 483 int head; 484 bool indirect; 485 486 START_USE(vq); 487 488 BUG_ON(data == NULL); 489 BUG_ON(ctx && vq->indirect); 490 491 if (unlikely(vq->broken)) { 492 END_USE(vq); 493 return -EIO; 494 } 495 496 LAST_ADD_TIME_UPDATE(vq); 497 498 BUG_ON(total_sg == 0); 499 500 head = vq->free_head; 501 502 if (virtqueue_use_indirect(vq, total_sg)) 503 desc = alloc_indirect_split(_vq, total_sg, gfp); 504 else { 505 desc = NULL; 506 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 507 } 508 509 if (desc) { 510 /* Use a single buffer which doesn't continue */ 511 indirect = true; 512 /* Set up rest to use this indirect table. */ 513 i = 0; 514 descs_used = 1; 515 } else { 516 indirect = false; 517 desc = vq->split.vring.desc; 518 i = head; 519 descs_used = total_sg; 520 } 521 522 if (unlikely(vq->vq.num_free < descs_used)) { 523 pr_debug("Can't add buf len %i - avail = %i\n", 524 descs_used, vq->vq.num_free); 525 /* FIXME: for historical reasons, we force a notify here if 526 * there are outgoing parts to the buffer. Presumably the 527 * host should service the ring ASAP. */ 528 if (out_sgs) 529 vq->notify(&vq->vq); 530 if (indirect) 531 kfree(desc); 532 END_USE(vq); 533 return -ENOSPC; 534 } 535 536 for (n = 0; n < out_sgs; n++) { 537 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 538 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 539 if (vring_mapping_error(vq, addr)) 540 goto unmap_release; 541 542 prev = i; 543 /* Note that we trust indirect descriptor 544 * table since it use stream DMA mapping. 545 */ 546 i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, 547 VRING_DESC_F_NEXT, 548 indirect); 549 } 550 } 551 for (; n < (out_sgs + in_sgs); n++) { 552 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 553 dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 554 if (vring_mapping_error(vq, addr)) 555 goto unmap_release; 556 557 prev = i; 558 /* Note that we trust indirect descriptor 559 * table since it use stream DMA mapping. 
560 */ 561 i = virtqueue_add_desc_split(_vq, desc, i, addr, 562 sg->length, 563 VRING_DESC_F_NEXT | 564 VRING_DESC_F_WRITE, 565 indirect); 566 } 567 } 568 /* Last one doesn't continue. */ 569 desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 570 if (!indirect && vq->use_dma_api) 571 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 572 ~VRING_DESC_F_NEXT; 573 574 if (indirect) { 575 /* Now that the indirect table is filled in, map it. */ 576 dma_addr_t addr = vring_map_single( 577 vq, desc, total_sg * sizeof(struct vring_desc), 578 DMA_TO_DEVICE); 579 if (vring_mapping_error(vq, addr)) 580 goto unmap_release; 581 582 virtqueue_add_desc_split(_vq, vq->split.vring.desc, 583 head, addr, 584 total_sg * sizeof(struct vring_desc), 585 VRING_DESC_F_INDIRECT, 586 false); 587 } 588 589 /* We're using some buffers from the free list. */ 590 vq->vq.num_free -= descs_used; 591 592 /* Update free pointer */ 593 if (indirect) 594 vq->free_head = vq->split.desc_extra[head].next; 595 else 596 vq->free_head = i; 597 598 /* Store token and indirect buffer state. */ 599 vq->split.desc_state[head].data = data; 600 if (indirect) 601 vq->split.desc_state[head].indir_desc = desc; 602 else 603 vq->split.desc_state[head].indir_desc = ctx; 604 605 /* Put entry in available array (but don't update avail->idx until they 606 * do sync). */ 607 avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 608 vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 609 610 /* Descriptors and available array need to be set before we expose the 611 * new available array entries. */ 612 virtio_wmb(vq->weak_barriers); 613 vq->split.avail_idx_shadow++; 614 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 615 vq->split.avail_idx_shadow); 616 vq->num_added++; 617 618 pr_debug("Added buffer head %i to %p\n", head, vq); 619 END_USE(vq); 620 621 /* This is very unlikely, but theoretically possible. Kick 622 * just in case. */ 623 if (unlikely(vq->num_added == (1 << 16) - 1)) 624 virtqueue_kick(_vq); 625 626 return 0; 627 628unmap_release: 629 err_idx = i; 630 631 if (indirect) 632 i = 0; 633 else 634 i = head; 635 636 for (n = 0; n < total_sg; n++) { 637 if (i == err_idx) 638 break; 639 if (indirect) { 640 vring_unmap_one_split_indirect(vq, &desc[i]); 641 i = virtio16_to_cpu(_vq->vdev, desc[i].next); 642 } else 643 i = vring_unmap_one_split(vq, i); 644 } 645 646 if (indirect) 647 kfree(desc); 648 649 END_USE(vq); 650 return -ENOMEM; 651} 652 653static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 654{ 655 struct vring_virtqueue *vq = to_vvq(_vq); 656 u16 new, old; 657 bool needs_kick; 658 659 START_USE(vq); 660 /* We need to expose available array entries before checking avail 661 * event. */ 662 virtio_mb(vq->weak_barriers); 663 664 old = vq->split.avail_idx_shadow - vq->num_added; 665 new = vq->split.avail_idx_shadow; 666 vq->num_added = 0; 667 668 LAST_ADD_TIME_CHECK(vq); 669 LAST_ADD_TIME_INVALID(vq); 670 671 if (vq->event) { 672 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 673 vring_avail_event(&vq->split.vring)), 674 new, old); 675 } else { 676 needs_kick = !(vq->split.vring.used->flags & 677 cpu_to_virtio16(_vq->vdev, 678 VRING_USED_F_NO_NOTIFY)); 679 } 680 END_USE(vq); 681 return needs_kick; 682} 683 684static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 685 void **ctx) 686{ 687 unsigned int i, j; 688 __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 689 690 /* Clear data ptr. 
*/ 691 vq->split.desc_state[head].data = NULL; 692 693 /* Put back on free list: unmap first-level descriptors and find end */ 694 i = head; 695 696 while (vq->split.vring.desc[i].flags & nextflag) { 697 vring_unmap_one_split(vq, i); 698 i = vq->split.desc_extra[i].next; 699 vq->vq.num_free++; 700 } 701 702 vring_unmap_one_split(vq, i); 703 vq->split.desc_extra[i].next = vq->free_head; 704 vq->free_head = head; 705 706 /* Plus final descriptor */ 707 vq->vq.num_free++; 708 709 if (vq->indirect) { 710 struct vring_desc *indir_desc = 711 vq->split.desc_state[head].indir_desc; 712 u32 len; 713 714 /* Free the indirect table, if any, now that it's unmapped. */ 715 if (!indir_desc) 716 return; 717 718 len = vq->split.desc_extra[head].len; 719 720 BUG_ON(!(vq->split.desc_extra[head].flags & 721 VRING_DESC_F_INDIRECT)); 722 BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 723 724 for (j = 0; j < len / sizeof(struct vring_desc); j++) 725 vring_unmap_one_split_indirect(vq, &indir_desc[j]); 726 727 kfree(indir_desc); 728 vq->split.desc_state[head].indir_desc = NULL; 729 } else if (ctx) { 730 *ctx = vq->split.desc_state[head].indir_desc; 731 } 732} 733 734static inline bool more_used_split(const struct vring_virtqueue *vq) 735{ 736 return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 737 vq->split.vring.used->idx); 738} 739 740static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 741 unsigned int *len, 742 void **ctx) 743{ 744 struct vring_virtqueue *vq = to_vvq(_vq); 745 void *ret; 746 unsigned int i; 747 u16 last_used; 748 749 START_USE(vq); 750 751 if (unlikely(vq->broken)) { 752 END_USE(vq); 753 return NULL; 754 } 755 756 if (!more_used_split(vq)) { 757 pr_debug("No more buffers in queue\n"); 758 END_USE(vq); 759 return NULL; 760 } 761 762 /* Only get used array entries after they have been exposed by host. */ 763 virtio_rmb(vq->weak_barriers); 764 765 last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 766 i = virtio32_to_cpu(_vq->vdev, 767 vq->split.vring.used->ring[last_used].id); 768 *len = virtio32_to_cpu(_vq->vdev, 769 vq->split.vring.used->ring[last_used].len); 770 771 if (unlikely(i >= vq->split.vring.num)) { 772 BAD_RING(vq, "id %u out of range\n", i); 773 return NULL; 774 } 775 if (unlikely(!vq->split.desc_state[i].data)) { 776 BAD_RING(vq, "id %u is not a head!\n", i); 777 return NULL; 778 } 779 780 /* detach_buf_split clears data, so grab it now. */ 781 ret = vq->split.desc_state[i].data; 782 detach_buf_split(vq, i, ctx); 783 vq->last_used_idx++; 784 /* If we expect an interrupt for the next entry, tell host 785 * by writing event index and flush out the write before 786 * the read in the next get_buf call. */ 787 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 788 virtio_store_mb(vq->weak_barriers, 789 &vring_used_event(&vq->split.vring), 790 cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 791 792 LAST_ADD_TIME_INVALID(vq); 793 794 END_USE(vq); 795 return ret; 796} 797 798static void virtqueue_disable_cb_split(struct virtqueue *_vq) 799{ 800 struct vring_virtqueue *vq = to_vvq(_vq); 801 802 if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 803 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 804 if (vq->event) 805 /* TODO: this is a hack. Figure out a cleaner value to write. 
*/ 806 vring_used_event(&vq->split.vring) = 0x0; 807 else 808 vq->split.vring.avail->flags = 809 cpu_to_virtio16(_vq->vdev, 810 vq->split.avail_flags_shadow); 811 } 812} 813 814static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 815{ 816 struct vring_virtqueue *vq = to_vvq(_vq); 817 u16 last_used_idx; 818 819 START_USE(vq); 820 821 /* We optimistically turn back on interrupts, then check if there was 822 * more to do. */ 823 /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 824 * either clear the flags bit or point the event index at the next 825 * entry. Always do both to keep code simple. */ 826 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 827 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 828 if (!vq->event) 829 vq->split.vring.avail->flags = 830 cpu_to_virtio16(_vq->vdev, 831 vq->split.avail_flags_shadow); 832 } 833 vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 834 last_used_idx = vq->last_used_idx); 835 END_USE(vq); 836 return last_used_idx; 837} 838 839static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) 840{ 841 struct vring_virtqueue *vq = to_vvq(_vq); 842 843 return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 844 vq->split.vring.used->idx); 845} 846 847static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 848{ 849 struct vring_virtqueue *vq = to_vvq(_vq); 850 u16 bufs; 851 852 START_USE(vq); 853 854 /* We optimistically turn back on interrupts, then check if there was 855 * more to do. */ 856 /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 857 * either clear the flags bit or point the event index at the next 858 * entry. Always update the event index to keep code simple. */ 859 if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 860 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 861 if (!vq->event) 862 vq->split.vring.avail->flags = 863 cpu_to_virtio16(_vq->vdev, 864 vq->split.avail_flags_shadow); 865 } 866 /* TODO: tune this threshold */ 867 bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 868 869 virtio_store_mb(vq->weak_barriers, 870 &vring_used_event(&vq->split.vring), 871 cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 872 873 if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 874 - vq->last_used_idx) > bufs)) { 875 END_USE(vq); 876 return false; 877 } 878 879 END_USE(vq); 880 return true; 881} 882 883static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 884{ 885 struct vring_virtqueue *vq = to_vvq(_vq); 886 unsigned int i; 887 void *buf; 888 889 START_USE(vq); 890 891 for (i = 0; i < vq->split.vring.num; i++) { 892 if (!vq->split.desc_state[i].data) 893 continue; 894 /* detach_buf_split clears data, so grab it now. */ 895 buf = vq->split.desc_state[i].data; 896 detach_buf_split(vq, i, NULL); 897 vq->split.avail_idx_shadow--; 898 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 899 vq->split.avail_idx_shadow); 900 END_USE(vq); 901 return buf; 902 } 903 /* That should have freed everything. 
*/ 904 BUG_ON(vq->vq.num_free != vq->split.vring.num); 905 906 END_USE(vq); 907 return NULL; 908} 909 910static struct virtqueue *vring_create_virtqueue_split( 911 unsigned int index, 912 unsigned int num, 913 unsigned int vring_align, 914 struct virtio_device *vdev, 915 bool weak_barriers, 916 bool may_reduce_num, 917 bool context, 918 bool (*notify)(struct virtqueue *), 919 void (*callback)(struct virtqueue *), 920 const char *name) 921{ 922 struct virtqueue *vq; 923 void *queue = NULL; 924 dma_addr_t dma_addr; 925 size_t queue_size_in_bytes; 926 struct vring vring; 927 928 /* We assume num is a power of 2. */ 929 if (num & (num - 1)) { 930 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 931 return NULL; 932 } 933 934 /* TODO: allocate each queue chunk individually */ 935 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 936 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 937 &dma_addr, 938 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); 939 if (queue) 940 break; 941 if (!may_reduce_num) 942 return NULL; 943 } 944 945 if (!num) 946 return NULL; 947 948 if (!queue) { 949 /* Try to get a single page. You are my only hope! */ 950 queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 951 &dma_addr, GFP_KERNEL|__GFP_ZERO); 952 } 953 if (!queue) 954 return NULL; 955 956 queue_size_in_bytes = vring_size(num, vring_align); 957 vring_init(&vring, num, queue, vring_align); 958 959 vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 960 notify, callback, name); 961 if (!vq) { 962 vring_free_queue(vdev, queue_size_in_bytes, queue, 963 dma_addr); 964 return NULL; 965 } 966 967 to_vvq(vq)->split.queue_dma_addr = dma_addr; 968 to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 969 to_vvq(vq)->we_own_ring = true; 970 971 return vq; 972} 973 974 975/* 976 * Packed ring specific functions - *_packed(). 977 */ 978static inline bool packed_used_wrap_counter(u16 last_used_idx) 979{ 980 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 981} 982 983static inline u16 packed_last_used(u16 last_used_idx) 984{ 985 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 986} 987 988static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 989 struct vring_desc_extra *extra) 990{ 991 u16 flags; 992 993 if (!vq->use_dma_api) 994 return; 995 996 flags = extra->flags; 997 998 if (flags & VRING_DESC_F_INDIRECT) { 999 dma_unmap_single(vring_dma_dev(vq), 1000 extra->addr, extra->len, 1001 (flags & VRING_DESC_F_WRITE) ? 1002 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1003 } else { 1004 dma_unmap_page(vring_dma_dev(vq), 1005 extra->addr, extra->len, 1006 (flags & VRING_DESC_F_WRITE) ? 1007 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1008 } 1009} 1010 1011static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 1012 struct vring_packed_desc *desc) 1013{ 1014 u16 flags; 1015 1016 if (!vq->use_dma_api) 1017 return; 1018 1019 flags = le16_to_cpu(desc->flags); 1020 1021 dma_unmap_page(vring_dma_dev(vq), 1022 le64_to_cpu(desc->addr), 1023 le32_to_cpu(desc->len), 1024 (flags & VRING_DESC_F_WRITE) ? 1025 DMA_FROM_DEVICE : DMA_TO_DEVICE); 1026} 1027 1028static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 1029 gfp_t gfp) 1030{ 1031 struct vring_packed_desc *desc; 1032 1033 /* 1034 * We require lowmem mappings for the descriptors because 1035 * otherwise virt_to_phys will give us bogus addresses in the 1036 * virtqueue. 
1037 */ 1038 gfp &= ~__GFP_HIGHMEM; 1039 1040 desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 1041 1042 return desc; 1043} 1044 1045static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1046 struct scatterlist *sgs[], 1047 unsigned int total_sg, 1048 unsigned int out_sgs, 1049 unsigned int in_sgs, 1050 void *data, 1051 gfp_t gfp) 1052{ 1053 struct vring_packed_desc *desc; 1054 struct scatterlist *sg; 1055 unsigned int i, n, err_idx; 1056 u16 head, id; 1057 dma_addr_t addr; 1058 1059 head = vq->packed.next_avail_idx; 1060 desc = alloc_indirect_packed(total_sg, gfp); 1061 if (!desc) 1062 return -ENOMEM; 1063 1064 if (unlikely(vq->vq.num_free < 1)) { 1065 pr_debug("Can't add buf len 1 - avail = 0\n"); 1066 kfree(desc); 1067 END_USE(vq); 1068 return -ENOSPC; 1069 } 1070 1071 i = 0; 1072 id = vq->free_head; 1073 BUG_ON(id == vq->packed.vring.num); 1074 1075 for (n = 0; n < out_sgs + in_sgs; n++) { 1076 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1077 addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1078 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1079 if (vring_mapping_error(vq, addr)) 1080 goto unmap_release; 1081 1082 desc[i].flags = cpu_to_le16(n < out_sgs ? 1083 0 : VRING_DESC_F_WRITE); 1084 desc[i].addr = cpu_to_le64(addr); 1085 desc[i].len = cpu_to_le32(sg->length); 1086 i++; 1087 } 1088 } 1089 1090 /* Now that the indirect table is filled in, map it. */ 1091 addr = vring_map_single(vq, desc, 1092 total_sg * sizeof(struct vring_packed_desc), 1093 DMA_TO_DEVICE); 1094 if (vring_mapping_error(vq, addr)) 1095 goto unmap_release; 1096 1097 vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 1098 vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 1099 sizeof(struct vring_packed_desc)); 1100 vq->packed.vring.desc[head].id = cpu_to_le16(id); 1101 1102 if (vq->use_dma_api) { 1103 vq->packed.desc_extra[id].addr = addr; 1104 vq->packed.desc_extra[id].len = total_sg * 1105 sizeof(struct vring_packed_desc); 1106 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 1107 vq->packed.avail_used_flags; 1108 } 1109 1110 /* 1111 * A driver MUST NOT make the first descriptor in the list 1112 * available before all subsequent descriptors comprising 1113 * the list are made available. 1114 */ 1115 virtio_wmb(vq->weak_barriers); 1116 vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 1117 vq->packed.avail_used_flags); 1118 1119 /* We're using some buffers from the free list. */ 1120 vq->vq.num_free -= 1; 1121 1122 /* Update free pointer */ 1123 n = head + 1; 1124 if (n >= vq->packed.vring.num) { 1125 n = 0; 1126 vq->packed.avail_wrap_counter ^= 1; 1127 vq->packed.avail_used_flags ^= 1128 1 << VRING_PACKED_DESC_F_AVAIL | 1129 1 << VRING_PACKED_DESC_F_USED; 1130 } 1131 vq->packed.next_avail_idx = n; 1132 vq->free_head = vq->packed.desc_extra[id].next; 1133 1134 /* Store token and indirect buffer state. 
*/ 1135 vq->packed.desc_state[id].num = 1; 1136 vq->packed.desc_state[id].data = data; 1137 vq->packed.desc_state[id].indir_desc = desc; 1138 vq->packed.desc_state[id].last = id; 1139 1140 vq->num_added += 1; 1141 1142 pr_debug("Added buffer head %i to %p\n", head, vq); 1143 END_USE(vq); 1144 1145 return 0; 1146 1147unmap_release: 1148 err_idx = i; 1149 1150 for (i = 0; i < err_idx; i++) 1151 vring_unmap_desc_packed(vq, &desc[i]); 1152 1153 kfree(desc); 1154 1155 END_USE(vq); 1156 return -ENOMEM; 1157} 1158 1159static inline int virtqueue_add_packed(struct virtqueue *_vq, 1160 struct scatterlist *sgs[], 1161 unsigned int total_sg, 1162 unsigned int out_sgs, 1163 unsigned int in_sgs, 1164 void *data, 1165 void *ctx, 1166 gfp_t gfp) 1167{ 1168 struct vring_virtqueue *vq = to_vvq(_vq); 1169 struct vring_packed_desc *desc; 1170 struct scatterlist *sg; 1171 unsigned int i, n, c, descs_used, err_idx; 1172 __le16 head_flags, flags; 1173 u16 head, id, prev, curr, avail_used_flags; 1174 int err; 1175 1176 START_USE(vq); 1177 1178 BUG_ON(data == NULL); 1179 BUG_ON(ctx && vq->indirect); 1180 1181 if (unlikely(vq->broken)) { 1182 END_USE(vq); 1183 return -EIO; 1184 } 1185 1186 LAST_ADD_TIME_UPDATE(vq); 1187 1188 BUG_ON(total_sg == 0); 1189 1190 if (virtqueue_use_indirect(vq, total_sg)) { 1191 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1192 in_sgs, data, gfp); 1193 if (err != -ENOMEM) { 1194 END_USE(vq); 1195 return err; 1196 } 1197 1198 /* fall back on direct */ 1199 } 1200 1201 head = vq->packed.next_avail_idx; 1202 avail_used_flags = vq->packed.avail_used_flags; 1203 1204 WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 1205 1206 desc = vq->packed.vring.desc; 1207 i = head; 1208 descs_used = total_sg; 1209 1210 if (unlikely(vq->vq.num_free < descs_used)) { 1211 pr_debug("Can't add buf len %i - avail = %i\n", 1212 descs_used, vq->vq.num_free); 1213 END_USE(vq); 1214 return -ENOSPC; 1215 } 1216 1217 id = vq->free_head; 1218 BUG_ON(id == vq->packed.vring.num); 1219 1220 curr = id; 1221 c = 0; 1222 for (n = 0; n < out_sgs + in_sgs; n++) { 1223 for (sg = sgs[n]; sg; sg = sg_next(sg)) { 1224 dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 1225 DMA_TO_DEVICE : DMA_FROM_DEVICE); 1226 if (vring_mapping_error(vq, addr)) 1227 goto unmap_release; 1228 1229 flags = cpu_to_le16(vq->packed.avail_used_flags | 1230 (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 1231 (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 1232 if (i == head) 1233 head_flags = flags; 1234 else 1235 desc[i].flags = flags; 1236 1237 desc[i].addr = cpu_to_le64(addr); 1238 desc[i].len = cpu_to_le32(sg->length); 1239 desc[i].id = cpu_to_le16(id); 1240 1241 if (unlikely(vq->use_dma_api)) { 1242 vq->packed.desc_extra[curr].addr = addr; 1243 vq->packed.desc_extra[curr].len = sg->length; 1244 vq->packed.desc_extra[curr].flags = 1245 le16_to_cpu(flags); 1246 } 1247 prev = curr; 1248 curr = vq->packed.desc_extra[curr].next; 1249 1250 if ((unlikely(++i >= vq->packed.vring.num))) { 1251 i = 0; 1252 vq->packed.avail_used_flags ^= 1253 1 << VRING_PACKED_DESC_F_AVAIL | 1254 1 << VRING_PACKED_DESC_F_USED; 1255 } 1256 } 1257 } 1258 1259 if (i < head) 1260 vq->packed.avail_wrap_counter ^= 1; 1261 1262 /* We're using some buffers from the free list. */ 1263 vq->vq.num_free -= descs_used; 1264 1265 /* Update free pointer */ 1266 vq->packed.next_avail_idx = i; 1267 vq->free_head = curr; 1268 1269 /* Store token. 
*/ 1270 vq->packed.desc_state[id].num = descs_used; 1271 vq->packed.desc_state[id].data = data; 1272 vq->packed.desc_state[id].indir_desc = ctx; 1273 vq->packed.desc_state[id].last = prev; 1274 1275 /* 1276 * A driver MUST NOT make the first descriptor in the list 1277 * available before all subsequent descriptors comprising 1278 * the list are made available. 1279 */ 1280 virtio_wmb(vq->weak_barriers); 1281 vq->packed.vring.desc[head].flags = head_flags; 1282 vq->num_added += descs_used; 1283 1284 pr_debug("Added buffer head %i to %p\n", head, vq); 1285 END_USE(vq); 1286 1287 return 0; 1288 1289unmap_release: 1290 err_idx = i; 1291 i = head; 1292 curr = vq->free_head; 1293 1294 vq->packed.avail_used_flags = avail_used_flags; 1295 1296 for (n = 0; n < total_sg; n++) { 1297 if (i == err_idx) 1298 break; 1299 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 1300 curr = vq->packed.desc_extra[curr].next; 1301 i++; 1302 if (i >= vq->packed.vring.num) 1303 i = 0; 1304 } 1305 1306 END_USE(vq); 1307 return -EIO; 1308} 1309 1310static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 1311{ 1312 struct vring_virtqueue *vq = to_vvq(_vq); 1313 u16 new, old, off_wrap, flags, wrap_counter, event_idx; 1314 bool needs_kick; 1315 union { 1316 struct { 1317 __le16 off_wrap; 1318 __le16 flags; 1319 }; 1320 u32 u32; 1321 } snapshot; 1322 1323 START_USE(vq); 1324 1325 /* 1326 * We need to expose the new flags value before checking notification 1327 * suppressions. 1328 */ 1329 virtio_mb(vq->weak_barriers); 1330 1331 old = vq->packed.next_avail_idx - vq->num_added; 1332 new = vq->packed.next_avail_idx; 1333 vq->num_added = 0; 1334 1335 snapshot.u32 = *(u32 *)vq->packed.vring.device; 1336 flags = le16_to_cpu(snapshot.flags); 1337 1338 LAST_ADD_TIME_CHECK(vq); 1339 LAST_ADD_TIME_INVALID(vq); 1340 1341 if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 1342 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1343 goto out; 1344 } 1345 1346 off_wrap = le16_to_cpu(snapshot.off_wrap); 1347 1348 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1349 event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1350 if (wrap_counter != vq->packed.avail_wrap_counter) 1351 event_idx -= vq->packed.vring.num; 1352 1353 needs_kick = vring_need_event(event_idx, new, old); 1354out: 1355 END_USE(vq); 1356 return needs_kick; 1357} 1358 1359static void detach_buf_packed(struct vring_virtqueue *vq, 1360 unsigned int id, void **ctx) 1361{ 1362 struct vring_desc_state_packed *state = NULL; 1363 struct vring_packed_desc *desc; 1364 unsigned int i, curr; 1365 1366 state = &vq->packed.desc_state[id]; 1367 1368 /* Clear data ptr. */ 1369 state->data = NULL; 1370 1371 vq->packed.desc_extra[state->last].next = vq->free_head; 1372 vq->free_head = id; 1373 vq->vq.num_free += state->num; 1374 1375 if (unlikely(vq->use_dma_api)) { 1376 curr = id; 1377 for (i = 0; i < state->num; i++) { 1378 vring_unmap_extra_packed(vq, 1379 &vq->packed.desc_extra[curr]); 1380 curr = vq->packed.desc_extra[curr].next; 1381 } 1382 } 1383 1384 if (vq->indirect) { 1385 u32 len; 1386 1387 /* Free the indirect table, if any, now that it's unmapped. 
*/ 1388 desc = state->indir_desc; 1389 if (!desc) 1390 return; 1391 1392 if (vq->use_dma_api) { 1393 len = vq->packed.desc_extra[id].len; 1394 for (i = 0; i < len / sizeof(struct vring_packed_desc); 1395 i++) 1396 vring_unmap_desc_packed(vq, &desc[i]); 1397 } 1398 kfree(desc); 1399 state->indir_desc = NULL; 1400 } else if (ctx) { 1401 *ctx = state->indir_desc; 1402 } 1403} 1404 1405static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 1406 u16 idx, bool used_wrap_counter) 1407{ 1408 bool avail, used; 1409 u16 flags; 1410 1411 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 1412 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 1413 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 1414 1415 return avail == used && used == used_wrap_counter; 1416} 1417 1418static inline bool more_used_packed(const struct vring_virtqueue *vq) 1419{ 1420 u16 last_used; 1421 u16 last_used_idx; 1422 bool used_wrap_counter; 1423 1424 last_used_idx = READ_ONCE(vq->last_used_idx); 1425 last_used = packed_last_used(last_used_idx); 1426 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1427 return is_used_desc_packed(vq, last_used, used_wrap_counter); 1428} 1429 1430static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 1431 unsigned int *len, 1432 void **ctx) 1433{ 1434 struct vring_virtqueue *vq = to_vvq(_vq); 1435 u16 last_used, id, last_used_idx; 1436 bool used_wrap_counter; 1437 void *ret; 1438 1439 START_USE(vq); 1440 1441 if (unlikely(vq->broken)) { 1442 END_USE(vq); 1443 return NULL; 1444 } 1445 1446 if (!more_used_packed(vq)) { 1447 pr_debug("No more buffers in queue\n"); 1448 END_USE(vq); 1449 return NULL; 1450 } 1451 1452 /* Only get used elements after they have been exposed by host. */ 1453 virtio_rmb(vq->weak_barriers); 1454 1455 last_used_idx = READ_ONCE(vq->last_used_idx); 1456 used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1457 last_used = packed_last_used(last_used_idx); 1458 id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 1459 *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 1460 1461 if (unlikely(id >= vq->packed.vring.num)) { 1462 BAD_RING(vq, "id %u out of range\n", id); 1463 return NULL; 1464 } 1465 if (unlikely(!vq->packed.desc_state[id].data)) { 1466 BAD_RING(vq, "id %u is not a head!\n", id); 1467 return NULL; 1468 } 1469 1470 /* detach_buf_packed clears data, so grab it now. */ 1471 ret = vq->packed.desc_state[id].data; 1472 detach_buf_packed(vq, id, ctx); 1473 1474 last_used += vq->packed.desc_state[id].num; 1475 if (unlikely(last_used >= vq->packed.vring.num)) { 1476 last_used -= vq->packed.vring.num; 1477 used_wrap_counter ^= 1; 1478 } 1479 1480 last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1481 WRITE_ONCE(vq->last_used_idx, last_used); 1482 1483 /* 1484 * If we expect an interrupt for the next entry, tell host 1485 * by writing event index and flush out the write before 1486 * the read in the next get_buf call. 
1487 */ 1488 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1489 virtio_store_mb(vq->weak_barriers, 1490 &vq->packed.vring.driver->off_wrap, 1491 cpu_to_le16(vq->last_used_idx)); 1492 1493 LAST_ADD_TIME_INVALID(vq); 1494 1495 END_USE(vq); 1496 return ret; 1497} 1498 1499static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 1500{ 1501 struct vring_virtqueue *vq = to_vvq(_vq); 1502 1503 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 1504 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1505 vq->packed.vring.driver->flags = 1506 cpu_to_le16(vq->packed.event_flags_shadow); 1507 } 1508} 1509 1510static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 1511{ 1512 struct vring_virtqueue *vq = to_vvq(_vq); 1513 1514 START_USE(vq); 1515 1516 /* 1517 * We optimistically turn back on interrupts, then check if there was 1518 * more to do. 1519 */ 1520 1521 if (vq->event) { 1522 vq->packed.vring.driver->off_wrap = 1523 cpu_to_le16(vq->last_used_idx); 1524 /* 1525 * We need to update event offset and event wrap 1526 * counter first before updating event flags. 1527 */ 1528 virtio_wmb(vq->weak_barriers); 1529 } 1530 1531 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1532 vq->packed.event_flags_shadow = vq->event ? 1533 VRING_PACKED_EVENT_FLAG_DESC : 1534 VRING_PACKED_EVENT_FLAG_ENABLE; 1535 vq->packed.vring.driver->flags = 1536 cpu_to_le16(vq->packed.event_flags_shadow); 1537 } 1538 1539 END_USE(vq); 1540 return vq->last_used_idx; 1541} 1542 1543static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 1544{ 1545 struct vring_virtqueue *vq = to_vvq(_vq); 1546 bool wrap_counter; 1547 u16 used_idx; 1548 1549 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1550 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1551 1552 return is_used_desc_packed(vq, used_idx, wrap_counter); 1553} 1554 1555static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 1556{ 1557 struct vring_virtqueue *vq = to_vvq(_vq); 1558 u16 used_idx, wrap_counter, last_used_idx; 1559 u16 bufs; 1560 1561 START_USE(vq); 1562 1563 /* 1564 * We optimistically turn back on interrupts, then check if there was 1565 * more to do. 1566 */ 1567 1568 if (vq->event) { 1569 /* TODO: tune this threshold */ 1570 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1571 last_used_idx = READ_ONCE(vq->last_used_idx); 1572 wrap_counter = packed_used_wrap_counter(last_used_idx); 1573 1574 used_idx = packed_last_used(last_used_idx) + bufs; 1575 if (used_idx >= vq->packed.vring.num) { 1576 used_idx -= vq->packed.vring.num; 1577 wrap_counter ^= 1; 1578 } 1579 1580 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1581 (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1582 1583 /* 1584 * We need to update event offset and event wrap 1585 * counter first before updating event flags. 1586 */ 1587 virtio_wmb(vq->weak_barriers); 1588 } 1589 1590 if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1591 vq->packed.event_flags_shadow = vq->event ? 1592 VRING_PACKED_EVENT_FLAG_DESC : 1593 VRING_PACKED_EVENT_FLAG_ENABLE; 1594 vq->packed.vring.driver->flags = 1595 cpu_to_le16(vq->packed.event_flags_shadow); 1596 } 1597 1598 /* 1599 * We need to update event suppression structure first 1600 * before re-checking for more used buffers. 
1601 */ 1602 virtio_mb(vq->weak_barriers); 1603 1604 last_used_idx = READ_ONCE(vq->last_used_idx); 1605 wrap_counter = packed_used_wrap_counter(last_used_idx); 1606 used_idx = packed_last_used(last_used_idx); 1607 if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 1608 END_USE(vq); 1609 return false; 1610 } 1611 1612 END_USE(vq); 1613 return true; 1614} 1615 1616static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 1617{ 1618 struct vring_virtqueue *vq = to_vvq(_vq); 1619 unsigned int i; 1620 void *buf; 1621 1622 START_USE(vq); 1623 1624 for (i = 0; i < vq->packed.vring.num; i++) { 1625 if (!vq->packed.desc_state[i].data) 1626 continue; 1627 /* detach_buf clears data, so grab it now. */ 1628 buf = vq->packed.desc_state[i].data; 1629 detach_buf_packed(vq, i, NULL); 1630 END_USE(vq); 1631 return buf; 1632 } 1633 /* That should have freed everything. */ 1634 BUG_ON(vq->vq.num_free != vq->packed.vring.num); 1635 1636 END_USE(vq); 1637 return NULL; 1638} 1639 1640static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, 1641 unsigned int num) 1642{ 1643 struct vring_desc_extra *desc_extra; 1644 unsigned int i; 1645 1646 desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 1647 GFP_KERNEL); 1648 if (!desc_extra) 1649 return NULL; 1650 1651 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 1652 1653 for (i = 0; i < num - 1; i++) 1654 desc_extra[i].next = i + 1; 1655 1656 return desc_extra; 1657} 1658 1659static struct virtqueue *vring_create_virtqueue_packed( 1660 unsigned int index, 1661 unsigned int num, 1662 unsigned int vring_align, 1663 struct virtio_device *vdev, 1664 bool weak_barriers, 1665 bool may_reduce_num, 1666 bool context, 1667 bool (*notify)(struct virtqueue *), 1668 void (*callback)(struct virtqueue *), 1669 const char *name) 1670{ 1671 struct vring_virtqueue *vq; 1672 struct vring_packed_desc *ring; 1673 struct vring_packed_desc_event *driver, *device; 1674 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1675 size_t ring_size_in_bytes, event_size_in_bytes; 1676 1677 ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 1678 1679 ring = vring_alloc_queue(vdev, ring_size_in_bytes, 1680 &ring_dma_addr, 1681 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1682 if (!ring) 1683 goto err_ring; 1684 1685 event_size_in_bytes = sizeof(struct vring_packed_desc_event); 1686 1687 driver = vring_alloc_queue(vdev, event_size_in_bytes, 1688 &driver_event_dma_addr, 1689 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1690 if (!driver) 1691 goto err_driver; 1692 1693 device = vring_alloc_queue(vdev, event_size_in_bytes, 1694 &device_event_dma_addr, 1695 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 1696 if (!device) 1697 goto err_device; 1698 1699 vq = kmalloc(sizeof(*vq), GFP_KERNEL); 1700 if (!vq) 1701 goto err_vq; 1702 1703 vq->vq.callback = callback; 1704 vq->vq.vdev = vdev; 1705 vq->vq.name = name; 1706 vq->vq.num_free = num; 1707 vq->vq.index = index; 1708 vq->we_own_ring = true; 1709 vq->notify = notify; 1710 vq->weak_barriers = weak_barriers; 1711#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 1712 vq->broken = true; 1713#else 1714 vq->broken = false; 1715#endif 1716 vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1717 vq->event_triggered = false; 1718 vq->num_added = 0; 1719 vq->packed_ring = true; 1720 vq->use_dma_api = vring_use_dma_api(vdev); 1721#ifdef DEBUG 1722 vq->in_use = false; 1723 vq->last_add_time_valid = false; 1724#endif 1725 1726 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) 
&& 1727 !context; 1728 vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 1729 1730 if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 1731 vq->weak_barriers = false; 1732 1733 vq->packed.ring_dma_addr = ring_dma_addr; 1734 vq->packed.driver_event_dma_addr = driver_event_dma_addr; 1735 vq->packed.device_event_dma_addr = device_event_dma_addr; 1736 1737 vq->packed.ring_size_in_bytes = ring_size_in_bytes; 1738 vq->packed.event_size_in_bytes = event_size_in_bytes; 1739 1740 vq->packed.vring.num = num; 1741 vq->packed.vring.desc = ring; 1742 vq->packed.vring.driver = driver; 1743 vq->packed.vring.device = device; 1744 1745 vq->packed.next_avail_idx = 0; 1746 vq->packed.avail_wrap_counter = 1; 1747 vq->packed.event_flags_shadow = 0; 1748 vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 1749 1750 vq->packed.desc_state = kmalloc_array(num, 1751 sizeof(struct vring_desc_state_packed), 1752 GFP_KERNEL); 1753 if (!vq->packed.desc_state) 1754 goto err_desc_state; 1755 1756 memset(vq->packed.desc_state, 0, 1757 num * sizeof(struct vring_desc_state_packed)); 1758 1759 /* Put everything in free lists. */ 1760 vq->free_head = 0; 1761 1762 vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); 1763 if (!vq->packed.desc_extra) 1764 goto err_desc_extra; 1765 1766 /* No callback? Tell other side not to bother us. */ 1767 if (!callback) { 1768 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 1769 vq->packed.vring.driver->flags = 1770 cpu_to_le16(vq->packed.event_flags_shadow); 1771 } 1772 1773 spin_lock(&vdev->vqs_list_lock); 1774 list_add_tail(&vq->vq.list, &vdev->vqs); 1775 spin_unlock(&vdev->vqs_list_lock); 1776 return &vq->vq; 1777 1778err_desc_extra: 1779 kfree(vq->packed.desc_state); 1780err_desc_state: 1781 kfree(vq); 1782err_vq: 1783 vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 1784err_device: 1785 vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 1786err_driver: 1787 vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 1788err_ring: 1789 return NULL; 1790} 1791 1792 1793/* 1794 * Generic functions and exported symbols. 1795 */ 1796 1797static inline int virtqueue_add(struct virtqueue *_vq, 1798 struct scatterlist *sgs[], 1799 unsigned int total_sg, 1800 unsigned int out_sgs, 1801 unsigned int in_sgs, 1802 void *data, 1803 void *ctx, 1804 gfp_t gfp) 1805{ 1806 struct vring_virtqueue *vq = to_vvq(_vq); 1807 1808 return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 1809 out_sgs, in_sgs, data, ctx, gfp) : 1810 virtqueue_add_split(_vq, sgs, total_sg, 1811 out_sgs, in_sgs, data, ctx, gfp); 1812} 1813 1814/** 1815 * virtqueue_add_sgs - expose buffers to other end 1816 * @_vq: the struct virtqueue we're talking about. 1817 * @sgs: array of terminated scatterlists. 1818 * @out_sgs: the number of scatterlists readable by other side 1819 * @in_sgs: the number of scatterlists which are writable (after readable ones) 1820 * @data: the token identifying the buffer. 1821 * @gfp: how to do memory allocations (if necessary). 1822 * 1823 * Caller must ensure we don't call this with other virtqueue operations 1824 * at the same time (except where noted). 1825 * 1826 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1827 */ 1828int virtqueue_add_sgs(struct virtqueue *_vq, 1829 struct scatterlist *sgs[], 1830 unsigned int out_sgs, 1831 unsigned int in_sgs, 1832 void *data, 1833 gfp_t gfp) 1834{ 1835 unsigned int i, total_sg = 0; 1836 1837 /* Count them first. 
*/ 1838 for (i = 0; i < out_sgs + in_sgs; i++) { 1839 struct scatterlist *sg; 1840 1841 for (sg = sgs[i]; sg; sg = sg_next(sg)) 1842 total_sg++; 1843 } 1844 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1845 data, NULL, gfp); 1846} 1847EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1848 1849/** 1850 * virtqueue_add_outbuf - expose output buffers to other end 1851 * @vq: the struct virtqueue we're talking about. 1852 * @sg: scatterlist (must be well-formed and terminated!) 1853 * @num: the number of entries in @sg readable by other side 1854 * @data: the token identifying the buffer. 1855 * @gfp: how to do memory allocations (if necessary). 1856 * 1857 * Caller must ensure we don't call this with other virtqueue operations 1858 * at the same time (except where noted). 1859 * 1860 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1861 */ 1862int virtqueue_add_outbuf(struct virtqueue *vq, 1863 struct scatterlist *sg, unsigned int num, 1864 void *data, 1865 gfp_t gfp) 1866{ 1867 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1868} 1869EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1870 1871/** 1872 * virtqueue_add_inbuf - expose input buffers to other end 1873 * @vq: the struct virtqueue we're talking about. 1874 * @sg: scatterlist (must be well-formed and terminated!) 1875 * @num: the number of entries in @sg writable by other side 1876 * @data: the token identifying the buffer. 1877 * @gfp: how to do memory allocations (if necessary). 1878 * 1879 * Caller must ensure we don't call this with other virtqueue operations 1880 * at the same time (except where noted). 1881 * 1882 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1883 */ 1884int virtqueue_add_inbuf(struct virtqueue *vq, 1885 struct scatterlist *sg, unsigned int num, 1886 void *data, 1887 gfp_t gfp) 1888{ 1889 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1890} 1891EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1892 1893/** 1894 * virtqueue_add_inbuf_ctx - expose input buffers to other end 1895 * @vq: the struct virtqueue we're talking about. 1896 * @sg: scatterlist (must be well-formed and terminated!) 1897 * @num: the number of entries in @sg writable by other side 1898 * @data: the token identifying the buffer. 1899 * @ctx: extra context for the token 1900 * @gfp: how to do memory allocations (if necessary). 1901 * 1902 * Caller must ensure we don't call this with other virtqueue operations 1903 * at the same time (except where noted). 1904 * 1905 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1906 */ 1907int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1908 struct scatterlist *sg, unsigned int num, 1909 void *data, 1910 void *ctx, 1911 gfp_t gfp) 1912{ 1913 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1914} 1915EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1916 1917/** 1918 * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1919 * @_vq: the struct virtqueue 1920 * 1921 * Instead of virtqueue_kick(), you can do: 1922 * if (virtqueue_kick_prepare(vq)) 1923 * virtqueue_notify(vq); 1924 * 1925 * This is sometimes useful because the virtqueue_kick_prepare() needs 1926 * to be serialized, but the actual virtqueue_notify() call does not. 1927 */ 1928bool virtqueue_kick_prepare(struct virtqueue *_vq) 1929{ 1930 struct vring_virtqueue *vq = to_vvq(_vq); 1931 1932 return vq->packed_ring ? 
virtqueue_kick_prepare_packed(_vq) : 1933 virtqueue_kick_prepare_split(_vq); 1934} 1935EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1936 1937/** 1938 * virtqueue_notify - second half of split virtqueue_kick call. 1939 * @_vq: the struct virtqueue 1940 * 1941 * This does not need to be serialized. 1942 * 1943 * Returns false if host notify failed or queue is broken, otherwise true. 1944 */ 1945bool virtqueue_notify(struct virtqueue *_vq) 1946{ 1947 struct vring_virtqueue *vq = to_vvq(_vq); 1948 1949 if (unlikely(vq->broken)) 1950 return false; 1951 1952 /* Prod other side to tell it about changes. */ 1953 if (!vq->notify(_vq)) { 1954 vq->broken = true; 1955 return false; 1956 } 1957 return true; 1958} 1959EXPORT_SYMBOL_GPL(virtqueue_notify); 1960 1961/** 1962 * virtqueue_kick - update after add_buf 1963 * @vq: the struct virtqueue 1964 * 1965 * After one or more virtqueue_add_* calls, invoke this to kick 1966 * the other side. 1967 * 1968 * Caller must ensure we don't call this with other virtqueue 1969 * operations at the same time (except where noted). 1970 * 1971 * Returns false if kick failed, otherwise true. 1972 */ 1973bool virtqueue_kick(struct virtqueue *vq) 1974{ 1975 if (virtqueue_kick_prepare(vq)) 1976 return virtqueue_notify(vq); 1977 return true; 1978} 1979EXPORT_SYMBOL_GPL(virtqueue_kick); 1980 1981/** 1982 * virtqueue_get_buf_ctx - get the next used buffer 1983 * @_vq: the struct virtqueue we're talking about. 1984 * @len: the length written into the buffer 1985 * @ctx: extra context for the token 1986 * 1987 * If the device wrote data into the buffer, @len will be set to the 1988 * amount written. This means you don't need to clear the buffer 1989 * beforehand to ensure there's no data leakage in the case of short 1990 * writes. 1991 * 1992 * Caller must ensure we don't call this with other virtqueue 1993 * operations at the same time (except where noted). 1994 * 1995 * Returns NULL if there are no used buffers, or the "data" token 1996 * handed to virtqueue_add_*(). 1997 */ 1998void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1999 void **ctx) 2000{ 2001 struct vring_virtqueue *vq = to_vvq(_vq); 2002 2003 return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 2004 virtqueue_get_buf_ctx_split(_vq, len, ctx); 2005} 2006EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2007 2008void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2009{ 2010 return virtqueue_get_buf_ctx(_vq, len, NULL); 2011} 2012EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2013/** 2014 * virtqueue_disable_cb - disable callbacks 2015 * @_vq: the struct virtqueue we're talking about. 2016 * 2017 * Note that this is not necessarily synchronous, hence unreliable and only 2018 * useful as an optimization. 2019 * 2020 * Unlike other operations, this need not be serialized. 2021 */ 2022void virtqueue_disable_cb(struct virtqueue *_vq) 2023{ 2024 struct vring_virtqueue *vq = to_vvq(_vq); 2025 2026 /* If device triggered an event already it won't trigger one again: 2027 * no need to disable. 2028 */ 2029 if (vq->event_triggered) 2030 return; 2031 2032 if (vq->packed_ring) 2033 virtqueue_disable_cb_packed(_vq); 2034 else 2035 virtqueue_disable_cb_split(_vq); 2036} 2037EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2038 2039/** 2040 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2041 * @_vq: the struct virtqueue we're talking about. 2042 * 2043 * This re-enables callbacks; it returns current queue state 2044 * in an opaque unsigned value. 

/**
 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns the current queue state
 * in an opaque unsigned value. This value should later be tested with
 * virtqueue_poll(), to detect a possible race between the driver checking
 * for more work and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (vq->event_triggered)
                vq->event_triggered = false;

        return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
                                 virtqueue_enable_cb_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);

/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (unlikely(vq->broken))
                return false;

        virtio_mb(vq->weak_barriers);
        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
                                 virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
        unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);

        return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (vq->event_triggered)
                vq->event_triggered = false;

        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
                                 virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
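
/*
 * The enable/disable helpers above are normally used to drain the queue
 * without losing a race against a buffer that becomes used right as
 * callbacks are re-enabled. A minimal sketch; process() and "vq" are
 * illustrative, not names from this file:
 *
 *	unsigned int len;
 *	void *buf;
 *
 *	virtqueue_disable_cb(vq);
 *	for (;;) {
 *		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *			process(buf, len);
 *		if (virtqueue_enable_cb(vq))
 *			break;			// no more work pending
 *		virtqueue_disable_cb(vq);	// raced with a new used buffer
 *	}
 */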

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
                                 virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);

static inline bool more_used(const struct vring_virtqueue *vq)
{
        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        if (!more_used(vq)) {
                pr_debug("virtqueue interrupt with no work for %p\n", vq);
                return IRQ_NONE;
        }

        if (unlikely(vq->broken)) {
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
                dev_warn_once(&vq->vq.vdev->dev,
                              "virtio vring IRQ raised before DRIVER_OK");
                return IRQ_NONE;
#else
                return IRQ_HANDLED;
#endif
        }

        /* Just a hint for performance: so it's ok that this can be racy! */
        if (vq->event)
                vq->event_triggered = true;

        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
        if (vq->vq.callback)
                vq->vq.callback(&vq->vq);

        return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
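
/*
 * On device teardown, once the queue is quiesced, a driver can reclaim the
 * tokens of buffers the device never consumed. A minimal sketch; kfree()
 * stands in for whatever cleanup the driver's "data" token actually needs:
 *
 *	void *buf;
 *
 *	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *		kfree(buf);
 */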

/* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        struct vring vring,
                                        struct virtio_device *vdev,
                                        bool weak_barriers,
                                        bool context,
                                        bool (*notify)(struct virtqueue *),
                                        void (*callback)(struct virtqueue *),
                                        const char *name)
{
        struct vring_virtqueue *vq;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
        if (!vq)
                return NULL;

        vq->packed_ring = false;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
        vq->vq.num_free = vring.num;
        vq->vq.index = index;
        vq->we_own_ring = false;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
        vq->broken = true;
#else
        vq->broken = false;
#endif
        vq->last_used_idx = 0;
        vq->event_triggered = false;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
#endif

        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
                !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
                vq->weak_barriers = false;

        vq->split.queue_dma_addr = 0;
        vq->split.queue_size_in_bytes = 0;

        vq->split.vring = vring;
        vq->split.avail_flags_shadow = 0;
        vq->split.avail_idx_shadow = 0;

        /* No callback? Tell other side not to bother us. */
        if (!callback) {
                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
                if (!vq->event)
                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
                                        vq->split.avail_flags_shadow);
        }

        vq->split.desc_state = kmalloc_array(vring.num,
                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
        if (!vq->split.desc_state)
                goto err_state;

        vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
        if (!vq->split.desc_extra)
                goto err_extra;

        /* Put everything in free lists. */
        vq->free_head = 0;
        memset(vq->split.desc_state, 0, vring.num *
                        sizeof(struct vring_desc_state_split));

        spin_lock(&vdev->vqs_list_lock);
        list_add_tail(&vq->vq.list, &vdev->vqs);
        spin_unlock(&vdev->vqs_list_lock);
        return &vq->vq;

err_extra:
        kfree(vq->split.desc_state);
err_state:
        kfree(vq);
        return NULL;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);

struct virtqueue *vring_create_virtqueue(
        unsigned int index,
        unsigned int num,
        unsigned int vring_align,
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
        bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
{
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return vring_create_virtqueue_packed(index, num, vring_align,
                                vdev, weak_barriers, may_reduce_num,
                                context, notify, callback, name);

        return vring_create_virtqueue_split(index, num, vring_align,
                        vdev, weak_barriers, may_reduce_num,
                        context, notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue);

/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int num,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
                                      bool context,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name)
{
        struct vring vring;

        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
                return NULL;

        vring_init(&vring, num, pages, vring_align);
        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                                     notify, callback, name);
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);

void vring_del_virtqueue(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        spin_lock(&vq->vq.vdev->vqs_list_lock);
        list_del(&_vq->list);
        spin_unlock(&vq->vq.vdev->vqs_list_lock);

        if (vq->we_own_ring) {
                if (vq->packed_ring) {
                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.ring_size_in_bytes,
                                         vq->packed.vring.desc,
                                         vq->packed.ring_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.driver,
                                         vq->packed.driver_event_dma_addr);

                        vring_free_queue(vq->vq.vdev,
                                         vq->packed.event_size_in_bytes,
                                         vq->packed.vring.device,
                                         vq->packed.device_event_dma_addr);

                        kfree(vq->packed.desc_state);
                        kfree(vq->packed.desc_extra);
                } else {
                        vring_free_queue(vq->vq.vdev,
                                         vq->split.queue_size_in_bytes,
                                         vq->split.vring.desc,
                                         vq->split.queue_dma_addr);
                }
        }
        if (!vq->packed_ring) {
                kfree(vq->split.desc_state);
                kfree(vq->split.desc_extra);
        }
        kfree(vq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
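
/*
 * A transport typically creates and destroys a ring along these lines.
 * This is only an illustrative sketch: "my_notify", "my_callback" and the
 * queue parameters (index 0, 256 entries, cache-line alignment) are
 * assumptions, not values taken from this file:
 *
 *	struct virtqueue *vq;
 *
 *	vq = vring_create_virtqueue(0, 256, SMP_CACHE_BYTES, vdev,
 *				    true, true, false,
 *				    my_notify, my_callback, "requests");
 *	if (!vq)
 *		return -ENOMEM;
 *	...
 *	vring_del_virtqueue(vq);
 */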

/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
        unsigned int i;

        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
                switch (i) {
                case VIRTIO_RING_F_INDIRECT_DESC:
                        break;
                case VIRTIO_RING_F_EVENT_IDX:
                        break;
                case VIRTIO_F_VERSION_1:
                        break;
                case VIRTIO_F_ACCESS_PLATFORM:
                        break;
                case VIRTIO_F_RING_PACKED:
                        break;
                case VIRTIO_F_ORDER_PLATFORM:
                        break;
                default:
                        /* We don't understand this bit. */
                        __virtio_clear_bit(vdev, i);
                }
        }
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring. This is mainly used for boasting to
 * userspace. Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        return READ_ONCE(vq->broken);
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover. You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
        struct virtqueue *_vq;

        spin_lock(&dev->vqs_list_lock);
        list_for_each_entry(_vq, &dev->vqs, list) {
                struct vring_virtqueue *vq = to_vvq(_vq);

                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
                WRITE_ONCE(vq->broken, true);
        }
        spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(virtio_break_device);
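
/*
 * Drivers observe the broken state only through virtqueue_is_broken(),
 * e.g. to stop servicing a queue once the device has been marked broken,
 * and virtqueue_get_vring_size() is handy for sizing per-buffer
 * bookkeeping. A minimal, illustrative sketch ("bufs" is an assumption):
 *
 *	if (virtqueue_is_broken(vq))
 *		return;
 *
 *	bufs = kcalloc(virtqueue_get_vring_size(vq), sizeof(*bufs),
 *		       GFP_KERNEL);
 */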

/*
 * This should allow the device to be used by the driver. You may
 * need to grab appropriate locks to flush the write to vq->broken.
 * This should only be used in specific cases, e.g. probing and
 * restoring. This function should only be called by the core, not
 * directly by the driver.
 */
void __virtio_unbreak_device(struct virtio_device *dev)
{
        struct virtqueue *_vq;

        spin_lock(&dev->vqs_list_lock);
        list_for_each_entry(_vq, &dev->vqs, list) {
                struct vring_virtqueue *vq = to_vvq(_vq);

                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
                WRITE_ONCE(vq->broken, false);
        }
        spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(__virtio_unbreak_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.ring_dma_addr;

        return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.driver_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
        struct vring_virtqueue *vq = to_vvq(_vq);

        BUG_ON(!vq->we_own_ring);

        if (vq->packed_ring)
                return vq->packed.device_event_dma_addr;

        return vq->split.queue_dma_addr +
                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
        return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");