blk-map.c (14821B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Functions related to mapping data to requests 4 */ 5#include <linux/kernel.h> 6#include <linux/sched/task_stack.h> 7#include <linux/module.h> 8#include <linux/bio.h> 9#include <linux/blkdev.h> 10#include <linux/uio.h> 11 12#include "blk.h" 13 14struct bio_map_data { 15 bool is_our_pages : 1; 16 bool is_null_mapped : 1; 17 struct iov_iter iter; 18 struct iovec iov[]; 19}; 20 21static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, 22 gfp_t gfp_mask) 23{ 24 struct bio_map_data *bmd; 25 26 if (data->nr_segs > UIO_MAXIOV) 27 return NULL; 28 29 bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask); 30 if (!bmd) 31 return NULL; 32 memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); 33 bmd->iter = *data; 34 bmd->iter.iov = bmd->iov; 35 return bmd; 36} 37 38/** 39 * bio_copy_from_iter - copy all pages from iov_iter to bio 40 * @bio: The &struct bio which describes the I/O as destination 41 * @iter: iov_iter as source 42 * 43 * Copy all pages from iov_iter to bio. 44 * Returns 0 on success, or error on failure. 45 */ 46static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) 47{ 48 struct bio_vec *bvec; 49 struct bvec_iter_all iter_all; 50 51 bio_for_each_segment_all(bvec, bio, iter_all) { 52 ssize_t ret; 53 54 ret = copy_page_from_iter(bvec->bv_page, 55 bvec->bv_offset, 56 bvec->bv_len, 57 iter); 58 59 if (!iov_iter_count(iter)) 60 break; 61 62 if (ret < bvec->bv_len) 63 return -EFAULT; 64 } 65 66 return 0; 67} 68 69/** 70 * bio_copy_to_iter - copy all pages from bio to iov_iter 71 * @bio: The &struct bio which describes the I/O as source 72 * @iter: iov_iter as destination 73 * 74 * Copy all pages from bio to iov_iter. 75 * Returns 0 on success, or error on failure. 76 */ 77static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) 78{ 79 struct bio_vec *bvec; 80 struct bvec_iter_all iter_all; 81 82 bio_for_each_segment_all(bvec, bio, iter_all) { 83 ssize_t ret; 84 85 ret = copy_page_to_iter(bvec->bv_page, 86 bvec->bv_offset, 87 bvec->bv_len, 88 &iter); 89 90 if (!iov_iter_count(&iter)) 91 break; 92 93 if (ret < bvec->bv_len) 94 return -EFAULT; 95 } 96 97 return 0; 98} 99 100/** 101 * bio_uncopy_user - finish previously mapped bio 102 * @bio: bio being terminated 103 * 104 * Free pages allocated from bio_copy_user_iov() and write back data 105 * to user space in case of a read. 106 */ 107static int bio_uncopy_user(struct bio *bio) 108{ 109 struct bio_map_data *bmd = bio->bi_private; 110 int ret = 0; 111 112 if (!bmd->is_null_mapped) { 113 /* 114 * if we're in a workqueue, the request is orphaned, so 115 * don't copy into a random user address space, just free 116 * and return -EINTR so user space doesn't expect any data. 117 */ 118 if (!current->mm) 119 ret = -EINTR; 120 else if (bio_data_dir(bio) == READ) 121 ret = bio_copy_to_iter(bio, bmd->iter); 122 if (bmd->is_our_pages) 123 bio_free_pages(bio); 124 } 125 kfree(bmd); 126 return ret; 127} 128 129static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, 130 struct iov_iter *iter, gfp_t gfp_mask) 131{ 132 struct bio_map_data *bmd; 133 struct page *page; 134 struct bio *bio; 135 int i = 0, ret; 136 int nr_pages; 137 unsigned int len = iter->count; 138 unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; 139 140 bmd = bio_alloc_map_data(iter, gfp_mask); 141 if (!bmd) 142 return -ENOMEM; 143 144 /* 145 * We need to do a deep copy of the iov_iter including the iovecs. 146 * The caller provided iov might point to an on-stack or otherwise 147 * shortlived one. 148 */ 149 bmd->is_our_pages = !map_data; 150 bmd->is_null_mapped = (map_data && map_data->null_mapped); 151 152 nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE)); 153 154 ret = -ENOMEM; 155 bio = bio_kmalloc(nr_pages, gfp_mask); 156 if (!bio) 157 goto out_bmd; 158 bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq)); 159 160 if (map_data) { 161 nr_pages = 1 << map_data->page_order; 162 i = map_data->offset / PAGE_SIZE; 163 } 164 while (len) { 165 unsigned int bytes = PAGE_SIZE; 166 167 bytes -= offset; 168 169 if (bytes > len) 170 bytes = len; 171 172 if (map_data) { 173 if (i == map_data->nr_entries * nr_pages) { 174 ret = -ENOMEM; 175 goto cleanup; 176 } 177 178 page = map_data->pages[i / nr_pages]; 179 page += (i % nr_pages); 180 181 i++; 182 } else { 183 page = alloc_page(GFP_NOIO | gfp_mask); 184 if (!page) { 185 ret = -ENOMEM; 186 goto cleanup; 187 } 188 } 189 190 if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) { 191 if (!map_data) 192 __free_page(page); 193 break; 194 } 195 196 len -= bytes; 197 offset = 0; 198 } 199 200 if (map_data) 201 map_data->offset += bio->bi_iter.bi_size; 202 203 /* 204 * success 205 */ 206 if ((iov_iter_rw(iter) == WRITE && 207 (!map_data || !map_data->null_mapped)) || 208 (map_data && map_data->from_user)) { 209 ret = bio_copy_from_iter(bio, iter); 210 if (ret) 211 goto cleanup; 212 } else { 213 if (bmd->is_our_pages) 214 zero_fill_bio(bio); 215 iov_iter_advance(iter, bio->bi_iter.bi_size); 216 } 217 218 bio->bi_private = bmd; 219 220 ret = blk_rq_append_bio(rq, bio); 221 if (ret) 222 goto cleanup; 223 return 0; 224cleanup: 225 if (!map_data) 226 bio_free_pages(bio); 227 bio_uninit(bio); 228 kfree(bio); 229out_bmd: 230 kfree(bmd); 231 return ret; 232} 233 234static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, 235 gfp_t gfp_mask) 236{ 237 unsigned int max_sectors = queue_max_hw_sectors(rq->q); 238 unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); 239 struct bio *bio; 240 int ret; 241 int j; 242 243 if (!iov_iter_count(iter)) 244 return -EINVAL; 245 246 bio = bio_kmalloc(nr_vecs, gfp_mask); 247 if (!bio) 248 return -ENOMEM; 249 bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); 250 251 while (iov_iter_count(iter)) { 252 struct page **pages; 253 ssize_t bytes; 254 size_t offs, added = 0; 255 int npages; 256 257 bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); 258 if (unlikely(bytes <= 0)) { 259 ret = bytes ? bytes : -EFAULT; 260 goto out_unmap; 261 } 262 263 npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); 264 265 if (unlikely(offs & queue_dma_alignment(rq->q))) 266 j = 0; 267 else { 268 for (j = 0; j < npages; j++) { 269 struct page *page = pages[j]; 270 unsigned int n = PAGE_SIZE - offs; 271 bool same_page = false; 272 273 if (n > bytes) 274 n = bytes; 275 276 if (!bio_add_hw_page(rq->q, bio, page, n, offs, 277 max_sectors, &same_page)) { 278 if (same_page) 279 put_page(page); 280 break; 281 } 282 283 added += n; 284 bytes -= n; 285 offs = 0; 286 } 287 iov_iter_advance(iter, added); 288 } 289 /* 290 * release the pages we didn't map into the bio, if any 291 */ 292 while (j < npages) 293 put_page(pages[j++]); 294 kvfree(pages); 295 /* couldn't stuff something into bio? */ 296 if (bytes) 297 break; 298 } 299 300 ret = blk_rq_append_bio(rq, bio); 301 if (ret) 302 goto out_unmap; 303 return 0; 304 305 out_unmap: 306 bio_release_pages(bio, false); 307 bio_uninit(bio); 308 kfree(bio); 309 return ret; 310} 311 312static void bio_invalidate_vmalloc_pages(struct bio *bio) 313{ 314#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 315 if (bio->bi_private && !op_is_write(bio_op(bio))) { 316 unsigned long i, len = 0; 317 318 for (i = 0; i < bio->bi_vcnt; i++) 319 len += bio->bi_io_vec[i].bv_len; 320 invalidate_kernel_vmap_range(bio->bi_private, len); 321 } 322#endif 323} 324 325static void bio_map_kern_endio(struct bio *bio) 326{ 327 bio_invalidate_vmalloc_pages(bio); 328 bio_uninit(bio); 329 kfree(bio); 330} 331 332/** 333 * bio_map_kern - map kernel address into bio 334 * @q: the struct request_queue for the bio 335 * @data: pointer to buffer to map 336 * @len: length in bytes 337 * @gfp_mask: allocation flags for bio allocation 338 * 339 * Map the kernel address into a bio suitable for io to a block 340 * device. Returns an error pointer in case of error. 341 */ 342static struct bio *bio_map_kern(struct request_queue *q, void *data, 343 unsigned int len, gfp_t gfp_mask) 344{ 345 unsigned long kaddr = (unsigned long)data; 346 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 347 unsigned long start = kaddr >> PAGE_SHIFT; 348 const int nr_pages = end - start; 349 bool is_vmalloc = is_vmalloc_addr(data); 350 struct page *page; 351 int offset, i; 352 struct bio *bio; 353 354 bio = bio_kmalloc(nr_pages, gfp_mask); 355 if (!bio) 356 return ERR_PTR(-ENOMEM); 357 bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); 358 359 if (is_vmalloc) { 360 flush_kernel_vmap_range(data, len); 361 bio->bi_private = data; 362 } 363 364 offset = offset_in_page(kaddr); 365 for (i = 0; i < nr_pages; i++) { 366 unsigned int bytes = PAGE_SIZE - offset; 367 368 if (len <= 0) 369 break; 370 371 if (bytes > len) 372 bytes = len; 373 374 if (!is_vmalloc) 375 page = virt_to_page(data); 376 else 377 page = vmalloc_to_page(data); 378 if (bio_add_pc_page(q, bio, page, bytes, 379 offset) < bytes) { 380 /* we don't support partial mappings */ 381 bio_uninit(bio); 382 kfree(bio); 383 return ERR_PTR(-EINVAL); 384 } 385 386 data += bytes; 387 len -= bytes; 388 offset = 0; 389 } 390 391 bio->bi_end_io = bio_map_kern_endio; 392 return bio; 393} 394 395static void bio_copy_kern_endio(struct bio *bio) 396{ 397 bio_free_pages(bio); 398 bio_uninit(bio); 399 kfree(bio); 400} 401 402static void bio_copy_kern_endio_read(struct bio *bio) 403{ 404 char *p = bio->bi_private; 405 struct bio_vec *bvec; 406 struct bvec_iter_all iter_all; 407 408 bio_for_each_segment_all(bvec, bio, iter_all) { 409 memcpy_from_bvec(p, bvec); 410 p += bvec->bv_len; 411 } 412 413 bio_copy_kern_endio(bio); 414} 415 416/** 417 * bio_copy_kern - copy kernel address into bio 418 * @q: the struct request_queue for the bio 419 * @data: pointer to buffer to copy 420 * @len: length in bytes 421 * @gfp_mask: allocation flags for bio and page allocation 422 * @reading: data direction is READ 423 * 424 * copy the kernel address into a bio suitable for io to a block 425 * device. Returns an error pointer in case of error. 426 */ 427static struct bio *bio_copy_kern(struct request_queue *q, void *data, 428 unsigned int len, gfp_t gfp_mask, int reading) 429{ 430 unsigned long kaddr = (unsigned long)data; 431 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 432 unsigned long start = kaddr >> PAGE_SHIFT; 433 struct bio *bio; 434 void *p = data; 435 int nr_pages = 0; 436 437 /* 438 * Overflow, abort 439 */ 440 if (end < start) 441 return ERR_PTR(-EINVAL); 442 443 nr_pages = end - start; 444 bio = bio_kmalloc(nr_pages, gfp_mask); 445 if (!bio) 446 return ERR_PTR(-ENOMEM); 447 bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); 448 449 while (len) { 450 struct page *page; 451 unsigned int bytes = PAGE_SIZE; 452 453 if (bytes > len) 454 bytes = len; 455 456 page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask); 457 if (!page) 458 goto cleanup; 459 460 if (!reading) 461 memcpy(page_address(page), p, bytes); 462 463 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) 464 break; 465 466 len -= bytes; 467 p += bytes; 468 } 469 470 if (reading) { 471 bio->bi_end_io = bio_copy_kern_endio_read; 472 bio->bi_private = data; 473 } else { 474 bio->bi_end_io = bio_copy_kern_endio; 475 } 476 477 return bio; 478 479cleanup: 480 bio_free_pages(bio); 481 bio_uninit(bio); 482 kfree(bio); 483 return ERR_PTR(-ENOMEM); 484} 485 486/* 487 * Append a bio to a passthrough request. Only works if the bio can be merged 488 * into the request based on the driver constraints. 489 */ 490int blk_rq_append_bio(struct request *rq, struct bio *bio) 491{ 492 struct bvec_iter iter; 493 struct bio_vec bv; 494 unsigned int nr_segs = 0; 495 496 bio_for_each_bvec(bv, bio, iter) 497 nr_segs++; 498 499 if (!rq->bio) { 500 blk_rq_bio_prep(rq, bio, nr_segs); 501 } else { 502 if (!ll_back_merge_fn(rq, bio, nr_segs)) 503 return -EINVAL; 504 rq->biotail->bi_next = bio; 505 rq->biotail = bio; 506 rq->__data_len += (bio)->bi_iter.bi_size; 507 bio_crypt_free_ctx(bio); 508 } 509 510 return 0; 511} 512EXPORT_SYMBOL(blk_rq_append_bio); 513 514/** 515 * blk_rq_map_user_iov - map user data to a request, for passthrough requests 516 * @q: request queue where request should be inserted 517 * @rq: request to map data to 518 * @map_data: pointer to the rq_map_data holding pages (if necessary) 519 * @iter: iovec iterator 520 * @gfp_mask: memory allocation flags 521 * 522 * Description: 523 * Data will be mapped directly for zero copy I/O, if possible. Otherwise 524 * a kernel bounce buffer is used. 525 * 526 * A matching blk_rq_unmap_user() must be issued at the end of I/O, while 527 * still in process context. 528 */ 529int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 530 struct rq_map_data *map_data, 531 const struct iov_iter *iter, gfp_t gfp_mask) 532{ 533 bool copy = false; 534 unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); 535 struct bio *bio = NULL; 536 struct iov_iter i; 537 int ret = -EINVAL; 538 539 if (!iter_is_iovec(iter)) 540 goto fail; 541 542 if (map_data) 543 copy = true; 544 else if (blk_queue_may_bounce(q)) 545 copy = true; 546 else if (iov_iter_alignment(iter) & align) 547 copy = true; 548 else if (queue_virt_boundary(q)) 549 copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); 550 551 i = *iter; 552 do { 553 if (copy) 554 ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask); 555 else 556 ret = bio_map_user_iov(rq, &i, gfp_mask); 557 if (ret) 558 goto unmap_rq; 559 if (!bio) 560 bio = rq->bio; 561 } while (iov_iter_count(&i)); 562 563 return 0; 564 565unmap_rq: 566 blk_rq_unmap_user(bio); 567fail: 568 rq->bio = NULL; 569 return ret; 570} 571EXPORT_SYMBOL(blk_rq_map_user_iov); 572 573int blk_rq_map_user(struct request_queue *q, struct request *rq, 574 struct rq_map_data *map_data, void __user *ubuf, 575 unsigned long len, gfp_t gfp_mask) 576{ 577 struct iovec iov; 578 struct iov_iter i; 579 int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i); 580 581 if (unlikely(ret < 0)) 582 return ret; 583 584 return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask); 585} 586EXPORT_SYMBOL(blk_rq_map_user); 587 588/** 589 * blk_rq_unmap_user - unmap a request with user data 590 * @bio: start of bio list 591 * 592 * Description: 593 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must 594 * supply the original rq->bio from the blk_rq_map_user() return, since 595 * the I/O completion may have changed rq->bio. 596 */ 597int blk_rq_unmap_user(struct bio *bio) 598{ 599 struct bio *next_bio; 600 int ret = 0, ret2; 601 602 while (bio) { 603 if (bio->bi_private) { 604 ret2 = bio_uncopy_user(bio); 605 if (ret2 && !ret) 606 ret = ret2; 607 } else { 608 bio_release_pages(bio, bio_data_dir(bio) == READ); 609 } 610 611 next_bio = bio; 612 bio = bio->bi_next; 613 bio_uninit(next_bio); 614 kfree(next_bio); 615 } 616 617 return ret; 618} 619EXPORT_SYMBOL(blk_rq_unmap_user); 620 621/** 622 * blk_rq_map_kern - map kernel data to a request, for passthrough requests 623 * @q: request queue where request should be inserted 624 * @rq: request to fill 625 * @kbuf: the kernel buffer 626 * @len: length of user data 627 * @gfp_mask: memory allocation flags 628 * 629 * Description: 630 * Data will be mapped directly if possible. Otherwise a bounce 631 * buffer is used. Can be called multiple times to append multiple 632 * buffers. 633 */ 634int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 635 unsigned int len, gfp_t gfp_mask) 636{ 637 int reading = rq_data_dir(rq) == READ; 638 unsigned long addr = (unsigned long) kbuf; 639 struct bio *bio; 640 int ret; 641 642 if (len > (queue_max_hw_sectors(q) << 9)) 643 return -EINVAL; 644 if (!len || !kbuf) 645 return -EINVAL; 646 647 if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) || 648 blk_queue_may_bounce(q)) 649 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 650 else 651 bio = bio_map_kern(q, kbuf, len, gfp_mask); 652 653 if (IS_ERR(bio)) 654 return PTR_ERR(bio); 655 656 bio->bi_opf &= ~REQ_OP_MASK; 657 bio->bi_opf |= req_op(rq); 658 659 ret = blk_rq_append_bio(rq, bio); 660 if (unlikely(ret)) { 661 bio_uninit(bio); 662 kfree(bio); 663 } 664 return ret; 665} 666EXPORT_SYMBOL(blk_rq_map_kern);