memory.c (80791B)
1// SPDX-License-Identifier: GPL-2.0 2 3/* 4 * Copyright 2016-2021 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8#include <uapi/misc/habanalabs.h> 9#include "habanalabs.h" 10#include "../include/hw_ip/mmu/mmu_general.h" 11 12#include <linux/uaccess.h> 13#include <linux/slab.h> 14#include <linux/vmalloc.h> 15#include <linux/pci-p2pdma.h> 16 17MODULE_IMPORT_NS(DMA_BUF); 18 19#define HL_MMU_DEBUG 0 20 21/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */ 22#define DRAM_POOL_PAGE_SIZE SZ_8M 23 24static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, 25 struct hl_mem_in *args, u64 *handle); 26 27static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size) 28{ 29 struct asic_fixed_properties *prop = &hdev->asic_prop; 30 u32 psize; 31 32 /* 33 * for ASIC that supports setting the allocation page size by user we will address 34 * user's choice only if it is not 0 (as 0 means taking the default page size) 35 */ 36 if (prop->supports_user_set_page_size && args->alloc.page_size) { 37 psize = args->alloc.page_size; 38 39 if (!hdev->asic_funcs->is_valid_dram_page_size(psize)) { 40 dev_err(hdev->dev, "user page size (%#x) is not valid\n", psize); 41 return -EINVAL; 42 } 43 } else { 44 psize = prop->device_mem_alloc_default_page_size; 45 } 46 47 *page_size = psize; 48 49 return 0; 50} 51 52/* 53 * The va ranges in context object contain a list with the available chunks of 54 * device virtual memory. 55 * There is one range for host allocations and one for DRAM allocations. 56 * 57 * On initialization each range contains one chunk of all of its available 58 * virtual range which is a half of the total device virtual range. 59 * 60 * On each mapping of physical pages, a suitable virtual range chunk (with a 61 * minimum size) is selected from the list. If the chunk size equals the 62 * requested size, the chunk is returned. Otherwise, the chunk is split into 63 * two chunks - one to return as result and a remainder to stay in the list. 64 * 65 * On each Unmapping of a virtual address, the relevant virtual chunk is 66 * returned to the list. The chunk is added to the list and if its edges match 67 * the edges of the adjacent chunks (means a contiguous chunk can be created), 68 * the chunks are merged. 69 * 70 * On finish, the list is checked to have only one chunk of all the relevant 71 * virtual range (which is a half of the device total virtual range). 72 * If not (means not all mappings were unmapped), a warning is printed. 73 */ 74 75/* 76 * alloc_device_memory() - allocate device memory. 77 * @ctx: pointer to the context structure. 78 * @args: host parameters containing the requested size. 79 * @ret_handle: result handle. 80 * 81 * This function does the following: 82 * - Allocate the requested size rounded up to 'dram_page_size' pages. 83 * - Return unique handle for later map/unmap/free. 
84 */ 85static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, 86 u32 *ret_handle) 87{ 88 struct hl_device *hdev = ctx->hdev; 89 struct hl_vm *vm = &hdev->vm; 90 struct hl_vm_phys_pg_pack *phys_pg_pack; 91 u64 paddr = 0, total_size, num_pgs, i; 92 u32 num_curr_pgs, page_size; 93 bool contiguous; 94 int handle, rc; 95 96 num_curr_pgs = 0; 97 98 rc = set_alloc_page_size(hdev, args, &page_size); 99 if (rc) 100 return rc; 101 102 num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size); 103 total_size = num_pgs * page_size; 104 105 if (!total_size) { 106 dev_err(hdev->dev, "Cannot allocate 0 bytes\n"); 107 return -EINVAL; 108 } 109 110 contiguous = args->flags & HL_MEM_CONTIGUOUS; 111 112 if (contiguous) { 113 if (is_power_of_2(page_size)) 114 paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool, 115 total_size, NULL, page_size); 116 else 117 paddr = gen_pool_alloc(vm->dram_pg_pool, total_size); 118 if (!paddr) { 119 dev_err(hdev->dev, 120 "Cannot allocate %llu contiguous pages with total size of %llu\n", 121 num_pgs, total_size); 122 return -ENOMEM; 123 } 124 } 125 126 phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); 127 if (!phys_pg_pack) { 128 rc = -ENOMEM; 129 goto pages_pack_err; 130 } 131 132 phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK; 133 phys_pg_pack->asid = ctx->asid; 134 phys_pg_pack->npages = num_pgs; 135 phys_pg_pack->page_size = page_size; 136 phys_pg_pack->total_size = total_size; 137 phys_pg_pack->flags = args->flags; 138 phys_pg_pack->contiguous = contiguous; 139 140 phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL); 141 if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { 142 rc = -ENOMEM; 143 goto pages_arr_err; 144 } 145 146 if (phys_pg_pack->contiguous) { 147 for (i = 0 ; i < num_pgs ; i++) 148 phys_pg_pack->pages[i] = paddr + i * page_size; 149 } else { 150 for (i = 0 ; i < num_pgs ; i++) { 151 if (is_power_of_2(page_size)) 152 phys_pg_pack->pages[i] = 153 (uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool, 154 page_size, NULL, 155 page_size); 156 else 157 phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool, 158 page_size); 159 160 if (!phys_pg_pack->pages[i]) { 161 dev_err(hdev->dev, 162 "Cannot allocate device memory (out of memory)\n"); 163 rc = -ENOMEM; 164 goto page_err; 165 } 166 167 num_curr_pgs++; 168 } 169 } 170 171 spin_lock(&vm->idr_lock); 172 handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, 173 GFP_ATOMIC); 174 spin_unlock(&vm->idr_lock); 175 176 if (handle < 0) { 177 dev_err(hdev->dev, "Failed to get handle for page\n"); 178 rc = -EFAULT; 179 goto idr_err; 180 } 181 182 for (i = 0 ; i < num_pgs ; i++) 183 kref_get(&vm->dram_pg_pool_refcount); 184 185 phys_pg_pack->handle = handle; 186 187 atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem); 188 atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem); 189 190 *ret_handle = handle; 191 192 return 0; 193 194idr_err: 195page_err: 196 if (!phys_pg_pack->contiguous) 197 for (i = 0 ; i < num_curr_pgs ; i++) 198 gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i], 199 page_size); 200 201 kvfree(phys_pg_pack->pages); 202pages_arr_err: 203 kfree(phys_pg_pack); 204pages_pack_err: 205 if (contiguous) 206 gen_pool_free(vm->dram_pg_pool, paddr, total_size); 207 208 return rc; 209} 210 211/** 212 * dma_map_host_va() - DMA mapping of the given host virtual address. 213 * @hdev: habanalabs device structure. 214 * @addr: the host virtual address of the memory area. 215 * @size: the size of the memory area. 
216 * @p_userptr: pointer to result userptr structure. 217 * 218 * This function does the following: 219 * - Allocate userptr structure. 220 * - Pin the given host memory using the userptr structure. 221 * - Perform DMA mapping to have the DMA addresses of the pages. 222 */ 223static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, 224 struct hl_userptr **p_userptr) 225{ 226 struct hl_userptr *userptr; 227 int rc; 228 229 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 230 if (!userptr) { 231 rc = -ENOMEM; 232 goto userptr_err; 233 } 234 235 rc = hl_pin_host_memory(hdev, addr, size, userptr); 236 if (rc) { 237 dev_err(hdev->dev, "Failed to pin host memory\n"); 238 goto pin_err; 239 } 240 241 userptr->dma_mapped = true; 242 userptr->dir = DMA_BIDIRECTIONAL; 243 userptr->vm_type = VM_TYPE_USERPTR; 244 245 *p_userptr = userptr; 246 247 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); 248 if (rc) { 249 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 250 goto dma_map_err; 251 } 252 253 return 0; 254 255dma_map_err: 256 hl_unpin_host_memory(hdev, userptr); 257pin_err: 258 kfree(userptr); 259userptr_err: 260 261 return rc; 262} 263 264/** 265 * dma_unmap_host_va() - DMA unmapping of the given host virtual address. 266 * @hdev: habanalabs device structure. 267 * @userptr: userptr to free. 268 * 269 * This function does the following: 270 * - Unpins the physical pages. 271 * - Frees the userptr structure. 272 */ 273static void dma_unmap_host_va(struct hl_device *hdev, 274 struct hl_userptr *userptr) 275{ 276 hl_unpin_host_memory(hdev, userptr); 277 kfree(userptr); 278} 279 280/** 281 * dram_pg_pool_do_release() - free DRAM pages pool 282 * @ref: pointer to reference object. 283 * 284 * This function does the following: 285 * - Frees the idr structure of physical pages handles. 286 * - Frees the generic pool of DRAM physical pages. 287 */ 288static void dram_pg_pool_do_release(struct kref *ref) 289{ 290 struct hl_vm *vm = container_of(ref, struct hl_vm, 291 dram_pg_pool_refcount); 292 293 /* 294 * free the idr here as only here we know for sure that there are no 295 * allocated physical pages and hence there are no handles in use 296 */ 297 idr_destroy(&vm->phys_pg_pack_handles); 298 gen_pool_destroy(vm->dram_pg_pool); 299} 300 301/** 302 * free_phys_pg_pack() - free physical page pack. 303 * @hdev: habanalabs device structure. 304 * @phys_pg_pack: physical page pack to free. 305 * 306 * This function does the following: 307 * - For DRAM memory only 308 * - iterate over the pack, scrub and free each physical block structure by 309 * returning it to the general pool. 310 * In case of error during scrubbing, initiate hard reset. 311 * Once hard reset is triggered, scrubbing is bypassed while freeing the 312 * memory continues. 313 * - Free the hl_vm_phys_pg_pack structure. 
314 */ 315static int free_phys_pg_pack(struct hl_device *hdev, 316 struct hl_vm_phys_pg_pack *phys_pg_pack) 317{ 318 struct hl_vm *vm = &hdev->vm; 319 u64 i; 320 int rc = 0; 321 322 if (phys_pg_pack->created_from_userptr) 323 goto end; 324 325 if (phys_pg_pack->contiguous) { 326 if (hdev->memory_scrub && !hdev->disabled) { 327 rc = hdev->asic_funcs->scrub_device_mem(hdev, 328 phys_pg_pack->pages[0], 329 phys_pg_pack->total_size); 330 if (rc) 331 dev_err(hdev->dev, 332 "Failed to scrub contiguous device memory\n"); 333 } 334 335 gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], 336 phys_pg_pack->total_size); 337 338 for (i = 0; i < phys_pg_pack->npages ; i++) 339 kref_put(&vm->dram_pg_pool_refcount, 340 dram_pg_pool_do_release); 341 } else { 342 for (i = 0 ; i < phys_pg_pack->npages ; i++) { 343 if (hdev->memory_scrub && !hdev->disabled && rc == 0) { 344 rc = hdev->asic_funcs->scrub_device_mem( 345 hdev, 346 phys_pg_pack->pages[i], 347 phys_pg_pack->page_size); 348 if (rc) 349 dev_err(hdev->dev, 350 "Failed to scrub device memory\n"); 351 } 352 gen_pool_free(vm->dram_pg_pool, 353 phys_pg_pack->pages[i], 354 phys_pg_pack->page_size); 355 kref_put(&vm->dram_pg_pool_refcount, 356 dram_pg_pool_do_release); 357 } 358 } 359 360 if (rc && !hdev->disabled) 361 hl_device_reset(hdev, HL_DRV_RESET_HARD); 362 363end: 364 kvfree(phys_pg_pack->pages); 365 kfree(phys_pg_pack); 366 367 return rc; 368} 369 370/** 371 * free_device_memory() - free device memory. 372 * @ctx: pointer to the context structure. 373 * @args: host parameters containing the requested size. 374 * 375 * This function does the following: 376 * - Free the device memory related to the given handle. 377 */ 378static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args) 379{ 380 struct hl_device *hdev = ctx->hdev; 381 struct hl_vm *vm = &hdev->vm; 382 struct hl_vm_phys_pg_pack *phys_pg_pack; 383 u32 handle = args->free.handle; 384 385 spin_lock(&vm->idr_lock); 386 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); 387 if (phys_pg_pack) { 388 if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) { 389 dev_err(hdev->dev, "handle %u is mapped, cannot free\n", 390 handle); 391 spin_unlock(&vm->idr_lock); 392 return -EINVAL; 393 } 394 395 if (phys_pg_pack->exporting_cnt) { 396 dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle); 397 spin_unlock(&vm->idr_lock); 398 return -EINVAL; 399 } 400 401 /* 402 * must remove from idr before the freeing of the physical 403 * pages as the refcount of the pool is also the trigger of the 404 * idr destroy 405 */ 406 idr_remove(&vm->phys_pg_pack_handles, handle); 407 spin_unlock(&vm->idr_lock); 408 409 atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); 410 atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); 411 412 return free_phys_pg_pack(hdev, phys_pg_pack); 413 } else { 414 spin_unlock(&vm->idr_lock); 415 dev_err(hdev->dev, 416 "free device memory failed, no match for handle %u\n", 417 handle); 418 return -EINVAL; 419 } 420 421 return 0; 422} 423 424/** 425 * clear_va_list_locked() - free virtual addresses list. 426 * @hdev: habanalabs device structure. 427 * @va_list: list of virtual addresses to free. 428 * 429 * This function does the following: 430 * - Iterate over the list and free each virtual addresses block. 431 * 432 * This function should be called only when va_list lock is taken. 
433 */ 434static void clear_va_list_locked(struct hl_device *hdev, 435 struct list_head *va_list) 436{ 437 struct hl_vm_va_block *va_block, *tmp; 438 439 list_for_each_entry_safe(va_block, tmp, va_list, node) { 440 list_del(&va_block->node); 441 kfree(va_block); 442 } 443} 444 445/** 446 * print_va_list_locked() - print virtual addresses list. 447 * @hdev: habanalabs device structure. 448 * @va_list: list of virtual addresses to print. 449 * 450 * This function does the following: 451 * - Iterate over the list and print each virtual addresses block. 452 * 453 * This function should be called only when va_list lock is taken. 454 */ 455static void print_va_list_locked(struct hl_device *hdev, 456 struct list_head *va_list) 457{ 458#if HL_MMU_DEBUG 459 struct hl_vm_va_block *va_block; 460 461 dev_dbg(hdev->dev, "print va list:\n"); 462 463 list_for_each_entry(va_block, va_list, node) 464 dev_dbg(hdev->dev, 465 "va block, start: 0x%llx, end: 0x%llx, size: %llu\n", 466 va_block->start, va_block->end, va_block->size); 467#endif 468} 469 470/** 471 * merge_va_blocks_locked() - merge a virtual block if possible. 472 * @hdev: pointer to the habanalabs device structure. 473 * @va_list: pointer to the virtual addresses block list. 474 * @va_block: virtual block to merge with adjacent blocks. 475 * 476 * This function does the following: 477 * - Merge the given blocks with the adjacent blocks if their virtual ranges 478 * create a contiguous virtual range. 479 * 480 * This Function should be called only when va_list lock is taken. 481 */ 482static void merge_va_blocks_locked(struct hl_device *hdev, 483 struct list_head *va_list, struct hl_vm_va_block *va_block) 484{ 485 struct hl_vm_va_block *prev, *next; 486 487 prev = list_prev_entry(va_block, node); 488 if (&prev->node != va_list && prev->end + 1 == va_block->start) { 489 prev->end = va_block->end; 490 prev->size = prev->end - prev->start; 491 list_del(&va_block->node); 492 kfree(va_block); 493 va_block = prev; 494 } 495 496 next = list_next_entry(va_block, node); 497 if (&next->node != va_list && va_block->end + 1 == next->start) { 498 next->start = va_block->start; 499 next->size = next->end - next->start; 500 list_del(&va_block->node); 501 kfree(va_block); 502 } 503} 504 505/** 506 * add_va_block_locked() - add a virtual block to the virtual addresses list. 507 * @hdev: pointer to the habanalabs device structure. 508 * @va_list: pointer to the virtual addresses block list. 509 * @start: start virtual address. 510 * @end: end virtual address. 511 * 512 * This function does the following: 513 * - Add the given block to the virtual blocks list and merge with other blocks 514 * if a contiguous virtual block can be created. 515 * 516 * This Function should be called only when va_list lock is taken. 
517 */ 518static int add_va_block_locked(struct hl_device *hdev, 519 struct list_head *va_list, u64 start, u64 end) 520{ 521 struct hl_vm_va_block *va_block, *res = NULL; 522 u64 size = end - start + 1; 523 524 print_va_list_locked(hdev, va_list); 525 526 list_for_each_entry(va_block, va_list, node) { 527 /* TODO: remove upon matureness */ 528 if (hl_mem_area_crosses_range(start, size, va_block->start, 529 va_block->end)) { 530 dev_err(hdev->dev, 531 "block crossing ranges at start 0x%llx, end 0x%llx\n", 532 va_block->start, va_block->end); 533 return -EINVAL; 534 } 535 536 if (va_block->end < start) 537 res = va_block; 538 } 539 540 va_block = kmalloc(sizeof(*va_block), GFP_KERNEL); 541 if (!va_block) 542 return -ENOMEM; 543 544 va_block->start = start; 545 va_block->end = end; 546 va_block->size = size; 547 548 if (!res) 549 list_add(&va_block->node, va_list); 550 else 551 list_add(&va_block->node, &res->node); 552 553 merge_va_blocks_locked(hdev, va_list, va_block); 554 555 print_va_list_locked(hdev, va_list); 556 557 return 0; 558} 559 560/** 561 * add_va_block() - wrapper for add_va_block_locked. 562 * @hdev: pointer to the habanalabs device structure. 563 * @va_range: pointer to the virtual addresses range object. 564 * @start: start virtual address. 565 * @end: end virtual address. 566 * 567 * This function does the following: 568 * - Takes the list lock and calls add_va_block_locked. 569 */ 570static inline int add_va_block(struct hl_device *hdev, 571 struct hl_va_range *va_range, u64 start, u64 end) 572{ 573 int rc; 574 575 mutex_lock(&va_range->lock); 576 rc = add_va_block_locked(hdev, &va_range->list, start, end); 577 mutex_unlock(&va_range->lock); 578 579 return rc; 580} 581 582/** 583 * is_hint_crossing_range() - check if hint address crossing specified reserved. 584 * @range_type: virtual space range type. 585 * @start_addr: start virtual address. 586 * @size: block size. 587 * @prop: asic properties structure to retrieve reserved ranges from. 588 */ 589static inline bool is_hint_crossing_range(enum hl_va_range_type range_type, 590 u64 start_addr, u32 size, struct asic_fixed_properties *prop) { 591 bool range_cross; 592 593 if (range_type == HL_VA_RANGE_TYPE_DRAM) 594 range_cross = 595 hl_mem_area_crosses_range(start_addr, size, 596 prop->hints_dram_reserved_va_range.start_addr, 597 prop->hints_dram_reserved_va_range.end_addr); 598 else if (range_type == HL_VA_RANGE_TYPE_HOST) 599 range_cross = 600 hl_mem_area_crosses_range(start_addr, size, 601 prop->hints_host_reserved_va_range.start_addr, 602 prop->hints_host_reserved_va_range.end_addr); 603 else 604 range_cross = 605 hl_mem_area_crosses_range(start_addr, size, 606 prop->hints_host_hpage_reserved_va_range.start_addr, 607 prop->hints_host_hpage_reserved_va_range.end_addr); 608 609 return range_cross; 610} 611 612/** 613 * get_va_block() - get a virtual block for the given size and alignment. 614 * 615 * @hdev: pointer to the habanalabs device structure. 616 * @va_range: pointer to the virtual addresses range. 617 * @size: requested block size. 618 * @hint_addr: hint for requested address by the user. 619 * @va_block_align: required alignment of the virtual block start address. 620 * @range_type: va range type (host, dram) 621 * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT 622 * 623 * This function does the following: 624 * - Iterate on the virtual block list to find a suitable virtual block for the 625 * given size, hint address and alignment. 
626 * - Reserve the requested block and update the list. 627 * - Return the start address of the virtual block. 628 */ 629static u64 get_va_block(struct hl_device *hdev, 630 struct hl_va_range *va_range, 631 u64 size, u64 hint_addr, u32 va_block_align, 632 enum hl_va_range_type range_type, 633 u32 flags) 634{ 635 struct hl_vm_va_block *va_block, *new_va_block = NULL; 636 struct asic_fixed_properties *prop = &hdev->asic_prop; 637 u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end, 638 align_mask, reserved_valid_start = 0, reserved_valid_size = 0, 639 dram_hint_mask = prop->dram_hints_align_mask; 640 bool add_prev = false; 641 bool is_align_pow_2 = is_power_of_2(va_range->page_size); 642 bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr); 643 bool force_hint = flags & HL_MEM_FORCE_HINT; 644 645 if (is_align_pow_2) 646 align_mask = ~((u64)va_block_align - 1); 647 else 648 /* 649 * with non-power-of-2 range we work only with page granularity 650 * and the start address is page aligned, 651 * so no need for alignment checking. 652 */ 653 size = DIV_ROUND_UP_ULL(size, va_range->page_size) * 654 va_range->page_size; 655 656 tmp_hint_addr = hint_addr & ~dram_hint_mask; 657 658 /* Check if we need to ignore hint address */ 659 if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) || 660 (!is_align_pow_2 && is_hint_dram_addr && 661 do_div(tmp_hint_addr, va_range->page_size))) { 662 663 if (force_hint) { 664 /* Hint must be respected, so here we just fail */ 665 dev_err(hdev->dev, 666 "Hint address 0x%llx is not page aligned - cannot be respected\n", 667 hint_addr); 668 return 0; 669 } 670 671 dev_dbg(hdev->dev, 672 "Hint address 0x%llx will be ignored because it is not aligned\n", 673 hint_addr); 674 hint_addr = 0; 675 } 676 677 mutex_lock(&va_range->lock); 678 679 print_va_list_locked(hdev, &va_range->list); 680 681 list_for_each_entry(va_block, &va_range->list, node) { 682 /* Calc the first possible aligned addr */ 683 valid_start = va_block->start; 684 685 if (is_align_pow_2 && (valid_start & (va_block_align - 1))) { 686 valid_start &= align_mask; 687 valid_start += va_block_align; 688 if (valid_start > va_block->end) 689 continue; 690 } 691 692 valid_size = va_block->end - valid_start + 1; 693 if (valid_size < size) 694 continue; 695 696 /* 697 * In case hint address is 0, and hints_range_reservation 698 * property enabled, then avoid allocating va blocks from the 699 * range reserved for hint addresses 700 */ 701 if (prop->hints_range_reservation && !hint_addr) 702 if (is_hint_crossing_range(range_type, valid_start, 703 size, prop)) 704 continue; 705 706 /* Pick the minimal length block which has the required size */ 707 if (!new_va_block || (valid_size < reserved_valid_size)) { 708 new_va_block = va_block; 709 reserved_valid_start = valid_start; 710 reserved_valid_size = valid_size; 711 } 712 713 if (hint_addr && hint_addr >= valid_start && 714 (hint_addr + size) <= va_block->end) { 715 new_va_block = va_block; 716 reserved_valid_start = hint_addr; 717 reserved_valid_size = valid_size; 718 break; 719 } 720 } 721 722 if (!new_va_block) { 723 dev_err(hdev->dev, "no available va block for size %llu\n", 724 size); 725 goto out; 726 } 727 728 if (force_hint && reserved_valid_start != hint_addr) { 729 /* Hint address must be respected. If we are here - this means 730 * we could not respect it. 
731 */ 732 dev_err(hdev->dev, 733 "Hint address 0x%llx could not be respected\n", 734 hint_addr); 735 reserved_valid_start = 0; 736 goto out; 737 } 738 739 /* 740 * Check if there is some leftover range due to reserving the new 741 * va block, then return it to the main virtual addresses list. 742 */ 743 if (reserved_valid_start > new_va_block->start) { 744 prev_start = new_va_block->start; 745 prev_end = reserved_valid_start - 1; 746 747 new_va_block->start = reserved_valid_start; 748 new_va_block->size = reserved_valid_size; 749 750 add_prev = true; 751 } 752 753 if (new_va_block->size > size) { 754 new_va_block->start += size; 755 new_va_block->size = new_va_block->end - new_va_block->start + 1; 756 } else { 757 list_del(&new_va_block->node); 758 kfree(new_va_block); 759 } 760 761 if (add_prev) 762 add_va_block_locked(hdev, &va_range->list, prev_start, 763 prev_end); 764 765 print_va_list_locked(hdev, &va_range->list); 766out: 767 mutex_unlock(&va_range->lock); 768 769 return reserved_valid_start; 770} 771 772/* 773 * hl_reserve_va_block() - reserve a virtual block of a given size. 774 * @hdev: pointer to the habanalabs device structure. 775 * @ctx: current context 776 * @type: virtual addresses range type. 777 * @size: requested block size. 778 * @alignment: required alignment in bytes of the virtual block start address, 779 * 0 means no alignment. 780 * 781 * This function does the following: 782 * - Iterate on the virtual block list to find a suitable virtual block for the 783 * given size and alignment. 784 * - Reserve the requested block and update the list. 785 * - Return the start address of the virtual block. 786 */ 787u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, 788 enum hl_va_range_type type, u32 size, u32 alignment) 789{ 790 return get_va_block(hdev, ctx->va_range[type], size, 0, 791 max(alignment, ctx->va_range[type]->page_size), 792 type, 0); 793} 794 795/** 796 * hl_get_va_range_type() - get va_range type for the given address and size. 797 * @ctx: context to fetch va_range from. 798 * @address: the start address of the area we want to validate. 799 * @size: the size in bytes of the area we want to validate. 800 * @type: returned va_range type. 801 * 802 * Return: true if the area is inside a valid range, false otherwise. 803 */ 804static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size, 805 enum hl_va_range_type *type) 806{ 807 int i; 808 809 for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) { 810 if (hl_mem_area_inside_range(address, size, 811 ctx->va_range[i]->start_addr, 812 ctx->va_range[i]->end_addr)) { 813 *type = i; 814 return 0; 815 } 816 } 817 818 return -EINVAL; 819} 820 821/** 822 * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block. 823 * @hdev: pointer to the habanalabs device structure 824 * @ctx: pointer to the context structure. 825 * @start_addr: start virtual address. 826 * @size: number of bytes to unreserve. 827 * 828 * This function does the following: 829 * - Takes the list lock and calls add_va_block_locked. 
830 */ 831int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, 832 u64 start_addr, u64 size) 833{ 834 enum hl_va_range_type type; 835 int rc; 836 837 rc = hl_get_va_range_type(ctx, start_addr, size, &type); 838 if (rc) { 839 dev_err(hdev->dev, 840 "cannot find va_range for va %#llx size %llu", 841 start_addr, size); 842 return rc; 843 } 844 845 rc = add_va_block(hdev, ctx->va_range[type], start_addr, 846 start_addr + size - 1); 847 if (rc) 848 dev_warn(hdev->dev, 849 "add va block failed for vaddr: 0x%llx\n", start_addr); 850 851 return rc; 852} 853 854/** 855 * init_phys_pg_pack_from_userptr() - initialize physical page pack from host 856 * memory 857 * @ctx: pointer to the context structure. 858 * @userptr: userptr to initialize from. 859 * @pphys_pg_pack: result pointer. 860 * @force_regular_page: tell the function to ignore huge page optimization, 861 * even if possible. Needed for cases where the device VA 862 * is allocated before we know the composition of the 863 * physical pages 864 * 865 * This function does the following: 866 * - Pin the physical pages related to the given virtual block. 867 * - Create a physical page pack from the physical pages related to the given 868 * virtual block. 869 */ 870static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, 871 struct hl_userptr *userptr, 872 struct hl_vm_phys_pg_pack **pphys_pg_pack, 873 bool force_regular_page) 874{ 875 u32 npages, page_size = PAGE_SIZE, 876 huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; 877 u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); 878 struct hl_vm_phys_pg_pack *phys_pg_pack; 879 bool first = true, is_huge_page_opt; 880 u64 page_mask, total_npages; 881 struct scatterlist *sg; 882 dma_addr_t dma_addr; 883 int rc, i, j; 884 885 phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); 886 if (!phys_pg_pack) 887 return -ENOMEM; 888 889 phys_pg_pack->vm_type = userptr->vm_type; 890 phys_pg_pack->created_from_userptr = true; 891 phys_pg_pack->asid = ctx->asid; 892 atomic_set(&phys_pg_pack->mapping_cnt, 1); 893 894 is_huge_page_opt = (force_regular_page ? false : true); 895 896 /* Only if all dma_addrs are aligned to 2MB and their 897 * sizes is at least 2MB, we can use huge page mapping. 898 * We limit the 2MB optimization to this condition, 899 * since later on we acquire the related VA range as one 900 * consecutive block. 
901 */ 902 total_npages = 0; 903 for_each_sgtable_dma_sg(userptr->sgt, sg, i) { 904 npages = hl_get_sg_info(sg, &dma_addr); 905 906 total_npages += npages; 907 908 if ((npages % pgs_in_huge_page) || 909 (dma_addr & (huge_page_size - 1))) 910 is_huge_page_opt = false; 911 } 912 913 if (is_huge_page_opt) { 914 page_size = huge_page_size; 915 do_div(total_npages, pgs_in_huge_page); 916 } 917 918 page_mask = ~(((u64) page_size) - 1); 919 920 phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64), 921 GFP_KERNEL); 922 if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { 923 rc = -ENOMEM; 924 goto page_pack_arr_mem_err; 925 } 926 927 phys_pg_pack->npages = total_npages; 928 phys_pg_pack->page_size = page_size; 929 phys_pg_pack->total_size = total_npages * page_size; 930 931 j = 0; 932 for_each_sgtable_dma_sg(userptr->sgt, sg, i) { 933 npages = hl_get_sg_info(sg, &dma_addr); 934 935 /* align down to physical page size and save the offset */ 936 if (first) { 937 first = false; 938 phys_pg_pack->offset = dma_addr & (page_size - 1); 939 dma_addr &= page_mask; 940 } 941 942 while (npages) { 943 phys_pg_pack->pages[j++] = dma_addr; 944 dma_addr += page_size; 945 946 if (is_huge_page_opt) 947 npages -= pgs_in_huge_page; 948 else 949 npages--; 950 } 951 } 952 953 *pphys_pg_pack = phys_pg_pack; 954 955 return 0; 956 957page_pack_arr_mem_err: 958 kfree(phys_pg_pack); 959 960 return rc; 961} 962 963/** 964 * map_phys_pg_pack() - maps the physical page pack.. 965 * @ctx: pointer to the context structure. 966 * @vaddr: start address of the virtual area to map from. 967 * @phys_pg_pack: the pack of physical pages to map to. 968 * 969 * This function does the following: 970 * - Maps each chunk of virtual memory to matching physical chunk. 971 * - Stores number of successful mappings in the given argument. 972 * - Returns 0 on success, error code otherwise. 
973 */ 974static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, 975 struct hl_vm_phys_pg_pack *phys_pg_pack) 976{ 977 struct hl_device *hdev = ctx->hdev; 978 u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; 979 u32 page_size = phys_pg_pack->page_size; 980 int rc = 0; 981 bool is_host_addr; 982 983 for (i = 0 ; i < phys_pg_pack->npages ; i++) { 984 paddr = phys_pg_pack->pages[i]; 985 986 rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size, 987 (i + 1) == phys_pg_pack->npages); 988 if (rc) { 989 dev_err(hdev->dev, 990 "map failed for handle %u, npages: %llu, mapped: %llu", 991 phys_pg_pack->handle, phys_pg_pack->npages, 992 mapped_pg_cnt); 993 goto err; 994 } 995 996 mapped_pg_cnt++; 997 next_vaddr += page_size; 998 } 999 1000 return 0; 1001 1002err: 1003 is_host_addr = !hl_is_dram_va(hdev, vaddr); 1004 1005 next_vaddr = vaddr; 1006 for (i = 0 ; i < mapped_pg_cnt ; i++) { 1007 if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, 1008 (i + 1) == mapped_pg_cnt)) 1009 dev_warn_ratelimited(hdev->dev, 1010 "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n", 1011 phys_pg_pack->handle, next_vaddr, 1012 phys_pg_pack->pages[i], page_size); 1013 1014 next_vaddr += page_size; 1015 1016 /* 1017 * unmapping on Palladium can be really long, so avoid a CPU 1018 * soft lockup bug by sleeping a little between unmapping pages 1019 * 1020 * In addition, on host num of pages could be huge, 1021 * because page size could be 4KB, so when unmapping host 1022 * pages sleep every 32K pages to avoid soft lockup 1023 */ 1024 if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) 1025 usleep_range(50, 200); 1026 } 1027 1028 return rc; 1029} 1030 1031/** 1032 * unmap_phys_pg_pack() - unmaps the physical page pack. 1033 * @ctx: pointer to the context structure. 1034 * @vaddr: start address of the virtual area to unmap. 1035 * @phys_pg_pack: the pack of physical pages to unmap. 
1036 */ 1037static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, 1038 struct hl_vm_phys_pg_pack *phys_pg_pack) 1039{ 1040 struct hl_device *hdev = ctx->hdev; 1041 u64 next_vaddr, i; 1042 bool is_host_addr; 1043 u32 page_size; 1044 1045 is_host_addr = !hl_is_dram_va(hdev, vaddr); 1046 page_size = phys_pg_pack->page_size; 1047 next_vaddr = vaddr; 1048 1049 for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { 1050 if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, 1051 (i + 1) == phys_pg_pack->npages)) 1052 dev_warn_ratelimited(hdev->dev, 1053 "unmap failed for vaddr: 0x%llx\n", next_vaddr); 1054 1055 /* 1056 * unmapping on Palladium can be really long, so avoid a CPU 1057 * soft lockup bug by sleeping a little between unmapping pages 1058 * 1059 * In addition, on host num of pages could be huge, 1060 * because page size could be 4KB, so when unmapping host 1061 * pages sleep every 32K pages to avoid soft lockup 1062 */ 1063 if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) 1064 usleep_range(50, 200); 1065 } 1066} 1067 1068static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, 1069 u64 *paddr) 1070{ 1071 struct hl_device *hdev = ctx->hdev; 1072 struct hl_vm *vm = &hdev->vm; 1073 struct hl_vm_phys_pg_pack *phys_pg_pack; 1074 u32 handle; 1075 1076 handle = lower_32_bits(args->map_device.handle); 1077 spin_lock(&vm->idr_lock); 1078 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); 1079 if (!phys_pg_pack) { 1080 spin_unlock(&vm->idr_lock); 1081 dev_err(hdev->dev, "no match for handle %u\n", handle); 1082 return -EINVAL; 1083 } 1084 1085 *paddr = phys_pg_pack->pages[0]; 1086 1087 spin_unlock(&vm->idr_lock); 1088 1089 return 0; 1090} 1091 1092/** 1093 * map_device_va() - map the given memory. 1094 * @ctx: pointer to the context structure. 1095 * @args: host parameters with handle/host virtual address. 1096 * @device_addr: pointer to result device virtual address. 1097 * 1098 * This function does the following: 1099 * - If given a physical device memory handle, map to a device virtual block 1100 * and return the start address of this block. 1101 * - If given a host virtual address and size, find the related physical pages, 1102 * map a device virtual block to this pages and return the start address of 1103 * this block. 
1104 */ 1105static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr) 1106{ 1107 struct hl_vm_phys_pg_pack *phys_pg_pack; 1108 enum hl_va_range_type va_range_type = 0; 1109 struct hl_device *hdev = ctx->hdev; 1110 struct hl_userptr *userptr = NULL; 1111 u32 handle = 0, va_block_align; 1112 struct hl_vm_hash_node *hnode; 1113 struct hl_vm *vm = &hdev->vm; 1114 struct hl_va_range *va_range; 1115 bool is_userptr, do_prefetch; 1116 u64 ret_vaddr, hint_addr; 1117 enum vm_type *vm_type; 1118 int rc; 1119 1120 /* set map flags */ 1121 is_userptr = args->flags & HL_MEM_USERPTR; 1122 do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH); 1123 1124 /* Assume failure */ 1125 *device_addr = 0; 1126 1127 if (is_userptr) { 1128 u64 addr = args->map_host.host_virt_addr, 1129 size = args->map_host.mem_size; 1130 u32 page_size = hdev->asic_prop.pmmu.page_size, 1131 huge_page_size = hdev->asic_prop.pmmu_huge.page_size; 1132 1133 rc = dma_map_host_va(hdev, addr, size, &userptr); 1134 if (rc) { 1135 dev_err(hdev->dev, "failed to get userptr from va\n"); 1136 return rc; 1137 } 1138 1139 rc = init_phys_pg_pack_from_userptr(ctx, userptr, 1140 &phys_pg_pack, false); 1141 if (rc) { 1142 dev_err(hdev->dev, 1143 "unable to init page pack for vaddr 0x%llx\n", 1144 addr); 1145 goto init_page_pack_err; 1146 } 1147 1148 vm_type = (enum vm_type *) userptr; 1149 hint_addr = args->map_host.hint_addr; 1150 handle = phys_pg_pack->handle; 1151 1152 /* get required alignment */ 1153 if (phys_pg_pack->page_size == page_size) { 1154 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; 1155 va_range_type = HL_VA_RANGE_TYPE_HOST; 1156 /* 1157 * huge page alignment may be needed in case of regular 1158 * page mapping, depending on the host VA alignment 1159 */ 1160 if (addr & (huge_page_size - 1)) 1161 va_block_align = page_size; 1162 else 1163 va_block_align = huge_page_size; 1164 } else { 1165 /* 1166 * huge page alignment is needed in case of huge page 1167 * mapping 1168 */ 1169 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; 1170 va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE; 1171 va_block_align = huge_page_size; 1172 } 1173 } else { 1174 handle = lower_32_bits(args->map_device.handle); 1175 1176 spin_lock(&vm->idr_lock); 1177 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); 1178 if (!phys_pg_pack) { 1179 spin_unlock(&vm->idr_lock); 1180 dev_err(hdev->dev, 1181 "no match for handle %u\n", handle); 1182 return -EINVAL; 1183 } 1184 1185 /* increment now to avoid freeing device memory while mapping */ 1186 atomic_inc(&phys_pg_pack->mapping_cnt); 1187 1188 spin_unlock(&vm->idr_lock); 1189 1190 vm_type = (enum vm_type *) phys_pg_pack; 1191 1192 hint_addr = args->map_device.hint_addr; 1193 1194 /* DRAM VA alignment is the same as the MMU page size */ 1195 va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; 1196 va_range_type = HL_VA_RANGE_TYPE_DRAM; 1197 va_block_align = hdev->asic_prop.dmmu.page_size; 1198 } 1199 1200 /* 1201 * relevant for mapping device physical memory only, as host memory is 1202 * implicitly shared 1203 */ 1204 if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) && 1205 phys_pg_pack->asid != ctx->asid) { 1206 dev_err(hdev->dev, 1207 "Failed to map memory, handle %u is not shared\n", 1208 handle); 1209 rc = -EPERM; 1210 goto shared_err; 1211 } 1212 1213 hnode = kzalloc(sizeof(*hnode), GFP_KERNEL); 1214 if (!hnode) { 1215 rc = -ENOMEM; 1216 goto hnode_err; 1217 } 1218 1219 if (hint_addr && phys_pg_pack->offset) { 1220 if (args->flags & 
HL_MEM_FORCE_HINT) { 1221 /* Fail if hint must be respected but it can't be */ 1222 dev_err(hdev->dev, 1223 "Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n", 1224 hint_addr, phys_pg_pack->offset); 1225 rc = -EINVAL; 1226 goto va_block_err; 1227 } 1228 dev_dbg(hdev->dev, 1229 "Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n", 1230 hint_addr, phys_pg_pack->offset); 1231 } 1232 1233 ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, 1234 hint_addr, va_block_align, 1235 va_range_type, args->flags); 1236 if (!ret_vaddr) { 1237 dev_err(hdev->dev, "no available va block for handle %u\n", 1238 handle); 1239 rc = -ENOMEM; 1240 goto va_block_err; 1241 } 1242 1243 mutex_lock(&ctx->mmu_lock); 1244 1245 rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); 1246 if (rc) { 1247 dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); 1248 goto map_err; 1249 } 1250 1251 rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, 1252 ctx->asid, ret_vaddr, phys_pg_pack->total_size); 1253 if (rc) 1254 goto map_err; 1255 1256 mutex_unlock(&ctx->mmu_lock); 1257 1258 /* 1259 * prefetch is done upon user's request. it is performed in WQ as and so can 1260 * be outside the MMU lock. the operation itself is already protected by the mmu lock 1261 */ 1262 if (do_prefetch) { 1263 rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr, 1264 phys_pg_pack->total_size); 1265 if (rc) 1266 goto map_err; 1267 } 1268 1269 ret_vaddr += phys_pg_pack->offset; 1270 1271 hnode->ptr = vm_type; 1272 hnode->vaddr = ret_vaddr; 1273 1274 mutex_lock(&ctx->mem_hash_lock); 1275 hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); 1276 mutex_unlock(&ctx->mem_hash_lock); 1277 1278 *device_addr = ret_vaddr; 1279 1280 if (is_userptr) 1281 rc = free_phys_pg_pack(hdev, phys_pg_pack); 1282 1283 return rc; 1284 1285map_err: 1286 mutex_unlock(&ctx->mmu_lock); 1287 1288 if (add_va_block(hdev, va_range, ret_vaddr, 1289 ret_vaddr + phys_pg_pack->total_size - 1)) 1290 dev_warn(hdev->dev, 1291 "release va block failed for handle 0x%x, vaddr: 0x%llx\n", 1292 handle, ret_vaddr); 1293 1294va_block_err: 1295 kfree(hnode); 1296hnode_err: 1297shared_err: 1298 atomic_dec(&phys_pg_pack->mapping_cnt); 1299 if (is_userptr) 1300 free_phys_pg_pack(hdev, phys_pg_pack); 1301init_page_pack_err: 1302 if (is_userptr) 1303 dma_unmap_host_va(hdev, userptr); 1304 1305 return rc; 1306} 1307 1308/** 1309 * unmap_device_va() - unmap the given device virtual address. 1310 * @ctx: pointer to the context structure. 1311 * @args: host parameters with device virtual address to unmap. 1312 * @ctx_free: true if in context free flow, false otherwise. 1313 * 1314 * This function does the following: 1315 * - unmap the physical pages related to the given virtual address. 1316 * - return the device virtual block to the virtual block list. 
1317 */ 1318static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, 1319 bool ctx_free) 1320{ 1321 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; 1322 u64 vaddr = args->unmap.device_virt_addr; 1323 struct hl_vm_hash_node *hnode = NULL; 1324 struct asic_fixed_properties *prop; 1325 struct hl_device *hdev = ctx->hdev; 1326 struct hl_userptr *userptr = NULL; 1327 struct hl_va_range *va_range; 1328 enum vm_type *vm_type; 1329 bool is_userptr; 1330 int rc = 0; 1331 1332 prop = &hdev->asic_prop; 1333 1334 /* protect from double entrance */ 1335 mutex_lock(&ctx->mem_hash_lock); 1336 hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) 1337 if (vaddr == hnode->vaddr) 1338 break; 1339 1340 if (!hnode) { 1341 mutex_unlock(&ctx->mem_hash_lock); 1342 dev_err(hdev->dev, 1343 "unmap failed, no mem hnode for vaddr 0x%llx\n", 1344 vaddr); 1345 return -EINVAL; 1346 } 1347 1348 hash_del(&hnode->node); 1349 mutex_unlock(&ctx->mem_hash_lock); 1350 1351 vm_type = hnode->ptr; 1352 1353 if (*vm_type == VM_TYPE_USERPTR) { 1354 is_userptr = true; 1355 userptr = hnode->ptr; 1356 1357 rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack, 1358 false); 1359 if (rc) { 1360 dev_err(hdev->dev, 1361 "unable to init page pack for vaddr 0x%llx\n", 1362 vaddr); 1363 goto vm_type_err; 1364 } 1365 1366 if (phys_pg_pack->page_size == 1367 hdev->asic_prop.pmmu.page_size) 1368 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; 1369 else 1370 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; 1371 } else if (*vm_type == VM_TYPE_PHYS_PACK) { 1372 is_userptr = false; 1373 va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; 1374 phys_pg_pack = hnode->ptr; 1375 } else { 1376 dev_warn(hdev->dev, 1377 "unmap failed, unknown vm desc for vaddr 0x%llx\n", 1378 vaddr); 1379 rc = -EFAULT; 1380 goto vm_type_err; 1381 } 1382 1383 if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) { 1384 dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr); 1385 rc = -EINVAL; 1386 goto mapping_cnt_err; 1387 } 1388 1389 if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size)) 1390 vaddr = prop->dram_base_address + 1391 DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address, 1392 phys_pg_pack->page_size) * 1393 phys_pg_pack->page_size; 1394 else 1395 vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); 1396 1397 mutex_lock(&ctx->mmu_lock); 1398 1399 unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); 1400 1401 /* 1402 * During context free this function is called in a loop to clean all 1403 * the context mappings. Hence the cache invalidation can be called once 1404 * at the loop end rather than for each iteration 1405 */ 1406 if (!ctx_free) 1407 rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr, 1408 phys_pg_pack->total_size); 1409 1410 mutex_unlock(&ctx->mmu_lock); 1411 1412 /* 1413 * If the context is closing we don't need to check for the MMU cache 1414 * invalidation return code and update the VA free list as in this flow 1415 * we invalidate the MMU cache outside of this unmap function and the VA 1416 * free list will be freed anyway. 
1417 */ 1418 if (!ctx_free) { 1419 int tmp_rc; 1420 1421 tmp_rc = add_va_block(hdev, va_range, vaddr, 1422 vaddr + phys_pg_pack->total_size - 1); 1423 if (tmp_rc) { 1424 dev_warn(hdev->dev, 1425 "add va block failed for vaddr: 0x%llx\n", 1426 vaddr); 1427 if (!rc) 1428 rc = tmp_rc; 1429 } 1430 } 1431 1432 atomic_dec(&phys_pg_pack->mapping_cnt); 1433 kfree(hnode); 1434 1435 if (is_userptr) { 1436 free_phys_pg_pack(hdev, phys_pg_pack); 1437 dma_unmap_host_va(hdev, userptr); 1438 } 1439 1440 return rc; 1441 1442mapping_cnt_err: 1443 if (is_userptr) 1444 free_phys_pg_pack(hdev, phys_pg_pack); 1445vm_type_err: 1446 mutex_lock(&ctx->mem_hash_lock); 1447 hash_add(ctx->mem_hash, &hnode->node, vaddr); 1448 mutex_unlock(&ctx->mem_hash_lock); 1449 1450 return rc; 1451} 1452 1453static int map_block(struct hl_device *hdev, u64 address, u64 *handle, 1454 u32 *size) 1455{ 1456 u32 block_id = 0; 1457 int rc; 1458 1459 rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); 1460 1461 *handle = block_id | HL_MMAP_TYPE_BLOCK; 1462 *handle <<= PAGE_SHIFT; 1463 1464 return rc; 1465} 1466 1467static void hw_block_vm_close(struct vm_area_struct *vma) 1468{ 1469 struct hl_vm_hw_block_list_node *lnode = 1470 (struct hl_vm_hw_block_list_node *) vma->vm_private_data; 1471 struct hl_ctx *ctx = lnode->ctx; 1472 1473 mutex_lock(&ctx->hw_block_list_lock); 1474 list_del(&lnode->node); 1475 mutex_unlock(&ctx->hw_block_list_lock); 1476 hl_ctx_put(ctx); 1477 kfree(lnode); 1478 vma->vm_private_data = NULL; 1479} 1480 1481static const struct vm_operations_struct hw_block_vm_ops = { 1482 .close = hw_block_vm_close 1483}; 1484 1485/** 1486 * hl_hw_block_mmap() - mmap a hw block to user. 1487 * @hpriv: pointer to the private data of the fd 1488 * @vma: pointer to vm_area_struct of the process 1489 * 1490 * Driver increments context reference for every HW block mapped in order 1491 * to prevent user from closing FD without unmapping first 1492 */ 1493int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) 1494{ 1495 struct hl_vm_hw_block_list_node *lnode; 1496 struct hl_device *hdev = hpriv->hdev; 1497 struct hl_ctx *ctx = hpriv->ctx; 1498 u32 block_id, block_size; 1499 int rc; 1500 1501 /* We use the page offset to hold the block id and thus we need to clear 1502 * it before doing the mmap itself 1503 */ 1504 block_id = vma->vm_pgoff; 1505 vma->vm_pgoff = 0; 1506 1507 /* Driver only allows mapping of a complete HW block */ 1508 block_size = vma->vm_end - vma->vm_start; 1509 1510 if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) { 1511 dev_err(hdev->dev, 1512 "user pointer is invalid - 0x%lx\n", 1513 vma->vm_start); 1514 1515 return -EINVAL; 1516 } 1517 1518 lnode = kzalloc(sizeof(*lnode), GFP_KERNEL); 1519 if (!lnode) 1520 return -ENOMEM; 1521 1522 vma->vm_ops = &hw_block_vm_ops; 1523 vma->vm_private_data = lnode; 1524 1525 hl_ctx_get(ctx); 1526 1527 rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); 1528 if (rc) { 1529 hl_ctx_put(ctx); 1530 kfree(lnode); 1531 return rc; 1532 } 1533 1534 lnode->ctx = ctx; 1535 lnode->vaddr = vma->vm_start; 1536 lnode->size = block_size; 1537 lnode->id = block_id; 1538 1539 mutex_lock(&ctx->hw_block_list_lock); 1540 list_add_tail(&lnode->node, &ctx->hw_block_mem_list); 1541 mutex_unlock(&ctx->hw_block_list_lock); 1542 1543 vma->vm_pgoff = block_id; 1544 1545 return 0; 1546} 1547 1548static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size, 1549 struct device *dev, enum dma_data_direction dir) 1550{ 
1551 dma_addr_t addr; 1552 int rc; 1553 1554 addr = dma_map_resource(dev, bar_address, chunk_size, dir, 1555 DMA_ATTR_SKIP_CPU_SYNC); 1556 rc = dma_mapping_error(dev, addr); 1557 if (rc) 1558 return rc; 1559 1560 sg_set_page(sg, NULL, chunk_size, 0); 1561 sg_dma_address(sg) = addr; 1562 sg_dma_len(sg) = chunk_size; 1563 1564 return 0; 1565} 1566 1567static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages, 1568 u64 page_size, struct device *dev, 1569 enum dma_data_direction dir) 1570{ 1571 u64 chunk_size, bar_address, dma_max_seg_size; 1572 struct asic_fixed_properties *prop; 1573 int rc, i, j, nents, cur_page; 1574 struct scatterlist *sg; 1575 struct sg_table *sgt; 1576 1577 prop = &hdev->asic_prop; 1578 1579 dma_max_seg_size = dma_get_max_seg_size(dev); 1580 1581 /* We would like to align the max segment size to PAGE_SIZE, so the 1582 * SGL will contain aligned addresses that can be easily mapped to 1583 * an MMU 1584 */ 1585 dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE); 1586 if (dma_max_seg_size < PAGE_SIZE) { 1587 dev_err_ratelimited(hdev->dev, 1588 "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n", 1589 dma_max_seg_size); 1590 return ERR_PTR(-EINVAL); 1591 } 1592 1593 sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); 1594 if (!sgt) 1595 return ERR_PTR(-ENOMEM); 1596 1597 /* If the size of each page is larger than the dma max segment size, 1598 * then we can't combine pages and the number of entries in the SGL 1599 * will just be the 1600 * <number of pages> * <chunks of max segment size in each page> 1601 */ 1602 if (page_size > dma_max_seg_size) 1603 nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size); 1604 else 1605 /* Get number of non-contiguous chunks */ 1606 for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) { 1607 if (pages[i - 1] + page_size != pages[i] || 1608 chunk_size + page_size > dma_max_seg_size) { 1609 nents++; 1610 chunk_size = page_size; 1611 continue; 1612 } 1613 1614 chunk_size += page_size; 1615 } 1616 1617 rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); 1618 if (rc) 1619 goto error_free; 1620 1621 cur_page = 0; 1622 1623 if (page_size > dma_max_seg_size) { 1624 u64 size_left, cur_device_address = 0; 1625 1626 size_left = page_size; 1627 1628 /* Need to split each page into the number of chunks of 1629 * dma_max_seg_size 1630 */ 1631 for_each_sgtable_dma_sg(sgt, sg, i) { 1632 if (size_left == page_size) 1633 cur_device_address = 1634 pages[cur_page] - prop->dram_base_address; 1635 else 1636 cur_device_address += dma_max_seg_size; 1637 1638 chunk_size = min(size_left, dma_max_seg_size); 1639 1640 bar_address = hdev->dram_pci_bar_start + cur_device_address; 1641 1642 rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); 1643 if (rc) 1644 goto error_unmap; 1645 1646 if (size_left > dma_max_seg_size) { 1647 size_left -= dma_max_seg_size; 1648 } else { 1649 cur_page++; 1650 size_left = page_size; 1651 } 1652 } 1653 } else { 1654 /* Merge pages and put them into the scatterlist */ 1655 for_each_sgtable_dma_sg(sgt, sg, i) { 1656 chunk_size = page_size; 1657 for (j = cur_page + 1 ; j < npages ; j++) { 1658 if (pages[j - 1] + page_size != pages[j] || 1659 chunk_size + page_size > dma_max_seg_size) 1660 break; 1661 1662 chunk_size += page_size; 1663 } 1664 1665 bar_address = hdev->dram_pci_bar_start + 1666 (pages[cur_page] - prop->dram_base_address); 1667 1668 rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); 1669 if (rc) 1670 goto error_unmap; 1671 1672 cur_page = j; 1673 } 
1674 } 1675 1676 /* Because we are not going to include a CPU list we want to have some 1677 * chance that other users will detect this by setting the orig_nents 1678 * to 0 and using only nents (length of DMA list) when going over the 1679 * sgl 1680 */ 1681 sgt->orig_nents = 0; 1682 1683 return sgt; 1684 1685error_unmap: 1686 for_each_sgtable_dma_sg(sgt, sg, i) { 1687 if (!sg_dma_len(sg)) 1688 continue; 1689 1690 dma_unmap_resource(dev, sg_dma_address(sg), 1691 sg_dma_len(sg), dir, 1692 DMA_ATTR_SKIP_CPU_SYNC); 1693 } 1694 1695 sg_free_table(sgt); 1696 1697error_free: 1698 kfree(sgt); 1699 return ERR_PTR(rc); 1700} 1701 1702static int hl_dmabuf_attach(struct dma_buf *dmabuf, 1703 struct dma_buf_attachment *attachment) 1704{ 1705 struct hl_dmabuf_priv *hl_dmabuf; 1706 struct hl_device *hdev; 1707 int rc; 1708 1709 hl_dmabuf = dmabuf->priv; 1710 hdev = hl_dmabuf->ctx->hdev; 1711 1712 rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true); 1713 1714 if (rc < 0) 1715 attachment->peer2peer = false; 1716 return 0; 1717} 1718 1719static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, 1720 enum dma_data_direction dir) 1721{ 1722 struct dma_buf *dma_buf = attachment->dmabuf; 1723 struct hl_vm_phys_pg_pack *phys_pg_pack; 1724 struct hl_dmabuf_priv *hl_dmabuf; 1725 struct hl_device *hdev; 1726 struct sg_table *sgt; 1727 1728 hl_dmabuf = dma_buf->priv; 1729 hdev = hl_dmabuf->ctx->hdev; 1730 phys_pg_pack = hl_dmabuf->phys_pg_pack; 1731 1732 if (!attachment->peer2peer) { 1733 dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n"); 1734 return ERR_PTR(-EPERM); 1735 } 1736 1737 if (phys_pg_pack) 1738 sgt = alloc_sgt_from_device_pages(hdev, 1739 phys_pg_pack->pages, 1740 phys_pg_pack->npages, 1741 phys_pg_pack->page_size, 1742 attachment->dev, 1743 dir); 1744 else 1745 sgt = alloc_sgt_from_device_pages(hdev, 1746 &hl_dmabuf->device_address, 1747 1, 1748 hl_dmabuf->dmabuf->size, 1749 attachment->dev, 1750 dir); 1751 1752 if (IS_ERR(sgt)) 1753 dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt)); 1754 1755 return sgt; 1756} 1757 1758static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment, 1759 struct sg_table *sgt, 1760 enum dma_data_direction dir) 1761{ 1762 struct scatterlist *sg; 1763 int i; 1764 1765 /* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives 1766 * only in the 'device' domain (after all, it maps a PCI bar address which points to the 1767 * device memory). 1768 * 1769 * Therefore, it was never in the 'CPU' domain and hence, there is no need to perform 1770 * a sync of the memory to the CPU's cache, as it never resided inside that cache. 
1771 */ 1772 for_each_sgtable_dma_sg(sgt, sg, i) 1773 dma_unmap_resource(attachment->dev, sg_dma_address(sg), 1774 sg_dma_len(sg), dir, 1775 DMA_ATTR_SKIP_CPU_SYNC); 1776 1777 /* Need to restore orig_nents because sg_free_table use that field */ 1778 sgt->orig_nents = sgt->nents; 1779 sg_free_table(sgt); 1780 kfree(sgt); 1781} 1782 1783static void hl_release_dmabuf(struct dma_buf *dmabuf) 1784{ 1785 struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; 1786 struct hl_ctx *ctx = hl_dmabuf->ctx; 1787 struct hl_device *hdev = ctx->hdev; 1788 struct hl_vm *vm = &hdev->vm; 1789 1790 if (hl_dmabuf->phys_pg_pack) { 1791 spin_lock(&vm->idr_lock); 1792 hl_dmabuf->phys_pg_pack->exporting_cnt--; 1793 spin_unlock(&vm->idr_lock); 1794 } 1795 1796 hl_ctx_put(hl_dmabuf->ctx); 1797 1798 kfree(hl_dmabuf); 1799} 1800 1801static const struct dma_buf_ops habanalabs_dmabuf_ops = { 1802 .attach = hl_dmabuf_attach, 1803 .map_dma_buf = hl_map_dmabuf, 1804 .unmap_dma_buf = hl_unmap_dmabuf, 1805 .release = hl_release_dmabuf, 1806}; 1807 1808static int export_dmabuf_common(struct hl_ctx *ctx, 1809 struct hl_dmabuf_priv *hl_dmabuf, 1810 u64 total_size, int flags, int *dmabuf_fd) 1811{ 1812 DEFINE_DMA_BUF_EXPORT_INFO(exp_info); 1813 struct hl_device *hdev = ctx->hdev; 1814 int rc, fd; 1815 1816 exp_info.ops = &habanalabs_dmabuf_ops; 1817 exp_info.size = total_size; 1818 exp_info.flags = flags; 1819 exp_info.priv = hl_dmabuf; 1820 1821 hl_dmabuf->dmabuf = dma_buf_export(&exp_info); 1822 if (IS_ERR(hl_dmabuf->dmabuf)) { 1823 dev_err(hdev->dev, "failed to export dma-buf\n"); 1824 return PTR_ERR(hl_dmabuf->dmabuf); 1825 } 1826 1827 fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); 1828 if (fd < 0) { 1829 dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf\n"); 1830 rc = fd; 1831 goto err_dma_buf_put; 1832 } 1833 1834 hl_dmabuf->ctx = ctx; 1835 hl_ctx_get(hl_dmabuf->ctx); 1836 1837 *dmabuf_fd = fd; 1838 1839 return 0; 1840 1841err_dma_buf_put: 1842 dma_buf_put(hl_dmabuf->dmabuf); 1843 return rc; 1844} 1845 1846/** 1847 * export_dmabuf_from_addr() - export a dma-buf object for the given memory 1848 * address and size. 1849 * @ctx: pointer to the context structure. 1850 * @device_addr: device memory physical address. 1851 * @size: size of device memory. 1852 * @flags: DMA-BUF file/FD flags. 1853 * @dmabuf_fd: pointer to result FD that represents the dma-buf object. 1854 * 1855 * Create and export a dma-buf object for an existing memory allocation inside 1856 * the device memory, and return a FD which is associated with the dma-buf 1857 * object. 1858 * 1859 * Return: 0 on success, non-zero for failure. 
1860 */ 1861static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr, 1862 u64 size, int flags, int *dmabuf_fd) 1863{ 1864 struct hl_dmabuf_priv *hl_dmabuf; 1865 struct hl_device *hdev = ctx->hdev; 1866 struct asic_fixed_properties *prop; 1867 u64 bar_address; 1868 int rc; 1869 1870 prop = &hdev->asic_prop; 1871 1872 if (!IS_ALIGNED(device_addr, PAGE_SIZE)) { 1873 dev_dbg(hdev->dev, 1874 "exported device memory address 0x%llx should be aligned to 0x%lx\n", 1875 device_addr, PAGE_SIZE); 1876 return -EINVAL; 1877 } 1878 1879 if (size < PAGE_SIZE) { 1880 dev_dbg(hdev->dev, 1881 "exported device memory size %llu should be equal to or greater than %lu\n", 1882 size, PAGE_SIZE); 1883 return -EINVAL; 1884 } 1885 1886 if (device_addr < prop->dram_user_base_address || 1887 device_addr + size > prop->dram_end_address || 1888 device_addr + size < device_addr) { 1889 dev_dbg(hdev->dev, 1890 "DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n", 1891 device_addr, size); 1892 return -EINVAL; 1893 } 1894 1895 bar_address = hdev->dram_pci_bar_start + 1896 (device_addr - prop->dram_base_address); 1897 1898 if (bar_address + size > 1899 hdev->dram_pci_bar_start + prop->dram_pci_bar_size || 1900 bar_address + size < bar_address) { 1901 dev_dbg(hdev->dev, 1902 "DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n", 1903 device_addr, size); 1904 return -EINVAL; 1905 } 1906 1907 hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); 1908 if (!hl_dmabuf) 1909 return -ENOMEM; 1910 1911 hl_dmabuf->device_address = device_addr; 1912 1913 rc = export_dmabuf_common(ctx, hl_dmabuf, size, flags, dmabuf_fd); 1914 if (rc) 1915 goto err_free_dmabuf_wrapper; 1916 1917 return 0; 1918 1919err_free_dmabuf_wrapper: 1920 kfree(hl_dmabuf); 1921 return rc; 1922} 1923 1924/** 1925 * export_dmabuf_from_handle() - export a dma-buf object for the given memory 1926 * handle. 1927 * @ctx: pointer to the context structure. 1928 * @handle: device memory allocation handle. 1929 * @flags: DMA-BUF file/FD flags. 1930 * @dmabuf_fd: pointer to result FD that represents the dma-buf object. 1931 * 1932 * Create and export a dma-buf object for an existing memory allocation inside 1933 * the device memory, and return a FD which is associated with the dma-buf 1934 * object. 1935 * 1936 * Return: 0 on success, non-zero for failure. 
1937 */ 1938static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags, 1939 int *dmabuf_fd) 1940{ 1941 struct hl_vm_phys_pg_pack *phys_pg_pack; 1942 struct hl_dmabuf_priv *hl_dmabuf; 1943 struct hl_device *hdev = ctx->hdev; 1944 struct asic_fixed_properties *prop; 1945 struct hl_vm *vm = &hdev->vm; 1946 u64 bar_address; 1947 int rc, i; 1948 1949 prop = &hdev->asic_prop; 1950 1951 if (upper_32_bits(handle)) { 1952 dev_dbg(hdev->dev, "no match for handle 0x%llx\n", handle); 1953 return -EINVAL; 1954 } 1955 1956 spin_lock(&vm->idr_lock); 1957 1958 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) handle); 1959 if (!phys_pg_pack) { 1960 spin_unlock(&vm->idr_lock); 1961 dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) handle); 1962 return -EINVAL; 1963 } 1964 1965 /* increment now to avoid freeing device memory while exporting */ 1966 phys_pg_pack->exporting_cnt++; 1967 1968 spin_unlock(&vm->idr_lock); 1969 1970 if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) { 1971 dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", handle); 1972 rc = -EINVAL; 1973 goto err_dec_exporting_cnt; 1974 } 1975 1976 for (i = 0 ; i < phys_pg_pack->npages ; i++) { 1977 1978 bar_address = hdev->dram_pci_bar_start + 1979 (phys_pg_pack->pages[i] - 1980 prop->dram_base_address); 1981 1982 if (bar_address + phys_pg_pack->page_size > 1983 hdev->dram_pci_bar_start + prop->dram_pci_bar_size || 1984 bar_address + phys_pg_pack->page_size < bar_address) { 1985 1986 dev_dbg(hdev->dev, 1987 "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n", 1988 phys_pg_pack->pages[i], 1989 phys_pg_pack->page_size); 1990 1991 rc = -EINVAL; 1992 goto err_dec_exporting_cnt; 1993 } 1994 } 1995 1996 hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); 1997 if (!hl_dmabuf) { 1998 rc = -ENOMEM; 1999 goto err_dec_exporting_cnt; 2000 } 2001 2002 hl_dmabuf->phys_pg_pack = phys_pg_pack; 2003 2004 rc = export_dmabuf_common(ctx, hl_dmabuf, phys_pg_pack->total_size, 2005 flags, dmabuf_fd); 2006 if (rc) 2007 goto err_free_dmabuf_wrapper; 2008 2009 return 0; 2010 2011err_free_dmabuf_wrapper: 2012 kfree(hl_dmabuf); 2013 2014err_dec_exporting_cnt: 2015 spin_lock(&vm->idr_lock); 2016 phys_pg_pack->exporting_cnt--; 2017 spin_unlock(&vm->idr_lock); 2018 2019 return rc; 2020} 2021 2022static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) 2023{ 2024 struct hl_device *hdev = hpriv->hdev; 2025 u64 block_handle, device_addr = 0; 2026 struct hl_ctx *ctx = hpriv->ctx; 2027 u32 handle = 0, block_size; 2028 int rc; 2029 2030 switch (args->in.op) { 2031 case HL_MEM_OP_ALLOC: 2032 if (args->in.alloc.mem_size == 0) { 2033 dev_err(hdev->dev, "alloc size must be larger than 0\n"); 2034 rc = -EINVAL; 2035 goto out; 2036 } 2037 2038 /* Force contiguous as there are no real MMU 2039 * translations to overcome physical memory gaps 2040 */ 2041 args->in.flags |= HL_MEM_CONTIGUOUS; 2042 rc = alloc_device_memory(ctx, &args->in, &handle); 2043 2044 memset(args, 0, sizeof(*args)); 2045 args->out.handle = (__u64) handle; 2046 break; 2047 2048 case HL_MEM_OP_FREE: 2049 rc = free_device_memory(ctx, &args->in); 2050 break; 2051 2052 case HL_MEM_OP_MAP: 2053 if (args->in.flags & HL_MEM_USERPTR) { 2054 dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n"); 2055 rc = -EPERM; 2056 } else { 2057 rc = get_paddr_from_handle(ctx, &args->in, &device_addr); 2058 memset(args, 0, sizeof(*args)); 2059 args->out.device_virt_addr = device_addr; 2060 } 2061 2062 break; 2063 2064 case HL_MEM_OP_UNMAP: 
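		/* With the MMU disabled, MAP only returns the physical address
		 * of the allocation, so there is no VA mapping to tear down.
		 */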
2065		rc = 0;
2066		break;
2067
2068	case HL_MEM_OP_MAP_BLOCK:
2069		rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size);
2070		args->out.block_handle = block_handle;
2071		args->out.block_size = block_size;
2072		break;
2073
2074	case HL_MEM_OP_EXPORT_DMABUF_FD:
2075		dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n");
2076		rc = -EPERM;
2077		break;
2078
2079	case HL_MEM_OP_TS_ALLOC:
2080		rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
2081		break;
2082	default:
2083		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
2084		rc = -EINVAL;
2085		break;
2086	}
2087
2088out:
2089	return rc;
2090}
2091
2092static void ts_buff_release(struct hl_mmap_mem_buf *buf)
2093{
2094	struct hl_ts_buff *ts_buff = buf->private;
2095
2096	vfree(ts_buff->kernel_buff_address);
2097	vfree(ts_buff->user_buff_address);
2098	kfree(ts_buff);
2099}
2100
2101static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args)
2102{
2103	struct hl_ts_buff *ts_buff = buf->private;
2104
2105	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
2106	return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0);
2107}
2108
2109static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
2110{
2111	struct hl_ts_buff *ts_buff = NULL;
2112	u32 size, num_elements;
2113	void *p;
2114
2115	num_elements = *(u32 *)args;
2116
2117	ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
2118	if (!ts_buff)
2119		return -ENOMEM;
2120
2121	/* Allocate the user buffer */
2122	size = num_elements * sizeof(u64);
2123	p = vmalloc_user(size);
2124	if (!p)
2125		goto free_mem;
2126
2127	ts_buff->user_buff_address = p;
2128	buf->mappable_size = size;
2129
2130	/* Allocate the internal kernel buffer */
2131	size = num_elements * sizeof(struct hl_user_pending_interrupt);
2132	p = vmalloc(size);
2133	if (!p)
2134		goto free_user_buff;
2135
2136	ts_buff->kernel_buff_address = p;
2137	ts_buff->kernel_buff_size = size;
2138
2139	buf->private = ts_buff;
2140
2141	return 0;
2142
2143free_user_buff:
2144	vfree(ts_buff->user_buff_address);
2145free_mem:
2146	kfree(ts_buff);
2147	return -ENOMEM;
2148}
2149
2150static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
2151	.topic = "TS",
2152	.mem_id = HL_MMAP_TYPE_TS_BUFF,
2153	.mmap = hl_ts_mmap,
2154	.alloc = hl_ts_alloc_buf,
2155	.release = ts_buff_release,
2156};
2157
2158/**
2159 * allocate_timestamps_buffers() - allocate timestamps buffers
2160 * This function allocates the ts buffer that will later on be mapped to the user
2161 * in order to be able to read the timestamp.
2162 * In addition, it allocates an extra buffer for registration management.
2163 * Since registration must not fail on an out-of-memory situation, a pool is
2164 * prepared here to be used as user interrupt nodes, and instead of dynamically
2165 * allocating nodes during registration, a node is picked from this pool.
2166 * In addition, a node is added to the mapping hash which is used to map the
2167 * user ts buffer to the internal kernel ts buffer.
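 * Per element, the user-mappable buffer holds one u64 (the timestamp read by
 * the user) and the internal kernel buffer holds one
 * struct hl_user_pending_interrupt node, so both buffers scale linearly with
 * num_of_elements.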
2168 * @hpriv: pointer to the private data of the fd 2169 * @args: ioctl input 2170 * @handle: user timestamp buffer handle as an output 2171 */ 2172static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle) 2173{ 2174 struct hl_mem_mgr *mmg = &hpriv->mem_mgr; 2175 struct hl_mmap_mem_buf *buf; 2176 2177 if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) { 2178 dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n", 2179 args->num_of_elements, TS_MAX_ELEMENTS_NUM); 2180 return -EINVAL; 2181 } 2182 2183 buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements); 2184 if (!buf) 2185 return -ENOMEM; 2186 2187 *handle = buf->handle; 2188 2189 return 0; 2190} 2191 2192int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) 2193{ 2194 enum hl_device_status status; 2195 union hl_mem_args *args = data; 2196 struct hl_device *hdev = hpriv->hdev; 2197 struct hl_ctx *ctx = hpriv->ctx; 2198 u64 block_handle, device_addr = 0; 2199 u32 handle = 0, block_size; 2200 int rc, dmabuf_fd = -EBADF; 2201 2202 if (!hl_device_operational(hdev, &status)) { 2203 dev_warn_ratelimited(hdev->dev, 2204 "Device is %s. Can't execute MEMORY IOCTL\n", 2205 hdev->status[status]); 2206 return -EBUSY; 2207 } 2208 2209 if (!hdev->mmu_enable) 2210 return mem_ioctl_no_mmu(hpriv, args); 2211 2212 switch (args->in.op) { 2213 case HL_MEM_OP_ALLOC: 2214 if (args->in.alloc.mem_size == 0) { 2215 dev_err(hdev->dev, 2216 "alloc size must be larger than 0\n"); 2217 rc = -EINVAL; 2218 goto out; 2219 } 2220 2221 /* If DRAM does not support virtual memory the driver won't 2222 * handle the allocation/freeing of that memory. However, for 2223 * system administration/monitoring purposes, the driver will 2224 * keep track of the amount of DRAM memory that is allocated 2225 * and freed by the user. Because this code totally relies on 2226 * the user's input, the driver can't ensure the validity 2227 * of this accounting. 2228 */ 2229 if (!hdev->asic_prop.dram_supports_virtual_memory) { 2230 atomic64_add(args->in.alloc.mem_size, 2231 &ctx->dram_phys_mem); 2232 atomic64_add(args->in.alloc.mem_size, 2233 &hdev->dram_used_mem); 2234 2235 dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); 2236 rc = 0; 2237 2238 memset(args, 0, sizeof(*args)); 2239 args->out.handle = 0; 2240 goto out; 2241 } 2242 2243 rc = alloc_device_memory(ctx, &args->in, &handle); 2244 2245 memset(args, 0, sizeof(*args)); 2246 args->out.handle = (__u64) handle; 2247 break; 2248 2249 case HL_MEM_OP_FREE: 2250 /* If DRAM does not support virtual memory the driver won't 2251 * handle the allocation/freeing of that memory. However, for 2252 * system administration/monitoring purposes, the driver will 2253 * keep track of the amount of DRAM memory that is allocated 2254 * and freed by the user. Because this code totally relies on 2255 * the user's input, the driver can't ensure the validity 2256 * of this accounting. 
2257 */ 2258 if (!hdev->asic_prop.dram_supports_virtual_memory) { 2259 atomic64_sub(args->in.alloc.mem_size, 2260 &ctx->dram_phys_mem); 2261 atomic64_sub(args->in.alloc.mem_size, 2262 &hdev->dram_used_mem); 2263 2264 dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); 2265 rc = 0; 2266 2267 goto out; 2268 } 2269 2270 rc = free_device_memory(ctx, &args->in); 2271 break; 2272 2273 case HL_MEM_OP_MAP: 2274 rc = map_device_va(ctx, &args->in, &device_addr); 2275 2276 memset(args, 0, sizeof(*args)); 2277 args->out.device_virt_addr = device_addr; 2278 break; 2279 2280 case HL_MEM_OP_UNMAP: 2281 rc = unmap_device_va(ctx, &args->in, false); 2282 break; 2283 2284 case HL_MEM_OP_MAP_BLOCK: 2285 rc = map_block(hdev, args->in.map_block.block_addr, 2286 &block_handle, &block_size); 2287 args->out.block_handle = block_handle; 2288 args->out.block_size = block_size; 2289 break; 2290 2291 case HL_MEM_OP_EXPORT_DMABUF_FD: 2292 if (hdev->asic_prop.dram_supports_virtual_memory) 2293 rc = export_dmabuf_from_handle(ctx, 2294 args->in.export_dmabuf_fd.handle, 2295 args->in.flags, 2296 &dmabuf_fd); 2297 else 2298 rc = export_dmabuf_from_addr(ctx, 2299 args->in.export_dmabuf_fd.handle, 2300 args->in.export_dmabuf_fd.mem_size, 2301 args->in.flags, 2302 &dmabuf_fd); 2303 memset(args, 0, sizeof(*args)); 2304 args->out.fd = dmabuf_fd; 2305 break; 2306 2307 case HL_MEM_OP_TS_ALLOC: 2308 rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle); 2309 break; 2310 default: 2311 dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); 2312 rc = -EINVAL; 2313 break; 2314 } 2315 2316out: 2317 return rc; 2318} 2319 2320static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, 2321 u32 npages, u64 start, u32 offset, 2322 struct hl_userptr *userptr) 2323{ 2324 int rc; 2325 2326 if (!access_ok((void __user *) (uintptr_t) addr, size)) { 2327 dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); 2328 return -EFAULT; 2329 } 2330 2331 userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages), 2332 GFP_KERNEL); 2333 if (!userptr->pages) 2334 return -ENOMEM; 2335 2336 rc = pin_user_pages_fast(start, npages, 2337 FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, 2338 userptr->pages); 2339 2340 if (rc != npages) { 2341 dev_err(hdev->dev, 2342 "Failed (%d) to pin host memory with user ptr 0x%llx, size 0x%llx, npages %d\n", 2343 rc, addr, size, npages); 2344 if (rc < 0) 2345 goto destroy_pages; 2346 npages = rc; 2347 rc = -EFAULT; 2348 goto put_pages; 2349 } 2350 userptr->npages = npages; 2351 2352 rc = sg_alloc_table_from_pages(userptr->sgt, 2353 userptr->pages, 2354 npages, offset, size, GFP_KERNEL); 2355 if (rc < 0) { 2356 dev_err(hdev->dev, "failed to create SG table from pages\n"); 2357 goto put_pages; 2358 } 2359 2360 return 0; 2361 2362put_pages: 2363 unpin_user_pages(userptr->pages, npages); 2364destroy_pages: 2365 kvfree(userptr->pages); 2366 return rc; 2367} 2368 2369/** 2370 * hl_pin_host_memory() - pins a chunk of host memory. 2371 * @hdev: pointer to the habanalabs device structure. 2372 * @addr: the host virtual address of the memory area. 2373 * @size: the size of the memory area. 2374 * @userptr: pointer to hl_userptr structure. 2375 * 2376 * This function does the following: 2377 * - Pins the physical pages. 2378 * - Create an SG list from those pages. 
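 *
 * A minimal usage sketch (illustrative only; error handling elided and
 * user_addr/user_size stand for a caller-supplied address and size):
 *
 *	struct hl_userptr *userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
 *
 *	rc = hl_pin_host_memory(hdev, user_addr, user_size, userptr);
 *	if (!rc) {
 *		... use userptr->sgt, e.g. for DMA mapping ...
 *		hl_unpin_host_memory(hdev, userptr);
 *	}
 *	kfree(userptr);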
2379 */
2380int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
2381			struct hl_userptr *userptr)
2382{
2383	u64 start, end;
2384	u32 npages, offset;
2385	int rc;
2386
2387	if (!size) {
2388		dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
2389		return -EINVAL;
2390	}
2391
2392	/*
2393	 * If the combination of the address and size requested for this memory
2394	 * region causes an integer overflow, return error.
2395	 */
2396	if (((addr + size) < addr) ||
2397			PAGE_ALIGN(addr + size) < (addr + size)) {
2398		dev_err(hdev->dev,
2399			"user pointer 0x%llx + %llu causes integer overflow\n",
2400			addr, size);
2401		return -EINVAL;
2402	}
2403
2404	userptr->pid = current->pid;
2405	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
2406	if (!userptr->sgt)
2407		return -ENOMEM;
2408
2409	start = addr & PAGE_MASK;
2410	offset = addr & ~PAGE_MASK;
2411	end = PAGE_ALIGN(addr + size);
2412	npages = (end - start) >> PAGE_SHIFT;
2413
2414	userptr->size = size;
2415	userptr->addr = addr;
2416	userptr->dma_mapped = false;
2417	INIT_LIST_HEAD(&userptr->job_node);
2418
2419	rc = get_user_memory(hdev, addr, size, npages, start, offset,
2420				userptr);
2421	if (rc) {
2422		dev_err(hdev->dev,
2423			"failed to get user memory for address 0x%llx\n",
2424			addr);
2425		goto free_sgt;
2426	}
2427
2428	hl_debugfs_add_userptr(hdev, userptr);
2429
2430	return 0;
2431
2432free_sgt:
2433	kfree(userptr->sgt);
2434	return rc;
2435}
2436
2437/*
2438 * hl_unpin_host_memory - unpins a chunk of host memory.
2439 * @hdev: pointer to the habanalabs device structure
2440 * @userptr: pointer to hl_userptr structure
2441 *
2442 * This function does the following:
2443 * - Unpins the physical pages related to the host memory
2444 * - Frees the SG list
2445 */
2446void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
2447{
2448	hl_debugfs_remove_userptr(hdev, userptr);
2449
2450	if (userptr->dma_mapped)
2451		hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
2452
2453	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
2454	kvfree(userptr->pages);
2455
2456	list_del(&userptr->job_node);
2457
2458	sg_free_table(userptr->sgt);
2459	kfree(userptr->sgt);
2460}
2461
2462/**
2463 * hl_userptr_delete_list() - clear userptr list.
2464 * @hdev: pointer to the habanalabs device structure.
2465 * @userptr_list: pointer to the list to clear.
2466 *
2467 * This function does the following:
2468 * - Iterates over the list and unpins the host memory and frees the userptr
2469 *   structure.
2470 */
2471void hl_userptr_delete_list(struct hl_device *hdev,
2472				struct list_head *userptr_list)
2473{
2474	struct hl_userptr *userptr, *tmp;
2475
2476	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
2477		hl_unpin_host_memory(hdev, userptr);
2478		kfree(userptr);
2479	}
2480
2481	INIT_LIST_HEAD(userptr_list);
2482}
2483
2484/**
2485 * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
2486 * @hdev: pointer to the habanalabs device structure.
2487 * @addr: user address to check.
2488 * @size: user block size to check.
2489 * @userptr_list: pointer to the list to search in.
2490 * @userptr: returns a pointer to the matching userptr if one is found.
2491 *
2492 * This function does the following:
2493 * - Iterates over the list and checks whether a userptr with the given address
2494 *   and size is in it, meaning it is pinned. If so, returns true, otherwise false.
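 *
 * Typical use (illustrative): callers that keep a list of userptr objects per
 * job can probe it with the address and size of a new request and, on a hit,
 * reuse the already-pinned pages through the returned *userptr instead of
 * pinning the same range again.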
2495 */
2496bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
2497				u32 size, struct list_head *userptr_list,
2498				struct hl_userptr **userptr)
2499{
2500	list_for_each_entry((*userptr), userptr_list, job_node) {
2501		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
2502			return true;
2503	}
2504
2505	return false;
2506}
2507
2508/**
2509 * va_range_init() - initialize virtual addresses range.
2510 * @hdev: pointer to the habanalabs device structure.
2511 * @va_range: pointer to the virtual addresses range to initialize.
2512 * @start: range start address.
2513 * @end: range end address.
2514 * @page_size: page size for this va_range.
2515 *
2516 * This function does the following:
2517 * - Initializes the virtual addresses list of the given range with the given
2518 *   addresses.
2519 */
2520static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
2521				u64 start, u64 end, u32 page_size)
2522{
2523	int rc;
2524
2525	INIT_LIST_HEAD(&va_range->list);
2526
2527	/*
2528	 * PAGE_SIZE alignment.
2529	 * It is the caller's responsibility to align the addresses if the
2530	 * page size is not a power of 2.
2531	 */
2532
2533	if (is_power_of_2(page_size)) {
2534		if (start & (PAGE_SIZE - 1)) {
2535			start &= PAGE_MASK;
2536			start += PAGE_SIZE;
2537		}
2538
2539		/*
2540		 * The end of the range is inclusive, hence we need to align it
2541		 * to the end of the last full page in the range. For example if
2542		 * end = 0x3ff5 with page size 0x1000, we need to align it to
2543		 * 0x2fff. The remaining 0xff5 bytes do not form a full page.
2544		 */
2545		if ((end + 1) & (PAGE_SIZE - 1))
2546			end = ((end + 1) & PAGE_MASK) - 1;
2547	}
2548
2549	if (start >= end) {
2550		dev_err(hdev->dev, "too small vm range for va list\n");
2551		return -EFAULT;
2552	}
2553
2554	rc = add_va_block(hdev, va_range, start, end);
2555
2556	if (rc) {
2557		dev_err(hdev->dev, "Failed to init host va list\n");
2558		return rc;
2559	}
2560
2561	va_range->start_addr = start;
2562	va_range->end_addr = end;
2563	va_range->page_size = page_size;
2564
2565	return 0;
2566}
2567
2568/**
2569 * va_range_fini() - clear a virtual addresses range.
2570 * @hdev: pointer to the habanalabs structure.
2571 * @va_range: pointer to virtual addresses range.
2572 *
2573 * This function does the following:
2574 * - Frees the virtual addresses block list and its lock.
2575 */
2576static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
2577{
2578	mutex_lock(&va_range->lock);
2579	clear_va_list_locked(hdev, &va_range->list);
2580	mutex_unlock(&va_range->lock);
2581
2582	mutex_destroy(&va_range->lock);
2583	kfree(va_range);
2584}
2585
2586/**
2587 * vm_ctx_init_with_ranges() - initialize virtual memory for context.
2588 * @ctx: pointer to the habanalabs context structure.
2589 * @host_range_start: host virtual addresses range start.
2590 * @host_range_end: host virtual addresses range end.
2591 * @host_page_size: host page size.
2592 * @host_huge_range_start: host virtual addresses range start for memory
2593 *                         allocated with huge pages.
2594 * @host_huge_range_end: host virtual addresses range end for memory allocated
2595 *                       with huge pages.
2596 * @host_huge_page_size: host huge page size.
2597 * @dram_range_start: dram virtual addresses range start.
2598 * @dram_range_end: dram virtual addresses range end.
2599 * @dram_page_size: dram page size.
2600 *
2601 * This function initializes the following:
2602 * - MMU for context.
2603 * - Virtual address to area descriptor hashtable.
2604 * - Virtual block list of available virtual memory. 2605 */ 2606static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, 2607 u64 host_range_start, 2608 u64 host_range_end, 2609 u32 host_page_size, 2610 u64 host_huge_range_start, 2611 u64 host_huge_range_end, 2612 u32 host_huge_page_size, 2613 u64 dram_range_start, 2614 u64 dram_range_end, 2615 u32 dram_page_size) 2616{ 2617 struct hl_device *hdev = ctx->hdev; 2618 int i, rc; 2619 2620 for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) { 2621 ctx->va_range[i] = 2622 kzalloc(sizeof(struct hl_va_range), GFP_KERNEL); 2623 if (!ctx->va_range[i]) { 2624 rc = -ENOMEM; 2625 goto free_va_range; 2626 } 2627 } 2628 2629 rc = hl_mmu_ctx_init(ctx); 2630 if (rc) { 2631 dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); 2632 goto free_va_range; 2633 } 2634 2635 mutex_init(&ctx->mem_hash_lock); 2636 hash_init(ctx->mem_hash); 2637 2638 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); 2639 2640 rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST], 2641 host_range_start, host_range_end, host_page_size); 2642 if (rc) { 2643 dev_err(hdev->dev, "failed to init host vm range\n"); 2644 goto mmu_ctx_fini; 2645 } 2646 2647 if (hdev->pmmu_huge_range) { 2648 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); 2649 2650 rc = va_range_init(hdev, 2651 ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE], 2652 host_huge_range_start, host_huge_range_end, 2653 host_huge_page_size); 2654 if (rc) { 2655 dev_err(hdev->dev, 2656 "failed to init host huge vm range\n"); 2657 goto clear_host_va_range; 2658 } 2659 } else { 2660 kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); 2661 ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] = 2662 ctx->va_range[HL_VA_RANGE_TYPE_HOST]; 2663 } 2664 2665 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); 2666 2667 rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM], 2668 dram_range_start, dram_range_end, dram_page_size); 2669 if (rc) { 2670 dev_err(hdev->dev, "failed to init dram vm range\n"); 2671 goto clear_host_huge_va_range; 2672 } 2673 2674 hl_debugfs_add_ctx_mem_hash(hdev, ctx); 2675 2676 return 0; 2677 2678clear_host_huge_va_range: 2679 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); 2680 2681 if (hdev->pmmu_huge_range) { 2682 mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); 2683 clear_va_list_locked(hdev, 2684 &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list); 2685 mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); 2686 } 2687clear_host_va_range: 2688 if (hdev->pmmu_huge_range) 2689 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); 2690 mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); 2691 clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list); 2692 mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); 2693mmu_ctx_fini: 2694 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); 2695 mutex_destroy(&ctx->mem_hash_lock); 2696 hl_mmu_ctx_fini(ctx); 2697free_va_range: 2698 for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) 2699 kfree(ctx->va_range[i]); 2700 2701 return rc; 2702} 2703 2704int hl_vm_ctx_init(struct hl_ctx *ctx) 2705{ 2706 struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; 2707 u64 host_range_start, host_range_end, host_huge_range_start, 2708 host_huge_range_end, dram_range_start, dram_range_end; 2709 u32 host_page_size, host_huge_page_size, dram_page_size; 2710 2711 atomic64_set(&ctx->dram_phys_mem, 0); 2712 2713 /* 2714 * - If MMU is enabled, init the ranges as usual. 
2715	 * - If MMU is disabled, in case of host mapping, the returned address
2716	 *   is the given one.
2717	 *   In case of DRAM mapping, the returned address is the physical
2718	 *   address of the memory related to the given handle.
2719	 */
2720	if (!ctx->hdev->mmu_enable)
2721		return 0;
2722
2723	dram_range_start = prop->dmmu.start_addr;
2724	dram_range_end = prop->dmmu.end_addr - 1;
2725	dram_page_size = prop->dram_page_size ?
2726				prop->dram_page_size : prop->dmmu.page_size;
2727	host_range_start = prop->pmmu.start_addr;
2728	host_range_end = prop->pmmu.end_addr - 1;
2729	host_page_size = prop->pmmu.page_size;
2730	host_huge_range_start = prop->pmmu_huge.start_addr;
2731	host_huge_range_end = prop->pmmu_huge.end_addr - 1;
2732	host_huge_page_size = prop->pmmu_huge.page_size;
2733
2734	return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
2735			host_page_size, host_huge_range_start,
2736			host_huge_range_end, host_huge_page_size,
2737			dram_range_start, dram_range_end, dram_page_size);
2738}
2739
2740/**
2741 * hl_vm_ctx_fini() - virtual memory teardown of context.
2742 * @ctx: pointer to the habanalabs context structure.
2743 *
2744 * This function performs teardown of the following:
2745 * - Virtual block list of available virtual memory.
2746 * - Virtual address to area descriptor hashtable.
2747 * - MMU for context.
2748 *
2749 * In addition, this function does the following:
2750 * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
2751 *   hashtable should be empty as no valid mappings should exist at this
2752 *   point.
2753 * - Frees any existing physical page list from the idr which relates to the
2754 *   current context asid.
2755 * - This function checks the virtual block list for correctness. At this point
2756 *   the list should contain one element which describes the whole virtual
2757 *   memory range of the context. Otherwise, a warning is printed.
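 * - The MMU cache is invalidated only once, after the whole unmapping loop,
 *   rather than per removed mapping.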
2758 */ 2759void hl_vm_ctx_fini(struct hl_ctx *ctx) 2760{ 2761 struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node; 2762 struct hl_device *hdev = ctx->hdev; 2763 struct hl_vm_hash_node *hnode; 2764 struct hl_vm *vm = &hdev->vm; 2765 struct hlist_node *tmp_node; 2766 struct list_head free_list; 2767 struct hl_mem_in args; 2768 int i; 2769 2770 if (!hdev->mmu_enable) 2771 return; 2772 2773 hl_debugfs_remove_ctx_mem_hash(hdev, ctx); 2774 2775 /* 2776 * Clearly something went wrong on hard reset so no point in printing 2777 * another side effect error 2778 */ 2779 if (!hdev->reset_info.hard_reset_pending && !hash_empty(ctx->mem_hash)) 2780 dev_dbg(hdev->dev, 2781 "user released device without removing its memory mappings\n"); 2782 2783 hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { 2784 dev_dbg(hdev->dev, 2785 "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", 2786 hnode->vaddr, ctx->asid); 2787 args.unmap.device_virt_addr = hnode->vaddr; 2788 unmap_device_va(ctx, &args, true); 2789 } 2790 2791 mutex_lock(&ctx->mmu_lock); 2792 2793 /* invalidate the cache once after the unmapping loop */ 2794 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 2795 hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK); 2796 2797 mutex_unlock(&ctx->mmu_lock); 2798 2799 INIT_LIST_HEAD(&free_list); 2800 2801 spin_lock(&vm->idr_lock); 2802 idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) 2803 if (phys_pg_list->asid == ctx->asid) { 2804 dev_dbg(hdev->dev, 2805 "page list 0x%px of asid %d is still alive\n", 2806 phys_pg_list, ctx->asid); 2807 2808 atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem); 2809 idr_remove(&vm->phys_pg_pack_handles, i); 2810 list_add(&phys_pg_list->node, &free_list); 2811 } 2812 spin_unlock(&vm->idr_lock); 2813 2814 list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node) 2815 free_phys_pg_pack(hdev, phys_pg_list); 2816 2817 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]); 2818 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]); 2819 2820 if (hdev->pmmu_huge_range) 2821 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); 2822 2823 mutex_destroy(&ctx->mem_hash_lock); 2824 hl_mmu_ctx_fini(ctx); 2825 2826 /* In this case we need to clear the global accounting of DRAM usage 2827 * because the user notifies us on allocations. If the user is no more, 2828 * all DRAM is available 2829 */ 2830 if (ctx->asid != HL_KERNEL_ASID_ID && 2831 !hdev->asic_prop.dram_supports_virtual_memory) 2832 atomic64_set(&hdev->dram_used_mem, 0); 2833} 2834 2835/** 2836 * hl_vm_init() - initialize virtual memory module. 2837 * @hdev: pointer to the habanalabs device structure. 2838 * 2839 * This function initializes the following: 2840 * - MMU module. 2841 * - DRAM physical pages pool of 2MB. 2842 * - Idr for device memory allocation handles. 
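 *
 * The pool granularity is taken from the ASIC's dram_page_size when that is a
 * power of 2; otherwise the pool is created with DRAM_POOL_PAGE_SIZE, as can
 * be seen in hl_vm_init() below.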
2843 */
2844int hl_vm_init(struct hl_device *hdev)
2845{
2846	struct asic_fixed_properties *prop = &hdev->asic_prop;
2847	struct hl_vm *vm = &hdev->vm;
2848	int rc;
2849
2850	if (is_power_of_2(prop->dram_page_size))
2851		vm->dram_pg_pool =
2852			gen_pool_create(__ffs(prop->dram_page_size), -1);
2853	else
2854		vm->dram_pg_pool =
2855			gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
2856
2857	if (!vm->dram_pg_pool) {
2858		dev_err(hdev->dev, "Failed to create dram page pool\n");
2859		return -ENOMEM;
2860	}
2861
2862	kref_init(&vm->dram_pg_pool_refcount);
2863
2864	rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
2865			prop->dram_end_address - prop->dram_user_base_address,
2866			-1);
2867
2868	if (rc) {
2869		dev_err(hdev->dev,
2870			"Failed to add memory to dram page pool %d\n", rc);
2871		goto pool_add_err;
2872	}
2873
2874	spin_lock_init(&vm->idr_lock);
2875	idr_init(&vm->phys_pg_pack_handles);
2876
2877	atomic64_set(&hdev->dram_used_mem, 0);
2878
2879	vm->init_done = true;
2880
2881	return 0;
2882
2883pool_add_err:
2884	gen_pool_destroy(vm->dram_pg_pool);
2885
2886	return rc;
2887}
2888
2889/**
2890 * hl_vm_fini() - virtual memory module teardown.
2891 * @hdev: pointer to the habanalabs device structure.
2892 *
2893 * This function performs teardown of the following:
2894 * - Idr for device memory allocation handles.
2895 * - DRAM physical pages pool of 2MB.
2896 * - MMU module.
2897 */
2898void hl_vm_fini(struct hl_device *hdev)
2899{
2900	struct hl_vm *vm = &hdev->vm;
2901
2902	if (!vm->init_done)
2903		return;
2904
2905	/*
2906	 * At this point all the contexts should be freed and hence no DRAM
2907	 * memory should be in use. Therefore, the DRAM pool should be freed here.
2908	 */
2909	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
2910		dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
2911				__func__);
2912
2913	vm->init_done = false;
2914}
2915
2916/**
2917 * hl_hw_block_mem_init() - HW block memory initialization.
2918 * @ctx: pointer to the habanalabs context structure.
2919 *
2920 * This function initializes the HW block virtual mapped addresses list and
2921 * its lock.
2922 */
2923void hl_hw_block_mem_init(struct hl_ctx *ctx)
2924{
2925	mutex_init(&ctx->hw_block_list_lock);
2926	INIT_LIST_HEAD(&ctx->hw_block_mem_list);
2927}
2928
2929/**
2930 * hl_hw_block_mem_fini() - HW block memory teardown.
2931 * @ctx: pointer to the habanalabs context structure.
2932 *
2933 * This function clears the HW block virtual mapped addresses list and destroys
2934 * its lock.
2935 */
2936void hl_hw_block_mem_fini(struct hl_ctx *ctx)
2937{
2938	struct hl_vm_hw_block_list_node *lnode, *tmp;
2939
2940	if (!list_empty(&ctx->hw_block_mem_list))
2941		dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");
2942
2943	list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
2944		list_del(&lnode->node);
2945		kfree(lnode);
2946	}
2947
2948	mutex_destroy(&ctx->hw_block_list_lock);
2949}
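
/*
 * Overview (illustrative only): rough pairing of the entry points in this
 * file as they are typically reached from device- and context-lifetime code
 * elsewhere in the driver. The exact call sites and ordering are defined by
 * those callers, not here.
 *
 *	hl_vm_init(hdev);              - device init: DRAM pool, handles idr
 *	hl_vm_ctx_init(ctx);           - per-context VA ranges and MMU
 *	hl_hw_block_mem_init(ctx);     - per-context HW block mappings list
 *	...
 *	hl_mem_ioctl(hpriv, data);     - ALLOC/FREE/MAP/UNMAP/MAP_BLOCK/
 *	                                 EXPORT_DMABUF_FD/TS_ALLOC requests
 *	...
 *	hl_hw_block_mem_fini(ctx);
 *	hl_vm_ctx_fini(ctx);           - unmap leftovers, free phys page packs
 *	hl_vm_fini(hdev);              - device teardown: release DRAM pool
 */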