command_buffer.c (13489B)
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm_va_block *va_block, *tmp;
	dma_addr_t bus_addr;
	u64 virt_addr;
	u32 page_size = prop->pmmu.page_size;
	s32 offset;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because no VA range is allocated for CB mapping\n");
		return -EINVAL;
	}

	if (!hdev->mmu_enable) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because MMU is disabled\n");
		return -EINVAL;
	}

	INIT_LIST_HEAD(&cb->va_block_list);

	for (bus_addr = cb->bus_address;
			bus_addr < cb->bus_address + cb->size;
			bus_addr += page_size) {

		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
		if (!virt_addr) {
			dev_err(hdev->dev,
				"Failed to allocate device virtual address for CB\n");
			rc = -ENOMEM;
			goto err_va_pool_free;
		}

		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
		if (!va_block) {
			rc = -ENOMEM;
			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
			goto err_va_pool_free;
		}

		va_block->start = virt_addr;
		va_block->end = virt_addr + page_size - 1;
		va_block->size = page_size;
		list_add_tail(&va_block->node, &cb->va_block_list);
	}

	mutex_lock(&ctx->mmu_lock);

	bus_addr = cb->bus_address;
	offset = 0;
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
				va_block->size, list_is_last(&va_block->node,
						&cb->va_block_list));
		if (rc) {
			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
				va_block->start);
			goto err_va_umap;
		}

		bus_addr += va_block->size;
		offset += va_block->size;
	}

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);

	mutex_unlock(&ctx->mmu_lock);

	cb->is_mmu_mapped = true;

	return rc;

err_va_umap:
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		if (offset <= 0)
			break;
		hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
				offset <= va_block->size);
		offset -= va_block->size;
	}

	rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

err_va_pool_free:
	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}

	return rc;
}

static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm_va_block *va_block, *tmp;

	mutex_lock(&ctx->mmu_lock);

	list_for_each_entry(va_block, &cb->va_block_list, node)
		if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list)))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap CB's va 0x%llx\n",
					va_block->start);

	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}
}

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
				cb->kernel_address, cb->bus_address);

	kfree(cb);
}

static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}

static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb = NULL;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);

	if (!cb)
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address, GFP_ATOMIC);
		if (!p)
			p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					cb_size, &cb->bus_address, GFP_KERNEL);
	} else {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of DMA memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}

struct hl_cb_mmap_mem_alloc_args {
	struct hl_device *hdev;
	struct hl_ctx *ctx;
	u32 cb_size;
	bool internal_cb;
	bool map_cb;
};

static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf)
{
	struct hl_cb *cb = buf->private;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(cb->hdev, cb);
}

static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	struct hl_cb_mmap_mem_alloc_args *cb_args = args;
	struct hl_cb *cb;
	int rc, ctx_id = cb_args->ctx->asid;
	bool alloc_new_cb = true;

	if (!cb_args->internal_cb) {
		/* Minimum allocation must be PAGE SIZE */
		if (cb_args->cb_size < PAGE_SIZE)
			cb_args->cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&cb_args->hdev->cb_pool_lock);
			if (!list_empty(&cb_args->hdev->cb_pool)) {
				cb = list_first_entry(&cb_args->hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				dev_dbg(cb_args->hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb);
		if (!cb)
			return -ENOMEM;
	}

	cb->hdev = cb_args->hdev;
	cb->ctx = cb_args->ctx;
	cb->buf = buf;
	cb->buf->mappable_size = cb->size;
	cb->buf->private = cb;

	hl_ctx_get(cb->ctx);

	if (cb_args->map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(cb_args->hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(cb_args->ctx, cb);
		if (rc)
			goto release_cb;
	}

	hl_debugfs_add_cb(cb);

	return 0;

release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(cb_args->hdev, cb);

	return rc;
}

static int hl_cb_mmap(struct hl_mmap_mem_buf *buf,
			struct vm_area_struct *vma, void *args)
{
	struct hl_cb *cb = buf->private;

	return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
}

static struct hl_mmap_mem_buf_behavior cb_behavior = {
	.topic = "CB",
	.mem_id = HL_MMAP_TYPE_CB,
	.alloc = hl_cb_mmap_mem_alloc,
	.release = hl_cb_mmap_mem_release,
	.mmap = hl_cb_mmap,
};

int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb_mmap_mem_alloc_args args = {
		.hdev = hdev,
		.ctx = ctx,
		.cb_size = cb_size,
		.internal_cb = internal_cb,
		.map_cb = map_cb,
	};
	struct hl_mmap_mem_buf *buf;
	int ctx_id = ctx->asid;

	if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		return -EBUSY;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		return -EINVAL;
	}

	buf = hl_mmap_mem_buf_alloc(
		mmg, &cb_behavior,
		ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args);
	if (!buf)
		return -ENOMEM;

	*handle = buf->handle;

	return 0;
}

int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
{
	int rc;

	rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle);
	if (rc < 0)
		return rc; /* Invalid handle */

	if (rc == 0)
		dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle);

	return 0;
}

static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
	struct hl_vm_va_block *va_block;
	struct hl_cb *cb;
	int rc = 0;

	cb = hl_cb_get(mmg, handle);
	if (!cb) {
		dev_err(mmg->dev,
			"CB info failed, no match to handle 0x%llx\n", handle);
		return -EINVAL;
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
		/*
		 * va_block_list is only populated when the CB was mapped to
		 * the device MMU, so check the mapping state instead of the
		 * (never-NULL) return value of list_first_entry().
		 */
		if (cb->is_mmu_mapped) {
			va_block = list_first_entry(&cb->va_block_list,
					struct hl_vm_va_block, node);
			*device_va = va_block->start;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
			goto out;
		}
	} else {
		*usage_cnt = atomic_read(&cb->cs_cnt);
	}

out:
	hl_cb_put(cb);
	return rc;
}

int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0, device_va = 0;
	enum hl_device_status status;
	u32 usage_cnt = 0;
	int rc;

	if (!hl_device_operational(hdev, &status)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(&hpriv->mem_mgr,
					args->in.cb_handle);
		break;

	case HL_CB_OP_INFO:
		rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle,
				args->in.flags,
				&usage_cnt,
				&device_va);
		if (rc)
			break;

		memset(&args->out, 0, sizeof(args->out));

		if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
			args->out.device_va = device_va;
		else
			args->out.usage_cnt = usage_cnt;
		break;

	default:
		rc = -EINVAL;
		break;
	}

	return rc;
}

struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle)
{
	struct hl_mmap_mem_buf *buf;

	buf = hl_mmap_mem_buf_get(mmg, handle);
	if (!buf)
		return NULL;
	return buf->private;
}

void hl_cb_put(struct hl_cb *cb)
{
	hl_mmap_mem_buf_put(cb->buf);
}

struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle);
	/* hl_cb_get should never fail here */
	if (!cb) {
		dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
			(u32) cb_handle);
		goto destroy_cb;
	}

	return cb;

destroy_cb:
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return NULL;
}

int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}

int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}

int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_pool_destroy;
	}

	return 0;

err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
}
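
/*
 * Illustrative sketch (not part of the upstream file): one plausible way a
 * driver-internal caller could use the helpers above to build and submit a
 * small command buffer from kernel context. The submission helper named
 * below is hypothetical; everything else refers only to functions and fields
 * defined in this file.
 *
 *	struct hl_cb *cb;
 *	int rc;
 *
 *	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
 *	if (!cb)
 *		return -ENOMEM;
 *
 *	// ... write packets into cb->kernel_address, up to cb->size bytes ...
 *
 *	rc = submit_cb_to_queue(hdev, cb);	// hypothetical submission path
 *
 *	// Drop the reference taken inside hl_cb_kernel_create() and release
 *	// the memory-manager handle that backs the CB.
 *	hl_cb_put(cb);
 *	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
 */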