irq.c (13048B)
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>

/**
 * struct hl_eqe_work - This structure is used to schedule work of EQ
 *                      entry and cpucp_reset event
 *
 * @eq_work: workqueue object to run when EQ entry is received
 * @hdev: pointer to device structure
 * @eq_entry: copy of the EQ entry
 */
struct hl_eqe_work {
	struct work_struct	eq_work;
	struct hl_device	*hdev;
	struct hl_eq_entry	eq_entry;
};

/**
 * hl_cq_inc_ptr - increment ci or pi of cq
 *
 * @ptr: the current ci or pi value of the completion queue
 *
 * Increment ptr by 1. If it reaches the number of completion queue
 * entries, set it to 0
 */
inline u32 hl_cq_inc_ptr(u32 ptr)
{
	ptr++;
	if (unlikely(ptr == HL_CQ_LENGTH))
		ptr = 0;
	return ptr;
}

/**
 * hl_eq_inc_ptr - increment ci of eq
 *
 * @ptr: the current ci value of the event queue
 *
 * Increment ptr by 1. If it reaches the number of event queue
 * entries, set it to 0
 */
static inline u32 hl_eq_inc_ptr(u32 ptr)
{
	ptr++;
	if (unlikely(ptr == HL_EQ_LENGTH))
		ptr = 0;
	return ptr;
}

static void irq_handle_eqe(struct work_struct *work)
{
	struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
							eq_work);
	struct hl_device *hdev = eqe_work->hdev;

	hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);

	kfree(eqe_work);
}

/**
 * hl_irq_handler_cq - irq handler for completion queue
 *
 * @irq: irq number
 * @arg: pointer to completion queue structure
 *
 */
irqreturn_t hl_irq_handler_cq(int irq, void *arg)
{
	struct hl_cq *cq = arg;
	struct hl_device *hdev = cq->hdev;
	struct hl_hw_queue *queue;
	struct hl_cs_job *job;
	bool shadow_index_valid;
	u16 shadow_index;
	struct hl_cq_entry *cq_entry, *cq_base;

	if (hdev->disabled) {
		dev_dbg(hdev->dev,
			"Device disabled but received IRQ %d for CQ %d\n",
			irq, cq->hw_queue_id);
		return IRQ_HANDLED;
	}

	cq_base = cq->kernel_address;

	while (1) {
		bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
					CQ_ENTRY_READY_MASK)
						>> CQ_ENTRY_READY_SHIFT);

		if (!entry_ready)
			break;

		cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];

		/* Make sure we read CQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
					CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
					>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);

		shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
					CQ_ENTRY_SHADOW_INDEX_MASK)
					>> CQ_ENTRY_SHADOW_INDEX_SHIFT);

		queue = &hdev->kernel_queues[cq->hw_queue_id];

		if ((shadow_index_valid) && (!hdev->disabled)) {
			job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
			queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
		}

		atomic_inc(&queue->ci);

		/* Clear CQ entry ready bit */
		cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
						~CQ_ENTRY_READY_MASK);

		cq->ci = hl_cq_inc_ptr(cq->ci);

		/* Increment free slots */
		atomic_inc(&cq->free_slots_cnt);
	}

	return IRQ_HANDLED;
}

/*
 * hl_ts_free_objects - handler of the free objects workqueue.
 * This function puts the refcounts that the registration node took on
 * its objects.
 * @work: workqueue object pointer
 */
static void hl_ts_free_objects(struct work_struct *work)
{
	struct timestamp_reg_work_obj *job =
			container_of(work, struct timestamp_reg_work_obj, free_obj);
	struct timestamp_reg_free_node *free_obj, *temp_free_obj;
	struct list_head *free_list_head = job->free_obj_head;
	struct hl_device *hdev = job->hdev;

	list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
		dev_dbg(hdev->dev, "About to put refcount to buf (%p) cq_cb(%p)\n",
					free_obj->buf,
					free_obj->cq_cb);

		hl_mmap_mem_buf_put(free_obj->buf);
		hl_cb_put(free_obj->cq_cb);
		kfree(free_obj);
	}

	kfree(free_list_head);
	kfree(job);
}

/*
 * This function is called with the wait_list_lock spinlock held.
 * It sets the timestamp and deletes the registration node from the
 * wait list.
 * Since we are protected by a spinlock here, we cannot simply put the
 * refcounts on the objects at this point: the release function may be
 * invoked, and it contains long logic (which might also sleep) that
 * cannot be handled in IRQ context.
 * Instead, we fill a list with "put" job nodes and send that list to a
 * dedicated workqueue that performs the actual puts.
 */
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
						struct list_head **free_list)
{
	struct timestamp_reg_free_node *free_node;
	u64 timestamp;

	if (!(*free_list)) {
		/* Alloc/Init the timestamp registration free objects list */
		*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
		if (!(*free_list))
			return -ENOMEM;

		INIT_LIST_HEAD(*free_list);
	}

	free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
	if (!free_node)
		return -ENOMEM;

	timestamp = ktime_get_ns();

	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;

	dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
			pend->ts_reg_info.timestamp_kernel_addr,
			*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);

	list_del(&pend->wait_list_node);

	/* Mark kernel CB node as free */
	pend->ts_reg_info.in_use = 0;

	/* Putting the refcount for ts_buff and cq_cb objects will be handled
	 * in workqueue context, just add job to free_list.
	 */
	free_node->buf = pend->ts_reg_info.buf;
	free_node->cq_cb = pend->ts_reg_info.cq_cb;
	list_add(&free_node->free_objects_node, *free_list);

	return 0;
}

static void handle_user_cq(struct hl_device *hdev,
				struct hl_user_interrupt *user_cq)
{
	struct hl_user_pending_interrupt *pend, *temp_pend;
	struct list_head *ts_reg_free_list_head = NULL;
	struct timestamp_reg_work_obj *job;
	bool reg_node_handle_fail = false;
	ktime_t now = ktime_get();
	int rc;

	/* For registration nodes:
	 * As part of handling the registration nodes, we need to put refcounts
	 * on some objects. The problem is that we cannot do that under a
	 * spinlock or in IRQ handler context at all (since the release
	 * functions are long and might sleep), so we handle that part in
	 * workqueue context.
	 * To avoid having to handle a kmalloc failure, which would force us to
	 * roll back and move nodes hanging on the free list back to the
	 * interrupt wait list, we always allocate the WQ job at the beginning.
	 */
	job = kmalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return;

	spin_lock(&user_cq->wait_list_lock);
	list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
				!pend->cq_kernel_addr) {
			if (pend->ts_reg_info.buf) {
				if (!reg_node_handle_fail) {
					rc = handle_registration_node(hdev, pend,
							&ts_reg_free_list_head);
					if (rc)
						reg_node_handle_fail = true;
				}
			} else {
				/* Handle wait target value node */
				pend->fence.timestamp = now;
				complete_all(&pend->fence.completion);
			}
		}
	}
	spin_unlock(&user_cq->wait_list_lock);

	if (ts_reg_free_list_head) {
		INIT_WORK(&job->free_obj, hl_ts_free_objects);
		job->free_obj_head = ts_reg_free_list_head;
		job->hdev = hdev;
		queue_work(hdev->ts_free_obj_wq, &job->free_obj);
	} else {
		kfree(job);
	}
}

/**
 * hl_irq_handler_user_cq - irq handler for user completion queues
 *
 * @irq: irq number
 * @arg: pointer to user interrupt structure
 *
 */
irqreturn_t hl_irq_handler_user_cq(int irq, void *arg)
{
	struct hl_user_interrupt *user_cq = arg;
	struct hl_device *hdev = user_cq->hdev;

	/* Handle user cq interrupts registered on all interrupts */
	handle_user_cq(hdev, &hdev->common_user_interrupt);

	/* Handle user cq interrupts registered on this specific interrupt */
	handle_user_cq(hdev, user_cq);

	return IRQ_HANDLED;
}

/**
 * hl_irq_handler_default - default irq handler
 *
 * @irq: irq number
 * @arg: pointer to user interrupt structure
 *
 */
irqreturn_t hl_irq_handler_default(int irq, void *arg)
{
	struct hl_user_interrupt *user_interrupt = arg;
	struct hl_device *hdev = user_interrupt->hdev;
	u32 interrupt_id = user_interrupt->interrupt_id;

	dev_err(hdev->dev,
		"got invalid user interrupt %u",
		interrupt_id);

	return IRQ_HANDLED;
}

/**
 * hl_irq_handler_eq - irq handler for event queue
 *
 * @irq: irq number
 * @arg: pointer to event queue structure
 *
 */
irqreturn_t hl_irq_handler_eq(int irq, void *arg)
{
	struct hl_eq *eq = arg;
	struct hl_device *hdev = eq->hdev;
	struct hl_eq_entry *eq_entry;
	struct hl_eq_entry *eq_base;
	struct hl_eqe_work *handle_eqe_work;
	bool entry_ready;
	u32 cur_eqe;
	u16 cur_eqe_index;

	eq_base = eq->kernel_address;

	while (1) {
		cur_eqe = le32_to_cpu(eq_base[eq->ci].hdr.ctl);
		entry_ready = !!FIELD_GET(EQ_CTL_READY_MASK, cur_eqe);

		if (!entry_ready)
			break;

		cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
		if ((hdev->event_queue.check_eqe_index) &&
				(((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK)
					!= cur_eqe_index)) {
			dev_dbg(hdev->dev,
				"EQE 0x%x in queue is ready but index does not match %d!=%d",
				eq_base[eq->ci].hdr.ctl,
				((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),
				cur_eqe_index);
			break;
		}

		eq->prev_eqe_index++;

		eq_entry = &eq_base[eq->ci];

		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
			dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
			goto skip_irq;
		}

		handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
		if (handle_eqe_work) {
			INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
			handle_eqe_work->hdev = hdev;

			memcpy(&handle_eqe_work->eq_entry, eq_entry,
					sizeof(*eq_entry));

			queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
		}
skip_irq:
		/* Clear EQ entry ready bit */
		eq_entry->hdr.ctl =
			cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
							~EQ_CTL_READY_MASK);

		eq->ci = hl_eq_inc_ptr(eq->ci);

		hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
	}

	return IRQ_HANDLED;
}

/**
 * hl_cq_init - main initialization function for a cq object
 *
 * @hdev: pointer to device structure
 * @q: pointer to cq structure
 * @hw_queue_id: The H/W queue ID this completion queue belongs to
 *
 * Allocate dma-able memory for the completion queue and initialize fields
 * Returns 0 on success
 */
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
{
	void *p;

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->hdev = hdev;
	q->kernel_address = p;
	q->hw_queue_id = hw_queue_id;
	q->ci = 0;
	q->pi = 0;

	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);

	return 0;
}

/**
 * hl_cq_fini - destroy completion queue
 *
 * @hdev: pointer to device structure
 * @q: pointer to cq structure
 *
 * Free the completion queue memory
 */
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
{
	hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
						 q->kernel_address,
						 q->bus_address);
}

void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
{
	q->ci = 0;
	q->pi = 0;

	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);

	/*
	 * It's not enough to just reset the PI/CI because the H/W may have
	 * written valid completion entries before it was halted and therefore
	 * we need to clean the actual queues so we won't process old entries
	 * when the device is operational again
	 */

	memset(q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
}

/**
 * hl_eq_init - main initialization function for an event queue object
 *
 * @hdev: pointer to device structure
 * @q: pointer to eq structure
 *
 * Allocate dma-able memory for the event queue and initialize fields
 * Returns 0 on success
 */
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
{
	void *p;

	p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
							HL_EQ_SIZE_IN_BYTES,
							&q->bus_address);
	if (!p)
		return -ENOMEM;

	q->hdev = hdev;
	q->kernel_address = p;
	q->ci = 0;
	q->prev_eqe_index = 0;

	return 0;
}

/**
 * hl_eq_fini - destroy event queue
 *
 * @hdev: pointer to device structure
 * @q: pointer to eq structure
 *
 * Free the event queue memory
 */
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
{
	flush_workqueue(hdev->eq_wq);

	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_EQ_SIZE_IN_BYTES,
					q->kernel_address);
}

void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
{
	q->ci = 0;
	q->prev_eqe_index = 0;

	/*
	 * It's not enough to just reset the PI/CI because the H/W may have
	 * written valid completion entries before it was halted and therefore
	 * we need to clean the actual queues so we won't process old entries
	 * when the device is operational again
	 */

	memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
}
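
Usage note (not part of irq.c): the sketch below shows one way an ASIC-specific init path might attach hl_irq_handler_cq() to its MSI-X vectors via request_irq(), passing the per-queue hl_cq structure as the cookie that arrives in the handler's arg parameter. The function name example_request_cq_irqs(), the vector numbering, and the "hl-cq" name string are assumptions for illustration only; the driver's real per-ASIC code requests its interrupts in its own init sequence.

/* Hypothetical sketch, not part of the driver sources. */
#include <linux/interrupt.h>
#include <linux/pci.h>

#include "habanalabs.h"

static int example_request_cq_irqs(struct hl_device *hdev,
					struct pci_dev *pdev, int nr_cqs)
{
	int i, irq, rc;

	for (i = 0 ; i < nr_cqs ; i++) {
		/* Translate MSI-X vector index i to a Linux IRQ number */
		irq = pci_irq_vector(pdev, i);

		/* One vector per completion queue; &hdev->completion_queue[i]
		 * is the cookie that hl_irq_handler_cq() receives as arg.
		 */
		rc = request_irq(irq, hl_irq_handler_cq, 0, "hl-cq",
					&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
			return rc;
		}
	}

	return 0;
}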