gaudi.c (279049B)
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMAN):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0xEE6b27FF /* 8 seconds */

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
	"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
	"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
	"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32] = sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
	[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
	[PACKET_REPEAT] = sizeof(struct packet_repeat),
	[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
	[PACKET_FENCE] = sizeof(struct packet_fence),
	[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
	[PACKET_NOP] = sizeof(struct packet_nop),
	[PACKET_STOP] = sizeof(struct packet_stop),
	[PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
	[PACKET_WAIT] = sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ
AXI HBW error", 220 "CQ AXI HBW error", 221 "CP AXI HBW error", 222 "CP error due to undefined OPCODE", 223 "CP encountered STOP OPCODE", 224 "CP AXI LBW error", 225 "CP WRREG32 or WRBULK returned error", 226 "N/A", 227 "FENCE 0 inc over max value and clipped", 228 "FENCE 1 inc over max value and clipped", 229 "FENCE 2 inc over max value and clipped", 230 "FENCE 3 inc over max value and clipped", 231 "FENCE 0 dec under min value and clipped", 232 "FENCE 1 dec under min value and clipped", 233 "FENCE 2 dec under min value and clipped", 234 "FENCE 3 dec under min value and clipped" 235}; 236 237static const char * const 238gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 239 "Choice push while full error", 240 "Choice Q watchdog error", 241 "MSG AXI LBW returned with error" 242}; 243 244enum gaudi_sm_sei_cause { 245 GAUDI_SM_SEI_SO_OVERFLOW, 246 GAUDI_SM_SEI_LBW_4B_UNALIGNED, 247 GAUDI_SM_SEI_AXI_RESPONSE_ERR 248}; 249 250static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 251 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 256 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 257 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 258 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 259 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 302 QUEUE_TYPE_INT, /* 
GAUDI_QUEUE_ID_TPC_2_2 */ 303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ 313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ 314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ 315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ 316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ 317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ 318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ 319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ 320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ 321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ 322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ 323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ 324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */ 325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */ 326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */ 327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */ 328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */ 329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */ 330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */ 331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */ 332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */ 333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */ 334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */ 335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */ 336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */ 337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */ 338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */ 339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */ 340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */ 341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */ 342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */ 343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */ 344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */ 345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */ 346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */ 347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */ 348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */ 349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */ 350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */ 351 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */ 352 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */ 353 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */ 354 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */ 355 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */ 356 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */ 357 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */ 358 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */ 359 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */ 360 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */ 361 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */ 362 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */ 363 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */ 364}; 365 366static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = { 367 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" }, 368 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" }, 369 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" }, 370 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" }, 371 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" }, 372 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" }, 373 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" }, 374 { .id = 7, .name = 
"SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" }, 375 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" }, 376 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" }, 377 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" }, 378 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" }, 379 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" }, 380 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" }, 381 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" }, 382 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" }, 383 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" }, 384 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, 385 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, 386 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, 387 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 388 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 389 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 390 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 391 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 392 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 393 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 394}; 395 396static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 397 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 398 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 399 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 400 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 401 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 402 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 403 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 404 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 405 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 406 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 407 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 408}; 409 410static s64 gaudi_state_dump_specs_props[] = { 411 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 412 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 413 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 414 [SP_MON_OBJ_WR_ADDR_LOW] = 415 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 416 [SP_MON_OBJ_WR_ADDR_HIGH] = 417 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 418 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 419 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 420 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 421 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 422 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 423 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 424 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 425 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 426 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 427 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 428 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 429 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 430 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 431 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 432 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 433 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 434 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 435 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 436 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 437 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 438 [SP_FENCE0_CNT_OFFSET] = 439 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 440 [SP_FENCE0_RDATA_OFFSET] = 441 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 442 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 443 [SP_NUM_CORES] = 1, 444}; 445 
446/* The order here is opposite to the order of the indexing in the h/w. 447 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc. 448 */ 449static const char * const gaudi_sync_manager_names[] = { 450 "SYNC_MGR_E_N", 451 "SYNC_MGR_W_N", 452 "SYNC_MGR_E_S", 453 "SYNC_MGR_W_S", 454 NULL 455}; 456 457struct ecc_info_extract_params { 458 u64 block_address; 459 u32 num_memories; 460 bool derr; 461}; 462 463static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, 464 u64 phys_addr); 465static int gaudi_send_job_on_qman0(struct hl_device *hdev, 466 struct hl_cs_job *job); 467static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 468 u32 size, u64 val); 469static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 470 u32 num_regs, u32 val); 471static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 472 u32 tpc_id); 473static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); 474static int gaudi_cpucp_info_get(struct hl_device *hdev); 475static void gaudi_disable_clock_gating(struct hl_device *hdev); 476static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); 477static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 478 u32 size, bool eb); 479static u32 gaudi_gen_wait_cb(struct hl_device *hdev, 480 struct hl_gen_wait_properties *prop); 481static inline enum hl_collective_mode 482get_collective_mode(struct hl_device *hdev, u32 queue_id) 483{ 484 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT) 485 return HL_COLLECTIVE_MASTER; 486 487 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 && 488 queue_id <= GAUDI_QUEUE_ID_DMA_5_3) 489 return HL_COLLECTIVE_SLAVE; 490 491 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 && 492 queue_id <= GAUDI_QUEUE_ID_TPC_7_3) 493 return HL_COLLECTIVE_SLAVE; 494 495 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 && 496 queue_id <= GAUDI_QUEUE_ID_NIC_9_3) 497 return HL_COLLECTIVE_SLAVE; 498 499 return HL_COLLECTIVE_NOT_SUPPORTED; 500} 501 502static inline void set_default_power_values(struct hl_device *hdev) 503{ 504 struct asic_fixed_properties *prop = &hdev->asic_prop; 505 506 if (hdev->card_type == cpucp_card_type_pmc) { 507 prop->max_power_default = MAX_POWER_DEFAULT_PMC; 508 509 if (prop->fw_security_enabled) 510 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC; 511 else 512 prop->dc_power_default = DC_POWER_DEFAULT_PMC; 513 } else { 514 prop->max_power_default = MAX_POWER_DEFAULT_PCI; 515 prop->dc_power_default = DC_POWER_DEFAULT_PCI; 516 } 517} 518 519static int gaudi_set_fixed_properties(struct hl_device *hdev) 520{ 521 struct asic_fixed_properties *prop = &hdev->asic_prop; 522 u32 num_sync_stream_queues = 0; 523 int i; 524 525 prop->max_queues = GAUDI_QUEUE_ID_SIZE; 526 prop->hw_queues_props = kcalloc(prop->max_queues, 527 sizeof(struct hw_queue_properties), 528 GFP_KERNEL); 529 530 if (!prop->hw_queues_props) 531 return -ENOMEM; 532 533 for (i = 0 ; i < prop->max_queues ; i++) { 534 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { 535 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; 536 prop->hw_queues_props[i].driver_only = 0; 537 prop->hw_queues_props[i].supports_sync_stream = 1; 538 prop->hw_queues_props[i].cb_alloc_flags = 539 CB_ALLOC_KERNEL; 540 num_sync_stream_queues++; 541 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { 542 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; 543 prop->hw_queues_props[i].driver_only = 1; 544 prop->hw_queues_props[i].supports_sync_stream = 0; 545 prop->hw_queues_props[i].cb_alloc_flags = 546 CB_ALLOC_KERNEL; 547 } else if (gaudi_queue_type[i] == 
QUEUE_TYPE_INT) { 548 prop->hw_queues_props[i].type = QUEUE_TYPE_INT; 549 prop->hw_queues_props[i].driver_only = 0; 550 prop->hw_queues_props[i].supports_sync_stream = 0; 551 prop->hw_queues_props[i].cb_alloc_flags = 552 CB_ALLOC_USER; 553 554 } 555 prop->hw_queues_props[i].collective_mode = 556 get_collective_mode(hdev, i); 557 } 558 559 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; 560 prop->host_base_address = HOST_PHYS_BASE; 561 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; 562 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; 563 prop->collective_first_sob = 0; 564 prop->collective_first_mon = 0; 565 566 /* 2 SOBs per internal queue stream are reserved for collective */ 567 prop->sync_stream_first_sob = 568 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR) 569 * QMAN_STREAMS * HL_RSVD_SOBS; 570 571 /* 1 monitor per internal queue stream are reserved for collective 572 * 2 monitors per external queue stream are reserved for collective 573 */ 574 prop->sync_stream_first_mon = 575 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) + 576 (NUMBER_OF_EXT_HW_QUEUES * 2); 577 578 prop->dram_base_address = DRAM_PHYS_BASE; 579 prop->dram_size = GAUDI_HBM_SIZE_32GB; 580 prop->dram_end_address = prop->dram_base_address + 581 prop->dram_size; 582 prop->dram_user_base_address = DRAM_BASE_ADDR_USER; 583 584 prop->sram_base_address = SRAM_BASE_ADDR; 585 prop->sram_size = SRAM_SIZE; 586 prop->sram_end_address = prop->sram_base_address + 587 prop->sram_size; 588 prop->sram_user_base_address = prop->sram_base_address + 589 SRAM_USER_BASE_OFFSET; 590 591 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; 592 if (hdev->pldm) 593 prop->mmu_pgt_size = 0x800000; /* 8MB */ 594 else 595 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; 596 prop->mmu_pte_size = HL_PTE_SIZE; 597 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; 598 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; 599 prop->dram_page_size = PAGE_SIZE_2MB; 600 prop->device_mem_alloc_default_page_size = prop->dram_page_size; 601 prop->dram_supports_virtual_memory = false; 602 603 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT; 604 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT; 605 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT; 606 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT; 607 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT; 608 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK; 609 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK; 610 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK; 611 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK; 612 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK; 613 prop->pmmu.start_addr = VA_HOST_SPACE_START; 614 prop->pmmu.end_addr = 615 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; 616 prop->pmmu.page_size = PAGE_SIZE_4KB; 617 prop->pmmu.num_hops = MMU_ARCH_5_HOPS; 618 prop->pmmu.last_mask = LAST_MASK; 619 /* TODO: will be duplicated until implementing per-MMU props */ 620 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 621 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 622 623 /* PMMU and HPMMU are the same except of page size */ 624 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 625 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 626 627 /* shifts and masks are the same in PMMU and DMMU */ 628 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); 629 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); 630 prop->dmmu.end_addr = VA_HOST_SPACE_END; 631 
prop->dmmu.page_size = PAGE_SIZE_2MB; 632 633 prop->cfg_size = CFG_SIZE; 634 prop->max_asid = MAX_ASID; 635 prop->num_of_events = GAUDI_EVENT_SIZE; 636 prop->tpc_enabled_mask = TPC_ENABLED_MASK; 637 638 set_default_power_values(hdev); 639 640 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; 641 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; 642 643 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; 644 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 645 646 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 647 CARD_NAME_MAX_LEN); 648 649 prop->max_pending_cs = GAUDI_MAX_PENDING_CS; 650 651 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] = 652 prop->sync_stream_first_sob + 653 (num_sync_stream_queues * HL_RSVD_SOBS); 654 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] = 655 prop->sync_stream_first_mon + 656 (num_sync_stream_queues * HL_RSVD_MONS); 657 658 prop->first_available_user_msix_interrupt = USHRT_MAX; 659 660 for (i = 0 ; i < HL_MAX_DCORES ; i++) 661 prop->first_available_cq[i] = USHRT_MAX; 662 663 prop->fw_cpu_boot_dev_sts0_valid = false; 664 prop->fw_cpu_boot_dev_sts1_valid = false; 665 prop->hard_reset_done_by_fw = false; 666 prop->gic_interrupts_enable = true; 667 668 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 669 670 prop->clk_pll_index = HL_GAUDI_MME_PLL; 671 prop->max_freq_value = GAUDI_MAX_CLK_FREQ; 672 673 prop->use_get_power_for_reset_history = true; 674 675 prop->configurable_stop_on_err = true; 676 677 prop->set_max_power_on_device_init = true; 678 679 prop->dma_mask = 48; 680 681 return 0; 682} 683 684static int gaudi_pci_bars_map(struct hl_device *hdev) 685{ 686 static const char * const name[] = {"SRAM", "CFG", "HBM"}; 687 bool is_wc[3] = {false, false, true}; 688 int rc; 689 690 rc = hl_pci_bars_map(hdev, name, is_wc); 691 if (rc) 692 return rc; 693 694 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + 695 (CFG_BASE - SPI_FLASH_BASE_ADDR); 696 697 return 0; 698} 699 700static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 701{ 702 struct gaudi_device *gaudi = hdev->asic_specific; 703 struct hl_inbound_pci_region pci_region; 704 u64 old_addr = addr; 705 int rc; 706 707 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) 708 return old_addr; 709 710 if (hdev->asic_prop.iatu_done_by_fw) 711 return U64_MAX; 712 713 /* Inbound Region 2 - Bar 4 - Point to HBM */ 714 pci_region.mode = PCI_BAR_MATCH_MODE; 715 pci_region.bar = HBM_BAR_ID; 716 pci_region.addr = addr; 717 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 718 if (rc) 719 return U64_MAX; 720 721 if (gaudi) { 722 old_addr = gaudi->hbm_bar_cur_addr; 723 gaudi->hbm_bar_cur_addr = addr; 724 } 725 726 return old_addr; 727} 728 729static int gaudi_init_iatu(struct hl_device *hdev) 730{ 731 struct hl_inbound_pci_region inbound_region; 732 struct hl_outbound_pci_region outbound_region; 733 int rc; 734 735 if (hdev->asic_prop.iatu_done_by_fw) 736 return 0; 737 738 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ 739 inbound_region.mode = PCI_BAR_MATCH_MODE; 740 inbound_region.bar = SRAM_BAR_ID; 741 inbound_region.addr = SRAM_BASE_ADDR; 742 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 743 if (rc) 744 goto done; 745 746 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ 747 inbound_region.mode = PCI_BAR_MATCH_MODE; 748 inbound_region.bar = CFG_BAR_ID; 749 inbound_region.addr = SPI_FLASH_BASE_ADDR; 750 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 751 if (rc) 752 goto done; 753 754 /* Inbound Region 2 - Bar 4 - Point to HBM */ 755 inbound_region.mode = 
PCI_BAR_MATCH_MODE; 756 inbound_region.bar = HBM_BAR_ID; 757 inbound_region.addr = DRAM_PHYS_BASE; 758 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 759 if (rc) 760 goto done; 761 762 /* Outbound Region 0 - Point to Host */ 763 outbound_region.addr = HOST_PHYS_BASE; 764 outbound_region.size = HOST_PHYS_SIZE; 765 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 766 767done: 768 return rc; 769} 770 771static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev) 772{ 773 return RREG32(mmHW_STATE); 774} 775 776static int gaudi_early_init(struct hl_device *hdev) 777{ 778 struct asic_fixed_properties *prop = &hdev->asic_prop; 779 struct pci_dev *pdev = hdev->pdev; 780 u32 fw_boot_status; 781 int rc; 782 783 rc = gaudi_set_fixed_properties(hdev); 784 if (rc) { 785 dev_err(hdev->dev, "Failed setting fixed properties\n"); 786 return rc; 787 } 788 789 /* Check BAR sizes */ 790 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) { 791 dev_err(hdev->dev, 792 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", 793 SRAM_BAR_ID, 794 (unsigned long long) pci_resource_len(pdev, 795 SRAM_BAR_ID), 796 SRAM_BAR_SIZE); 797 rc = -ENODEV; 798 goto free_queue_props; 799 } 800 801 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) { 802 dev_err(hdev->dev, 803 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", 804 CFG_BAR_ID, 805 (unsigned long long) pci_resource_len(pdev, 806 CFG_BAR_ID), 807 CFG_BAR_SIZE); 808 rc = -ENODEV; 809 goto free_queue_props; 810 } 811 812 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); 813 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID); 814 815 /* If FW security is enabled at this point it means no access to ELBI */ 816 if (hdev->asic_prop.fw_security_enabled) { 817 hdev->asic_prop.iatu_done_by_fw = true; 818 819 /* 820 * GIC-security-bit can ONLY be set by CPUCP, so in this stage 821 * decision can only be taken based on PCI ID security. 
822 */ 823 hdev->asic_prop.gic_interrupts_enable = false; 824 goto pci_init; 825 } 826 827 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, 828 &fw_boot_status); 829 if (rc) 830 goto free_queue_props; 831 832 /* Check whether FW is configuring iATU */ 833 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && 834 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) 835 hdev->asic_prop.iatu_done_by_fw = true; 836 837pci_init: 838 rc = hl_pci_init(hdev); 839 if (rc) 840 goto free_queue_props; 841 842 /* Before continuing in the initialization, we need to read the preboot 843 * version to determine whether we run with a security-enabled firmware 844 */ 845 rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, 846 mmCPU_BOOT_DEV_STS0, 847 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 848 mmCPU_BOOT_ERR1, 849 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC); 850 if (rc) { 851 if (hdev->reset_on_preboot_fail) 852 hdev->asic_funcs->hw_fini(hdev, true, false); 853 goto pci_fini; 854 } 855 856 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 857 dev_info(hdev->dev, 858 "H/W state is dirty, must reset before initializing\n"); 859 hdev->asic_funcs->hw_fini(hdev, true, false); 860 } 861 862 return 0; 863 864pci_fini: 865 hl_pci_fini(hdev); 866free_queue_props: 867 kfree(hdev->asic_prop.hw_queues_props); 868 return rc; 869} 870 871static int gaudi_early_fini(struct hl_device *hdev) 872{ 873 kfree(hdev->asic_prop.hw_queues_props); 874 hl_pci_fini(hdev); 875 876 return 0; 877} 878 879/** 880 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values 881 * 882 * @hdev: pointer to hl_device structure 883 * 884 */ 885static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) 886{ 887 struct asic_fixed_properties *prop = &hdev->asic_prop; 888 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; 889 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; 890 int rc; 891 892 if (hdev->asic_prop.fw_security_enabled) { 893 struct gaudi_device *gaudi = hdev->asic_specific; 894 895 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 896 return 0; 897 898 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); 899 900 if (rc) 901 return rc; 902 903 freq = pll_freq_arr[2]; 904 } else { 905 /* Backward compatibility */ 906 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); 907 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); 908 nr = RREG32(mmPSOC_CPU_PLL_NR); 909 nf = RREG32(mmPSOC_CPU_PLL_NF); 910 od = RREG32(mmPSOC_CPU_PLL_OD); 911 912 if (div_sel == DIV_SEL_REF_CLK || 913 div_sel == DIV_SEL_DIVIDED_REF) { 914 if (div_sel == DIV_SEL_REF_CLK) 915 freq = PLL_REF_CLK; 916 else 917 freq = PLL_REF_CLK / (div_fctr + 1); 918 } else if (div_sel == DIV_SEL_PLL_CLK || 919 div_sel == DIV_SEL_DIVIDED_PLL) { 920 pll_clk = PLL_REF_CLK * (nf + 1) / 921 ((nr + 1) * (od + 1)); 922 if (div_sel == DIV_SEL_PLL_CLK) 923 freq = pll_clk; 924 else 925 freq = pll_clk / (div_fctr + 1); 926 } else { 927 dev_warn(hdev->dev, 928 "Received invalid div select value: %d", 929 div_sel); 930 freq = 0; 931 } 932 } 933 934 prop->psoc_timestamp_frequency = freq; 935 prop->psoc_pci_pll_nr = nr; 936 prop->psoc_pci_pll_nf = nf; 937 prop->psoc_pci_pll_od = od; 938 prop->psoc_pci_pll_div_factor = div_fctr; 939 940 return 0; 941} 942 943static int _gaudi_init_tpc_mem(struct hl_device *hdev, 944 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) 945{ 946 struct asic_fixed_properties *prop = &hdev->asic_prop; 947 struct packet_lin_dma *init_tpc_mem_pkt; 948 struct hl_cs_job *job; 949 struct hl_cb *cb; 950 u64 dst_addr; 951 u32 cb_size, ctl; 
952 u8 tpc_id; 953 int rc; 954 955 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 956 if (!cb) 957 return -EFAULT; 958 959 init_tpc_mem_pkt = cb->kernel_address; 960 cb_size = sizeof(*init_tpc_mem_pkt); 961 memset(init_tpc_mem_pkt, 0, cb_size); 962 963 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); 964 965 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 966 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 967 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 968 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 969 970 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); 971 972 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); 973 dst_addr = (prop->sram_user_base_address & 974 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 975 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 976 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); 977 978 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 979 if (!job) { 980 dev_err(hdev->dev, "Failed to allocate a new job\n"); 981 rc = -ENOMEM; 982 goto release_cb; 983 } 984 985 job->id = 0; 986 job->user_cb = cb; 987 atomic_inc(&job->user_cb->cs_cnt); 988 job->user_cb_size = cb_size; 989 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 990 job->patched_cb = job->user_cb; 991 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 992 993 hl_debugfs_add_job(hdev, job); 994 995 rc = gaudi_send_job_on_qman0(hdev, job); 996 997 if (rc) 998 goto free_job; 999 1000 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 1001 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id); 1002 if (rc) 1003 break; 1004 } 1005 1006free_job: 1007 hl_userptr_delete_list(hdev, &job->userptr_list); 1008 hl_debugfs_remove_job(hdev, job); 1009 kfree(job); 1010 atomic_dec(&cb->cs_cnt); 1011 1012release_cb: 1013 hl_cb_put(cb); 1014 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1015 1016 return rc; 1017} 1018 1019/* 1020 * gaudi_init_tpc_mem() - Initialize TPC memories. 1021 * @hdev: Pointer to hl_device structure. 1022 * 1023 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories. 1024 * 1025 * Return: 0 for success, negative value for error. 
1026 */ 1027static int gaudi_init_tpc_mem(struct hl_device *hdev) 1028{ 1029 const struct firmware *fw; 1030 size_t fw_size; 1031 void *cpu_addr; 1032 dma_addr_t dma_handle; 1033 int rc, count = 5; 1034 1035again: 1036 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); 1037 if (rc == -EINTR && count-- > 0) { 1038 msleep(50); 1039 goto again; 1040 } 1041 1042 if (rc) { 1043 dev_err(hdev->dev, "Failed to load firmware file %s\n", 1044 GAUDI_TPC_FW_FILE); 1045 goto out; 1046 } 1047 1048 fw_size = fw->size; 1049 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size, 1050 &dma_handle, GFP_KERNEL | __GFP_ZERO); 1051 if (!cpu_addr) { 1052 dev_err(hdev->dev, 1053 "Failed to allocate %zu of dma memory for TPC kernel\n", 1054 fw_size); 1055 rc = -ENOMEM; 1056 goto out; 1057 } 1058 1059 memcpy(cpu_addr, fw->data, fw_size); 1060 1061 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size); 1062 1063 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr, 1064 dma_handle); 1065 1066out: 1067 release_firmware(fw); 1068 return rc; 1069} 1070 1071static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream) 1072{ 1073 struct gaudi_device *gaudi = hdev->asic_specific; 1074 struct gaudi_collective_properties *prop = &gaudi->collective_props; 1075 struct hl_hw_queue *q; 1076 u32 i, sob_id, sob_group_id, queue_id; 1077 1078 /* Iterate through SOB groups and assign a SOB for each slave queue */ 1079 sob_group_id = 1080 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream]; 1081 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id; 1082 1083 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1084 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 1085 q = &hdev->kernel_queues[queue_id + (4 * i)]; 1086 q->sync_stream_prop.collective_sob_id = sob_id + i; 1087 } 1088 1089 /* Both DMA5 and TPC7 use the same resources since only a single 1090 * engine need to participate in the reduction process 1091 */ 1092 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1093 q = &hdev->kernel_queues[queue_id]; 1094 q->sync_stream_prop.collective_sob_id = 1095 sob_id + NIC_NUMBER_OF_ENGINES; 1096 1097 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1098 q = &hdev->kernel_queues[queue_id]; 1099 q->sync_stream_prop.collective_sob_id = 1100 sob_id + NIC_NUMBER_OF_ENGINES; 1101} 1102 1103static void gaudi_sob_group_hw_reset(struct kref *ref) 1104{ 1105 struct gaudi_hw_sob_group *hw_sob_group = 1106 container_of(ref, struct gaudi_hw_sob_group, kref); 1107 struct hl_device *hdev = hw_sob_group->hdev; 1108 int i; 1109 1110 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++) 1111 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 1112 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0); 1113 1114 kref_init(&hw_sob_group->kref); 1115} 1116 1117static void gaudi_sob_group_reset_error(struct kref *ref) 1118{ 1119 struct gaudi_hw_sob_group *hw_sob_group = 1120 container_of(ref, struct gaudi_hw_sob_group, kref); 1121 struct hl_device *hdev = hw_sob_group->hdev; 1122 1123 dev_crit(hdev->dev, 1124 "SOB release shouldn't be called here, base_sob_id: %d\n", 1125 hw_sob_group->base_sob_id); 1126} 1127 1128static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi) 1129{ 1130 struct gaudi_collective_properties *prop; 1131 int i; 1132 1133 prop = &gaudi->collective_props; 1134 1135 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask)); 1136 1137 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) 1138 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i)) 1139 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1140 
BIT(i % HL_MAX_SOBS_PER_MONITOR); 1141 /* Set collective engine bit */ 1142 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1143 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1144} 1145 1146static int gaudi_collective_init(struct hl_device *hdev) 1147{ 1148 u32 i, sob_id, reserved_sobs_per_group; 1149 struct gaudi_collective_properties *prop; 1150 struct gaudi_device *gaudi; 1151 1152 gaudi = hdev->asic_specific; 1153 prop = &gaudi->collective_props; 1154 sob_id = hdev->asic_prop.collective_first_sob; 1155 1156 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */ 1157 reserved_sobs_per_group = 1158 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR); 1159 1160 /* Init SOB groups */ 1161 for (i = 0 ; i < NUM_SOB_GROUPS; i++) { 1162 prop->hw_sob_group[i].hdev = hdev; 1163 prop->hw_sob_group[i].base_sob_id = sob_id; 1164 sob_id += reserved_sobs_per_group; 1165 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref); 1166 } 1167 1168 for (i = 0 ; i < QMAN_STREAMS; i++) { 1169 prop->next_sob_group_val[i] = 1; 1170 prop->curr_sob_group_idx[i] = 0; 1171 gaudi_collective_map_sobs(hdev, i); 1172 } 1173 1174 gaudi_collective_mstr_sob_mask_set(gaudi); 1175 1176 return 0; 1177} 1178 1179static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group) 1180{ 1181 struct gaudi_device *gaudi = hdev->asic_specific; 1182 struct gaudi_collective_properties *cprop = &gaudi->collective_props; 1183 1184 kref_put(&cprop->hw_sob_group[sob_group].kref, 1185 gaudi_sob_group_hw_reset); 1186} 1187 1188static void gaudi_collective_master_init_job(struct hl_device *hdev, 1189 struct hl_cs_job *job, u32 stream, u32 sob_group_offset) 1190{ 1191 u32 master_sob_base, master_monitor, queue_id, cb_size = 0; 1192 struct gaudi_collective_properties *cprop; 1193 struct hl_gen_wait_properties wait_prop; 1194 struct hl_sync_stream_properties *prop; 1195 struct gaudi_device *gaudi; 1196 1197 gaudi = hdev->asic_specific; 1198 cprop = &gaudi->collective_props; 1199 queue_id = job->hw_queue_id; 1200 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1201 1202 master_sob_base = 1203 cprop->hw_sob_group[sob_group_offset].base_sob_id; 1204 master_monitor = prop->collective_mstr_mon_id[0]; 1205 1206 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id; 1207 1208 dev_dbg(hdev->dev, 1209 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1210 master_sob_base, cprop->mstr_sob_mask[0], 1211 cprop->next_sob_group_val[stream], 1212 master_monitor, queue_id); 1213 1214 wait_prop.data = (void *) job->patched_cb; 1215 wait_prop.sob_base = master_sob_base; 1216 wait_prop.sob_mask = cprop->mstr_sob_mask[0]; 1217 wait_prop.sob_val = cprop->next_sob_group_val[stream]; 1218 wait_prop.mon_id = master_monitor; 1219 wait_prop.q_idx = queue_id; 1220 wait_prop.size = cb_size; 1221 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1222 1223 master_sob_base += HL_MAX_SOBS_PER_MONITOR; 1224 master_monitor = prop->collective_mstr_mon_id[1]; 1225 1226 dev_dbg(hdev->dev, 1227 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1228 master_sob_base, cprop->mstr_sob_mask[1], 1229 cprop->next_sob_group_val[stream], 1230 master_monitor, queue_id); 1231 1232 wait_prop.sob_base = master_sob_base; 1233 wait_prop.sob_mask = cprop->mstr_sob_mask[1]; 1234 wait_prop.mon_id = master_monitor; 1235 wait_prop.size = cb_size; 1236 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1237} 1238 1239static void gaudi_collective_slave_init_job(struct hl_device *hdev, 1240 struct hl_cs_job *job, struct hl_cs_compl 
*cs_cmpl) 1241{ 1242 struct hl_gen_wait_properties wait_prop; 1243 struct hl_sync_stream_properties *prop; 1244 u32 queue_id, cb_size = 0; 1245 1246 queue_id = job->hw_queue_id; 1247 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1248 1249 if (job->cs->encaps_signals) { 1250 /* use the encaps signal handle store earlier in the flow 1251 * and set the SOB information from the encaps 1252 * signals handle 1253 */ 1254 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job, 1255 cs_cmpl); 1256 1257 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n", 1258 job->cs->sequence, 1259 cs_cmpl->hw_sob->sob_id, 1260 cs_cmpl->sob_val); 1261 } 1262 1263 /* Add to wait CBs using slave monitor */ 1264 wait_prop.data = (void *) job->user_cb; 1265 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; 1266 wait_prop.sob_mask = 0x1; 1267 wait_prop.sob_val = cs_cmpl->sob_val; 1268 wait_prop.mon_id = prop->collective_slave_mon_id; 1269 wait_prop.q_idx = queue_id; 1270 wait_prop.size = cb_size; 1271 1272 dev_dbg(hdev->dev, 1273 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n", 1274 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, 1275 prop->collective_slave_mon_id, queue_id); 1276 1277 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1278 1279 dev_dbg(hdev->dev, 1280 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n", 1281 prop->collective_sob_id, queue_id); 1282 1283 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, 1284 prop->collective_sob_id, cb_size, false); 1285} 1286 1287static int gaudi_collective_wait_init_cs(struct hl_cs *cs) 1288{ 1289 struct hl_cs_compl *signal_cs_cmpl = 1290 container_of(cs->signal_fence, struct hl_cs_compl, base_fence); 1291 struct hl_cs_compl *cs_cmpl = 1292 container_of(cs->fence, struct hl_cs_compl, base_fence); 1293 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; 1294 struct gaudi_collective_properties *cprop; 1295 u32 stream, queue_id, sob_group_offset; 1296 struct gaudi_device *gaudi; 1297 struct hl_device *hdev; 1298 struct hl_cs_job *job; 1299 struct hl_ctx *ctx; 1300 1301 ctx = cs->ctx; 1302 hdev = ctx->hdev; 1303 gaudi = hdev->asic_specific; 1304 cprop = &gaudi->collective_props; 1305 1306 if (cs->encaps_signals) { 1307 cs_cmpl->hw_sob = handle->hw_sob; 1308 /* at this checkpoint we only need the hw_sob pointer 1309 * for the completion check before start going over the jobs 1310 * of the master/slaves, the sob_value will be taken later on 1311 * in gaudi_collective_slave_init_job depends on each 1312 * job wait offset value. 1313 */ 1314 cs_cmpl->sob_val = 0; 1315 } else { 1316 /* copy the SOB id and value of the signal CS */ 1317 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; 1318 cs_cmpl->sob_val = signal_cs_cmpl->sob_val; 1319 } 1320 1321 /* check again if the signal cs already completed. 1322 * if yes then don't send any wait cs since the hw_sob 1323 * could be in reset already. if signal is not completed 1324 * then get refcount to hw_sob to prevent resetting the sob 1325 * while wait cs is not submitted. 1326 * note that this check is protected by two locks, 1327 * hw queue lock and completion object lock, 1328 * and the same completion object lock also protects 1329 * the hw_sob reset handler function. 1330 * The hw_queue lock prevent out of sync of hw_sob 1331 * refcount value, changed by signal/wait flows. 
1332 */ 1333 spin_lock(&signal_cs_cmpl->lock); 1334 1335 if (completion_done(&cs->signal_fence->completion)) { 1336 spin_unlock(&signal_cs_cmpl->lock); 1337 return -EINVAL; 1338 } 1339 /* Increment kref since all slave queues are now waiting on it */ 1340 kref_get(&cs_cmpl->hw_sob->kref); 1341 1342 spin_unlock(&signal_cs_cmpl->lock); 1343 1344 /* Calculate the stream from collective master queue (1st job) */ 1345 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node); 1346 stream = job->hw_queue_id % 4; 1347 sob_group_offset = 1348 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream]; 1349 1350 list_for_each_entry(job, &cs->job_list, cs_node) { 1351 queue_id = job->hw_queue_id; 1352 1353 if (hdev->kernel_queues[queue_id].collective_mode == 1354 HL_COLLECTIVE_MASTER) 1355 gaudi_collective_master_init_job(hdev, job, stream, 1356 sob_group_offset); 1357 else 1358 gaudi_collective_slave_init_job(hdev, job, cs_cmpl); 1359 } 1360 1361 cs_cmpl->sob_group = sob_group_offset; 1362 1363 /* Handle sob group kref and wraparound */ 1364 kref_get(&cprop->hw_sob_group[sob_group_offset].kref); 1365 cprop->next_sob_group_val[stream]++; 1366 1367 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) { 1368 /* 1369 * Decrement as we reached the max value. 1370 * The release function won't be called here as we've 1371 * just incremented the refcount. 1372 */ 1373 kref_put(&cprop->hw_sob_group[sob_group_offset].kref, 1374 gaudi_sob_group_reset_error); 1375 cprop->next_sob_group_val[stream] = 1; 1376 /* only two SOBs are currently in use */ 1377 cprop->curr_sob_group_idx[stream] = 1378 (cprop->curr_sob_group_idx[stream] + 1) & 1379 (HL_RSVD_SOBS - 1); 1380 1381 gaudi_collective_map_sobs(hdev, stream); 1382 1383 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n", 1384 cprop->curr_sob_group_idx[stream], stream); 1385 } 1386 1387 mb(); 1388 hl_fence_put(cs->signal_fence); 1389 cs->signal_fence = NULL; 1390 1391 return 0; 1392} 1393 1394static int gaudi_collective_wait_create_job(struct hl_device *hdev, 1395 struct hl_ctx *ctx, struct hl_cs *cs, 1396 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id, 1397 u32 encaps_signal_offset) 1398{ 1399 struct hw_queue_properties *hw_queue_prop; 1400 struct hl_cs_counters_atomic *cntr; 1401 struct hl_cs_job *job; 1402 struct hl_cb *cb; 1403 u32 cb_size; 1404 bool patched_cb; 1405 1406 cntr = &hdev->aggregated_cs_counters; 1407 1408 if (mode == HL_COLLECTIVE_MASTER) { 1409 /* CB size of collective master queue contains 1410 * 4 msg short packets for monitor 1 configuration 1411 * 1 fence packet 1412 * 4 msg short packets for monitor 2 configuration 1413 * 1 fence packet 1414 * 2 msg prot packets for completion and MSI-X 1415 */ 1416 cb_size = sizeof(struct packet_msg_short) * 8 + 1417 sizeof(struct packet_fence) * 2 + 1418 sizeof(struct packet_msg_prot) * 2; 1419 patched_cb = true; 1420 } else { 1421 /* CB size of collective slave queues contains 1422 * 4 msg short packets for monitor configuration 1423 * 1 fence packet 1424 * 1 additional msg short packet for sob signal 1425 */ 1426 cb_size = sizeof(struct packet_msg_short) * 5 + 1427 sizeof(struct packet_fence); 1428 patched_cb = false; 1429 } 1430 1431 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id]; 1432 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); 1433 if (!job) { 1434 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1435 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1436 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1437 return -ENOMEM; 1438 } 1439 
1440 /* Allocate internal mapped CB for non patched CBs */ 1441 cb = hl_cb_kernel_create(hdev, cb_size, 1442 hdev->mmu_enable && !patched_cb); 1443 if (!cb) { 1444 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1445 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1446 kfree(job); 1447 return -EFAULT; 1448 } 1449 1450 job->id = 0; 1451 job->cs = cs; 1452 job->user_cb = cb; 1453 atomic_inc(&job->user_cb->cs_cnt); 1454 job->user_cb_size = cb_size; 1455 job->hw_queue_id = queue_id; 1456 1457 /* since its guaranteed to have only one chunk in the collective wait 1458 * cs, we can use this chunk to set the encapsulated signal offset 1459 * in the jobs. 1460 */ 1461 if (cs->encaps_signals) 1462 job->encaps_sig_wait_offset = encaps_signal_offset; 1463 1464 /* 1465 * No need in parsing, user CB is the patched CB. 1466 * We call hl_cb_destroy() out of two reasons - we don't need 1467 * the CB in the CB idr anymore and to decrement its refcount as 1468 * it was incremented inside hl_cb_kernel_create(). 1469 */ 1470 if (patched_cb) 1471 job->patched_cb = job->user_cb; 1472 else 1473 job->patched_cb = NULL; 1474 1475 job->job_cb_size = job->user_cb_size; 1476 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1477 1478 /* increment refcount as for external queues we get completion */ 1479 if (hw_queue_prop->type == QUEUE_TYPE_EXT) 1480 cs_get(cs); 1481 1482 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1483 1484 list_add_tail(&job->cs_node, &cs->job_list); 1485 1486 hl_debugfs_add_job(hdev, job); 1487 1488 return 0; 1489} 1490 1491static int gaudi_collective_wait_create_jobs(struct hl_device *hdev, 1492 struct hl_ctx *ctx, struct hl_cs *cs, 1493 u32 wait_queue_id, u32 collective_engine_id, 1494 u32 encaps_signal_offset) 1495{ 1496 struct gaudi_device *gaudi = hdev->asic_specific; 1497 struct hw_queue_properties *hw_queue_prop; 1498 u32 queue_id, collective_queue, num_jobs; 1499 u32 stream, nic_queue, nic_idx = 0; 1500 bool skip; 1501 int i, rc = 0; 1502 1503 /* Verify wait queue id is configured as master */ 1504 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id]; 1505 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 1506 dev_err(hdev->dev, 1507 "Queue %d is not configured as collective master\n", 1508 wait_queue_id); 1509 return -EINVAL; 1510 } 1511 1512 /* Verify engine id is supported */ 1513 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 && 1514 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) { 1515 dev_err(hdev->dev, 1516 "Collective wait does not support engine %u\n", 1517 collective_engine_id); 1518 return -EINVAL; 1519 } 1520 1521 stream = wait_queue_id % 4; 1522 1523 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5) 1524 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1525 else 1526 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1527 1528 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1; 1529 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1530 1531 /* First job goes to the collective master queue, it will wait for 1532 * the collective slave queues to finish execution. 1533 * The synchronization is done using two monitors: 1534 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the 1535 * reduction engine (DMA5/TPC7). 1536 * 1537 * Rest of the jobs goes to the collective slave queues which will 1538 * all wait for the user to signal sob 'cs_cmpl->sob_val'. 
1539 */ 1540 for (i = 0 ; i < num_jobs ; i++) { 1541 if (i == 0) { 1542 queue_id = wait_queue_id; 1543 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1544 HL_COLLECTIVE_MASTER, queue_id, 1545 wait_queue_id, encaps_signal_offset); 1546 } else { 1547 if (nic_idx < NIC_NUMBER_OF_ENGINES) { 1548 if (gaudi->hw_cap_initialized & 1549 BIT(HW_CAP_NIC_SHIFT + nic_idx)) 1550 skip = false; 1551 else 1552 skip = true; 1553 1554 queue_id = nic_queue; 1555 nic_queue += 4; 1556 nic_idx++; 1557 1558 if (skip) 1559 continue; 1560 } else { 1561 queue_id = collective_queue; 1562 } 1563 1564 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1565 HL_COLLECTIVE_SLAVE, queue_id, 1566 wait_queue_id, encaps_signal_offset); 1567 } 1568 1569 if (rc) 1570 return rc; 1571 } 1572 1573 return rc; 1574} 1575 1576static int gaudi_late_init(struct hl_device *hdev) 1577{ 1578 struct gaudi_device *gaudi = hdev->asic_specific; 1579 int rc; 1580 1581 rc = gaudi->cpucp_info_get(hdev); 1582 if (rc) { 1583 dev_err(hdev->dev, "Failed to get cpucp info\n"); 1584 return rc; 1585 } 1586 1587 if ((hdev->card_type == cpucp_card_type_pci) && 1588 (hdev->nic_ports_mask & 0x3)) { 1589 dev_info(hdev->dev, 1590 "PCI card detected, only 8 ports are enabled\n"); 1591 hdev->nic_ports_mask &= ~0x3; 1592 1593 /* Stop and disable unused NIC QMANs */ 1594 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1595 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1596 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1597 1598 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1599 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1600 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1601 1602 WREG32(mmNIC0_QM0_GLBL_CFG0, 0); 1603 WREG32(mmNIC0_QM1_GLBL_CFG0, 0); 1604 1605 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1); 1606 } 1607 1608 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS); 1609 if (rc) { 1610 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 1611 return rc; 1612 } 1613 1614 /* Scrub both SRAM and DRAM */ 1615 rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0); 1616 if (rc) 1617 goto disable_pci_access; 1618 1619 rc = gaudi_fetch_psoc_frequency(hdev); 1620 if (rc) { 1621 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 1622 goto disable_pci_access; 1623 } 1624 1625 rc = gaudi_mmu_clear_pgt_range(hdev); 1626 if (rc) { 1627 dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); 1628 goto disable_pci_access; 1629 } 1630 1631 rc = gaudi_init_tpc_mem(hdev); 1632 if (rc) { 1633 dev_err(hdev->dev, "Failed to initialize TPC memories\n"); 1634 goto disable_pci_access; 1635 } 1636 1637 rc = gaudi_collective_init(hdev); 1638 if (rc) { 1639 dev_err(hdev->dev, "Failed to init collective\n"); 1640 goto disable_pci_access; 1641 } 1642 1643 /* We only support a single ASID for the user, so for the sake of optimization, just 1644 * initialize the ASID one time during device initialization with the fixed value of 1 1645 */ 1646 gaudi_mmu_prepare(hdev, 1); 1647 1648 hl_fw_set_pll_profile(hdev); 1649 1650 return 0; 1651 1652disable_pci_access: 1653 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); 1654 1655 return rc; 1656} 1657 1658static void gaudi_late_fini(struct hl_device *hdev) 1659{ 1660 const struct hwmon_channel_info **channel_info_arr; 1661 int i = 0; 1662 1663 if (!hdev->hl_chip_info->info) 1664 return; 1665 1666 channel_info_arr = hdev->hl_chip_info->info; 1667 1668 while (channel_info_arr[i]) { 1669 kfree(channel_info_arr[i]->config); 1670 kfree(channel_info_arr[i]); 1671 i++; 1672 } 1673 1674 
kfree(channel_info_arr); 1675 1676 hdev->hl_chip_info->info = NULL; 1677} 1678 1679static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 1680{ 1681 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 1682 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; 1683 int i, j, rc = 0; 1684 1685 /* 1686 * The device CPU works with 40-bits addresses, while bit 39 must be set 1687 * to '1' when accessing the host. 1688 * Bits 49:39 of the full host address are saved for a later 1689 * configuration of the HW to perform extension to 50 bits. 1690 * Because there is a single HW register that holds the extension bits, 1691 * these bits must be identical in all allocated range. 1692 */ 1693 1694 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1695 virt_addr_arr[i] = 1696 hdev->asic_funcs->asic_dma_alloc_coherent(hdev, 1697 HL_CPU_ACCESSIBLE_MEM_SIZE, 1698 &dma_addr_arr[i], 1699 GFP_KERNEL | __GFP_ZERO); 1700 if (!virt_addr_arr[i]) { 1701 rc = -ENOMEM; 1702 goto free_dma_mem_arr; 1703 } 1704 1705 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1706 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1707 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1708 break; 1709 } 1710 1711 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1712 dev_err(hdev->dev, 1713 "MSB of CPU accessible DMA memory are not identical in all range\n"); 1714 rc = -EFAULT; 1715 goto free_dma_mem_arr; 1716 } 1717 1718 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1719 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1720 hdev->cpu_pci_msb_addr = 1721 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1722 1723 if (!hdev->asic_prop.fw_security_enabled) 1724 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1725 1726free_dma_mem_arr: 1727 for (j = 0 ; j < i ; j++) 1728 hdev->asic_funcs->asic_dma_free_coherent(hdev, 1729 HL_CPU_ACCESSIBLE_MEM_SIZE, 1730 virt_addr_arr[j], 1731 dma_addr_arr[j]); 1732 1733 return rc; 1734} 1735 1736static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1737{ 1738 struct gaudi_device *gaudi = hdev->asic_specific; 1739 struct gaudi_internal_qman_info *q; 1740 u32 i; 1741 1742 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1743 q = &gaudi->internal_qmans[i]; 1744 if (!q->pq_kernel_addr) 1745 continue; 1746 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size, 1747 q->pq_kernel_addr, 1748 q->pq_dma_addr); 1749 } 1750} 1751 1752static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1753{ 1754 struct gaudi_device *gaudi = hdev->asic_specific; 1755 struct gaudi_internal_qman_info *q; 1756 int rc, i; 1757 1758 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1759 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1760 continue; 1761 1762 q = &gaudi->internal_qmans[i]; 1763 1764 switch (i) { 1765 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1766 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1767 break; 1768 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1769 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1770 break; 1771 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1772 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1773 break; 1774 case GAUDI_QUEUE_ID_NIC_0_0 ... 
GAUDI_QUEUE_ID_NIC_9_3: 1775 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1776 break; 1777 default: 1778 dev_err(hdev->dev, "Bad internal queue index %d", i); 1779 rc = -EINVAL; 1780 goto free_internal_qmans_pq_mem; 1781 } 1782 1783 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( 1784 hdev, q->pq_size, 1785 &q->pq_dma_addr, 1786 GFP_KERNEL | __GFP_ZERO); 1787 if (!q->pq_kernel_addr) { 1788 rc = -ENOMEM; 1789 goto free_internal_qmans_pq_mem; 1790 } 1791 } 1792 1793 return 0; 1794 1795free_internal_qmans_pq_mem: 1796 gaudi_free_internal_qmans_pq_mem(hdev); 1797 return rc; 1798} 1799 1800static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1801{ 1802 struct asic_fixed_properties *prop = &hdev->asic_prop; 1803 struct pci_mem_region *region; 1804 1805 /* CFG */ 1806 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1807 region->region_base = CFG_BASE; 1808 region->region_size = CFG_SIZE; 1809 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1810 region->bar_size = CFG_BAR_SIZE; 1811 region->bar_id = CFG_BAR_ID; 1812 region->used = 1; 1813 1814 /* SRAM */ 1815 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1816 region->region_base = SRAM_BASE_ADDR; 1817 region->region_size = SRAM_SIZE; 1818 region->offset_in_bar = 0; 1819 region->bar_size = SRAM_BAR_SIZE; 1820 region->bar_id = SRAM_BAR_ID; 1821 region->used = 1; 1822 1823 /* DRAM */ 1824 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1825 region->region_base = DRAM_PHYS_BASE; 1826 region->region_size = hdev->asic_prop.dram_size; 1827 region->offset_in_bar = 0; 1828 region->bar_size = prop->dram_pci_bar_size; 1829 region->bar_id = HBM_BAR_ID; 1830 region->used = 1; 1831 1832 /* SP SRAM */ 1833 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1834 region->region_base = PSOC_SCRATCHPAD_ADDR; 1835 region->region_size = PSOC_SCRATCHPAD_SIZE; 1836 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1837 region->bar_size = CFG_BAR_SIZE; 1838 region->bar_id = CFG_BAR_ID; 1839 region->used = 1; 1840} 1841 1842static int gaudi_sw_init(struct hl_device *hdev) 1843{ 1844 struct gaudi_device *gaudi; 1845 u32 i, event_id = 0; 1846 int rc; 1847 1848 /* Allocate device structure */ 1849 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1850 if (!gaudi) 1851 return -ENOMEM; 1852 1853 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1854 if (gaudi_irq_map_table[i].valid) { 1855 if (event_id == GAUDI_EVENT_SIZE) { 1856 dev_err(hdev->dev, 1857 "Event array exceeds the limit of %u events\n", 1858 GAUDI_EVENT_SIZE); 1859 rc = -EINVAL; 1860 goto free_gaudi_device; 1861 } 1862 1863 gaudi->events[event_id++] = 1864 gaudi_irq_map_table[i].fc_id; 1865 } 1866 } 1867 1868 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1869 1870 hdev->asic_specific = gaudi; 1871 1872 /* Create DMA pool for small allocations */ 1873 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1874 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1875 if (!hdev->dma_pool) { 1876 dev_err(hdev->dev, "failed to create DMA pool\n"); 1877 rc = -ENOMEM; 1878 goto free_gaudi_device; 1879 } 1880 1881 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1882 if (rc) 1883 goto free_dma_pool; 1884 1885 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1886 if (!hdev->cpu_accessible_dma_pool) { 1887 dev_err(hdev->dev, 1888 "Failed to create CPU accessible DMA pool\n"); 1889 rc = -ENOMEM; 1890 goto free_cpu_dma_mem; 1891 } 1892 1893 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1894 (uintptr_t) hdev->cpu_accessible_dma_mem, 1895 
HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1896 if (rc) { 1897 dev_err(hdev->dev, 1898 "Failed to add memory to CPU accessible DMA pool\n"); 1899 rc = -EFAULT; 1900 goto free_cpu_accessible_dma_pool; 1901 } 1902 1903 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1904 if (rc) 1905 goto free_cpu_accessible_dma_pool; 1906 1907 spin_lock_init(&gaudi->hw_queues_lock); 1908 1909 hdev->supports_sync_stream = true; 1910 hdev->supports_coresight = true; 1911 hdev->supports_staged_submission = true; 1912 hdev->supports_wait_for_multi_cs = true; 1913 1914 hdev->asic_funcs->set_pci_memory_regions(hdev); 1915 hdev->stream_master_qid_arr = 1916 hdev->asic_funcs->get_stream_master_qid_arr(); 1917 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1918 1919 return 0; 1920 1921free_cpu_accessible_dma_pool: 1922 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1923free_cpu_dma_mem: 1924 if (!hdev->asic_prop.fw_security_enabled) 1925 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1926 hdev->cpu_pci_msb_addr); 1927 hdev->asic_funcs->asic_dma_free_coherent(hdev, 1928 HL_CPU_ACCESSIBLE_MEM_SIZE, 1929 hdev->cpu_accessible_dma_mem, 1930 hdev->cpu_accessible_dma_address); 1931free_dma_pool: 1932 dma_pool_destroy(hdev->dma_pool); 1933free_gaudi_device: 1934 kfree(gaudi); 1935 return rc; 1936} 1937 1938static int gaudi_sw_fini(struct hl_device *hdev) 1939{ 1940 struct gaudi_device *gaudi = hdev->asic_specific; 1941 1942 gaudi_free_internal_qmans_pq_mem(hdev); 1943 1944 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1945 1946 if (!hdev->asic_prop.fw_security_enabled) 1947 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1948 hdev->cpu_pci_msb_addr); 1949 1950 hdev->asic_funcs->asic_dma_free_coherent(hdev, 1951 HL_CPU_ACCESSIBLE_MEM_SIZE, 1952 hdev->cpu_accessible_dma_mem, 1953 hdev->cpu_accessible_dma_address); 1954 1955 dma_pool_destroy(hdev->dma_pool); 1956 1957 kfree(gaudi); 1958 1959 return 0; 1960} 1961 1962static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1963{ 1964 struct hl_device *hdev = arg; 1965 int i; 1966 1967 if (hdev->disabled) 1968 return IRQ_HANDLED; 1969 1970 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1971 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1972 1973 hl_irq_handler_eq(irq, &hdev->event_queue); 1974 1975 return IRQ_HANDLED; 1976} 1977 1978/* 1979 * For backward compatibility, new MSI interrupts should be set after the 1980 * existing CPU and NIC interrupts. 1981 */ 1982static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1983 bool cpu_eq) 1984{ 1985 int msi_vec; 1986 1987 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1988 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1989 GAUDI_EVENT_QUEUE_MSI_IDX); 1990 1991 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1992 (nr + NIC_NUMBER_OF_ENGINES + 1); 1993 1994 return pci_irq_vector(hdev->pdev, msi_vec); 1995} 1996 1997static int gaudi_enable_msi_single(struct hl_device *hdev) 1998{ 1999 int rc, irq; 2000 2001 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2002 2003 irq = gaudi_pci_irq_vector(hdev, 0, false); 2004 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2005 "gaudi single msi", hdev); 2006 if (rc) 2007 dev_err(hdev->dev, 2008 "Failed to request single MSI IRQ\n"); 2009 2010 return rc; 2011} 2012 2013static int gaudi_enable_msi_multi(struct hl_device *hdev) 2014{ 2015 int cq_cnt = hdev->asic_prop.completion_queues_count; 2016 int rc, i, irq_cnt_init, irq; 2017 2018 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { 2019 irq = gaudi_pci_irq_vector(hdev, i, false); 2020 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], 2021 &hdev->completion_queue[i]); 2022 if (rc) { 2023 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2024 goto free_irqs; 2025 } 2026 } 2027 2028 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); 2029 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], 2030 &hdev->event_queue); 2031 if (rc) { 2032 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2033 goto free_irqs; 2034 } 2035 2036 return 0; 2037 2038free_irqs: 2039 for (i = 0 ; i < irq_cnt_init ; i++) 2040 free_irq(gaudi_pci_irq_vector(hdev, i, false), 2041 &hdev->completion_queue[i]); 2042 return rc; 2043} 2044 2045static int gaudi_enable_msi(struct hl_device *hdev) 2046{ 2047 struct gaudi_device *gaudi = hdev->asic_specific; 2048 int rc; 2049 2050 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2051 return 0; 2052 2053 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2054 if (rc < 0) { 2055 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2056 return rc; 2057 } 2058 2059 if (rc < NUMBER_OF_INTERRUPTS) { 2060 gaudi->multi_msi_mode = false; 2061 rc = gaudi_enable_msi_single(hdev); 2062 } else { 2063 gaudi->multi_msi_mode = true; 2064 rc = gaudi_enable_msi_multi(hdev); 2065 } 2066 2067 if (rc) 2068 goto free_pci_irq_vectors; 2069 2070 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2071 2072 return 0; 2073 2074free_pci_irq_vectors: 2075 pci_free_irq_vectors(hdev->pdev); 2076 return rc; 2077} 2078 2079static void gaudi_sync_irqs(struct hl_device *hdev) 2080{ 2081 struct gaudi_device *gaudi = hdev->asic_specific; 2082 int i, cq_cnt = hdev->asic_prop.completion_queues_count; 2083 2084 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2085 return; 2086 2087 /* Wait for all pending IRQs to be finished */ 2088 if (gaudi->multi_msi_mode) { 2089 for (i = 0 ; i < cq_cnt ; i++) 2090 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); 2091 2092 synchronize_irq(gaudi_pci_irq_vector(hdev, 2093 GAUDI_EVENT_QUEUE_MSI_IDX, 2094 true)); 2095 } else { 2096 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2097 } 2098} 2099 2100static void gaudi_disable_msi(struct hl_device *hdev) 2101{ 2102 struct gaudi_device *gaudi = hdev->asic_specific; 2103 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; 2104 2105 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2106 return; 2107 2108 gaudi_sync_irqs(hdev); 2109 2110 if (gaudi->multi_msi_mode) { 2111 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, 2112 true); 2113 free_irq(irq, &hdev->event_queue); 2114 2115 for (i = 0 ; i < cq_cnt ; i++) { 2116 irq = gaudi_pci_irq_vector(hdev, i, false); 2117 free_irq(irq, &hdev->completion_queue[i]); 2118 } 2119 } else { 2120 
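		/*
		 * Single MSI mode: only vector 0 was requested (in
		 * gaudi_enable_msi_single()) with hdev as the cookie, so it
		 * must be released with that same cookie.
		 */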
free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2121 } 2122 2123 pci_free_irq_vectors(hdev->pdev); 2124 2125 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2126} 2127 2128static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2129{ 2130 struct gaudi_device *gaudi = hdev->asic_specific; 2131 2132 if (hdev->asic_prop.fw_security_enabled) 2133 return; 2134 2135 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2136 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2137 return; 2138 2139 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2140 return; 2141 2142 if (!hdev->sram_scrambler_enable) 2143 return; 2144 2145 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2146 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2147 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2148 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2149 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2150 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2151 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2152 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2153 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2154 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2155 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2156 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2157 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2158 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2159 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2161 2162 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2163 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2164 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2165 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2166 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2167 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2168 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2169 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2170 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2171 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2172 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2173 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2174 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2175 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2176 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2177 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2178 2179 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2180 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2181 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2182 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2183 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2184 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2185 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2186 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2187 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2188 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2189 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2190 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2191 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2192 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2193 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2194 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2195 2196 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2197} 2198 2199static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2200{ 2201 struct gaudi_device *gaudi = hdev->asic_specific; 2202 2203 if (hdev->asic_prop.fw_security_enabled) 2204 return; 2205 2206 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2207 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2208 return; 2209 2210 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2211 return; 2212 2213 if (!hdev->dram_scrambler_enable) 2214 return; 2215 2216 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2218 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2219 1 << 
IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2220 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2221 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2222 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2224 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2225 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2226 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2227 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2228 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2229 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2230 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2231 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2232 2233 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2234 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2235 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2236 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2237 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2238 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2239 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2240 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2241 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2242 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2243 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2244 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2245 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2246 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2247 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2248 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2249 2250 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2251 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2252 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2253 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2254 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2255 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2256 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2257 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2258 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2259 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2260 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2261 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2262 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2263 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2264 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2265 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2266 2267 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2268} 2269 2270static void gaudi_init_e2e(struct hl_device *hdev) 2271{ 2272 if (hdev->asic_prop.fw_security_enabled) 2273 return; 2274 2275 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2276 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2277 return; 2278 2279 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2280 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2281 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2282 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2283 2284 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2285 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2286 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2287 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2288 2289 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2290 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2291 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2292 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2293 2294 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2295 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2296 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2297 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2298 2299 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2300 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2301 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2302 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2303 2304 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2305 
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2306 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2307 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2308 2309 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2310 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2311 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2312 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2313 2314 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2315 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2316 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2317 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2318 2319 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2320 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2321 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2322 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2323 2324 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2325 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2326 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2327 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2328 2329 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2330 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2331 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2332 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2333 2334 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2335 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2336 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2337 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2338 2339 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2340 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2341 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2342 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2343 2344 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2345 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2346 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2347 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2348 2349 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2350 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2351 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2352 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2353 2354 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2355 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2356 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2357 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2358 2359 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2360 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2361 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2362 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2363 2364 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2365 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2366 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2367 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2368 2369 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2370 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2371 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2372 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2373 2374 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2375 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2376 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2377 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2378 2379 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2380 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2381 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2382 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2383 2384 
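	/*
	 * Note the pattern above and below: the HBM WR/RD size values are
	 * written pre-shifted right by 3 (i.e. divided by 8), presumably
	 * because these registers count in units of 8, while the PCI WR/RD
	 * sizes are written as-is. The remaining DMA_IF down channels are
	 * programmed with the same per-channel values.
	 */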
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2385 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2386 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2387 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2388 2389 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2390 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2391 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2392 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2393 2394 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2395 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2396 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2397 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2398 2399 if (!hdev->dram_scrambler_enable) { 2400 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21); 2401 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22); 2402 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F); 2403 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20); 2404 2405 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21); 2406 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22); 2407 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F); 2408 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20); 2409 2410 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21); 2411 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22); 2412 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F); 2413 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20); 2414 2415 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21); 2416 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22); 2417 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F); 2418 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20); 2419 2420 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21); 2421 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22); 2422 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F); 2423 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20); 2424 2425 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21); 2426 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22); 2427 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F); 2428 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20); 2429 2430 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21); 2431 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22); 2432 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F); 2433 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20); 2434 2435 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21); 2436 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22); 2437 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F); 2438 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20); 2439 2440 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21); 2441 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22); 2442 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F); 2443 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20); 2444 2445 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21); 2446 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22); 2447 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F); 2448 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20); 2449 2450 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21); 2451 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22); 2452 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F); 2453 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20); 2454 2455 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21); 2456 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22); 2457 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F); 2458 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20); 2459 2460 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21); 2461 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22); 2462 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F); 2463 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20); 2464 2465 
WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21); 2466 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22); 2467 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F); 2468 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20); 2469 2470 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21); 2471 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22); 2472 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F); 2473 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20); 2474 2475 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21); 2476 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22); 2477 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F); 2478 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20); 2479 2480 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21); 2481 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22); 2482 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 2483 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 2484 2485 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21); 2486 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22); 2487 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 2488 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 2489 2490 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21); 2491 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22); 2492 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 2493 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 2494 2495 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21); 2496 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22); 2497 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 2498 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 2499 2500 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21); 2501 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22); 2502 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 2503 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 2504 2505 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21); 2506 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22); 2507 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 2508 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 2509 2510 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21); 2511 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22); 2512 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 2513 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 2514 2515 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21); 2516 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22); 2517 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 2518 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 2519 } 2520 2521 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2522 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2523 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2524 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2525 2526 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2527 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2528 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2529 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2530 2531 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2532 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2533 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2534 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2535 2536 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2537 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2538 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2539 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2540 2541 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2542 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2543 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2544 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2545 2546 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2547 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2548 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2549 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 
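	/*
	 * Only after all the E2E size registers are programmed are the
	 * credits actually enabled, one HBM/PCI enable pair per router.
	 * The remaining SIF/NIF routers and the DMA_IF down channels are
	 * enabled below in the same way.
	 */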
2550 2551 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2552 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2553 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2554 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2555 2556 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2557 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2558 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2559 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2560 2561 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2562 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2563 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2564 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2565 2566 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2567 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2568 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2569 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2570 2571 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2572 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2573 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2574 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2575 2576 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2577 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2578 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2579 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2580 2581 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2582 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2583 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2584 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2585 2586 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2587 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2588 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2589 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2590 2591 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2592 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2593 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2594 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2595 2596 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2597 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2598 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2599 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2600 2601 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2602 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2603 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2604 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2605 2606 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2607 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2608 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2609 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2610 2611 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2612 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2613 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2614 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2615 2616 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2617 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2618 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2619 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2620 2621 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2622 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2623 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2624 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2625 2626 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2627 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2628 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2629 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2630 2631 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2632 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2633 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2634 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2635 2636 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2637 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2638 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2639 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2640} 2641 2642static void gaudi_init_hbm_cred(struct hl_device *hdev) 2643{ 2644 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2645 2646 if (hdev->asic_prop.fw_security_enabled) 2647 return; 2648 2649 if 
(hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2650 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2651 return; 2652 2653 hbm0_wr = 0x33333333; 2654 hbm0_rd = 0x77777777; 2655 hbm1_wr = 0x55555555; 2656 hbm1_rd = 0xDDDDDDDD; 2657 2658 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2659 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2660 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2661 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2662 2663 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2664 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2665 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2666 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2667 2668 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2669 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2670 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2671 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2672 2673 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2674 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2675 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2676 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2677 2678 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2679 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2680 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2681 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2682 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2683 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2684 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2685 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2686 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2687 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2688 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2689 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2690 2691 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2692 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2693 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2694 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2695 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2696 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2697 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2698 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2699 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2700 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2701 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2702 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2703} 2704 2705static void gaudi_init_golden_registers(struct hl_device *hdev) 2706{ 2707 u32 tpc_offset; 2708 int tpc_id, i; 2709 2710 gaudi_init_e2e(hdev); 2711 gaudi_init_hbm_cred(hdev); 2712 2713 for (tpc_id = 0, tpc_offset = 0; 2714 tpc_id < TPC_NUMBER_OF_ENGINES; 2715 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2716 /* Mask all arithmetic interrupts from TPC */ 2717 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2718 /* Set 16 cache lines */ 2719 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2720 ICACHE_FETCH_LINE_NUM, 2); 2721 } 2722 2723 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2724 for (i = 0 ; i < 128 ; i += 8) 2725 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2726 2727 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2728 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2729 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2730 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2731} 2732 2733static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2734 int qman_id, dma_addr_t qman_pq_addr) 2735{ 2736 struct cpu_dyn_regs *dyn_regs = 2737 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2738 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2739 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 
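	/*
	 * The "_en" bases refer to the east-north sync manager
	 * (SYNC_MNGR_E_N) monitor-payload-address and sync-object blocks,
	 * and the "_ws" bases to the west-south one (SYNC_MNGR_W_S). They
	 * are split into lower/upper 32-bit halves because the QMAN
	 * CP_MSG_BASE registers programmed below take 32-bit values.
	 */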
2740 u32 q_off, dma_qm_offset; 2741 u32 dma_qm_err_cfg, irq_handler_offset; 2742 2743 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2744 2745 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2746 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2747 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2748 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2749 so_base_en_lo = lower_32_bits(CFG_BASE + 2750 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2751 so_base_en_hi = upper_32_bits(CFG_BASE + 2752 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2753 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2754 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2755 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2756 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2757 so_base_ws_lo = lower_32_bits(CFG_BASE + 2758 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2759 so_base_ws_hi = upper_32_bits(CFG_BASE + 2760 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2761 2762 q_off = dma_qm_offset + qman_id * 4; 2763 2764 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2765 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2766 2767 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2768 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2769 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2770 2771 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2772 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2773 QMAN_LDMA_SRC_OFFSET); 2774 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2775 QMAN_LDMA_DST_OFFSET); 2776 2777 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2778 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2779 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2780 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2781 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2782 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2783 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2784 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2785 2786 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2787 2788 /* The following configuration is needed only once per QMAN */ 2789 if (qman_id == 0) { 2790 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2791 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2792 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2793 2794 /* Configure RAZWI IRQ */ 2795 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2796 if (hdev->stop_on_err) 2797 dma_qm_err_cfg |= 2798 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2799 2800 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2801 2802 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2803 lower_32_bits(CFG_BASE + irq_handler_offset)); 2804 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2805 upper_32_bits(CFG_BASE + irq_handler_offset)); 2806 2807 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2808 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2809 dma_id); 2810 2811 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2812 QM_ARB_ERR_MSG_EN_MASK); 2813 2814 /* Set timeout to maximum */ 2815 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2816 2817 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2818 QMAN_EXTERNAL_MAKE_TRUSTED); 2819 2820 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2821 } 2822} 2823 2824static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2825{ 2826 struct cpu_dyn_regs *dyn_regs = 2827 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2828 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2829 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2830 u32 irq_handler_offset; 2831 2832 /* Set to maximum possible according to physical size */ 2833 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2834 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2835 2836 /* WA for H/W bug H3-2116 */ 2837 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2838 2839 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2840 if (hdev->stop_on_err) 2841 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2842 2843 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2844 2845 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2846 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2847 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2848 2849 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2850 lower_32_bits(CFG_BASE + irq_handler_offset)); 2851 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2852 upper_32_bits(CFG_BASE + irq_handler_offset)); 2853 2854 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2855 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2856 WREG32(mmDMA0_CORE_PROT + dma_offset, 2857 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2858 /* If the channel is secured, it should be in MMU bypass mode */ 2859 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2860 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2861 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2862} 2863 2864static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2865 u32 enable_mask) 2866{ 2867 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2868 2869 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2870} 2871 2872static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2873{ 2874 struct gaudi_device *gaudi = hdev->asic_specific; 2875 struct hl_hw_queue *q; 2876 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2877 2878 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2879 return; 2880 2881 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2882 dma_id = gaudi_dma_assignment[i]; 2883 /* 2884 * For queues after the CPU Q need to add 1 to get the correct 2885 * queue. 
In addition, need to add the CPU EQ and NIC IRQs in 2886 * order to get the correct MSI register. 2887 */ 2888 if (dma_id > 1) { 2889 cpu_skip = 1; 2890 nic_skip = NIC_NUMBER_OF_ENGINES; 2891 } else { 2892 cpu_skip = 0; 2893 nic_skip = 0; 2894 } 2895 2896 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2897 q_idx = 4 * dma_id + j + cpu_skip; 2898 q = &hdev->kernel_queues[q_idx]; 2899 q->cq_id = cq_id++; 2900 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2901 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2902 q->bus_address); 2903 } 2904 2905 gaudi_init_dma_core(hdev, dma_id); 2906 2907 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2908 } 2909 2910 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2911} 2912 2913static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2914 int qman_id, u64 qman_base_addr) 2915{ 2916 struct cpu_dyn_regs *dyn_regs = 2917 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2918 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2919 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2920 u32 dma_qm_err_cfg, irq_handler_offset; 2921 u32 q_off, dma_qm_offset; 2922 2923 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2924 2925 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2926 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2927 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2928 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2929 so_base_en_lo = lower_32_bits(CFG_BASE + 2930 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2931 so_base_en_hi = upper_32_bits(CFG_BASE + 2932 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2933 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2934 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2935 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2936 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2937 so_base_ws_lo = lower_32_bits(CFG_BASE + 2938 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2939 so_base_ws_hi = upper_32_bits(CFG_BASE + 2940 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2941 2942 q_off = dma_qm_offset + qman_id * 4; 2943 2944 if (qman_id < 4) { 2945 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2946 lower_32_bits(qman_base_addr)); 2947 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2948 upper_32_bits(qman_base_addr)); 2949 2950 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2951 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2952 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2953 2954 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2955 QMAN_CPDMA_SIZE_OFFSET); 2956 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2957 QMAN_CPDMA_SRC_OFFSET); 2958 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2959 QMAN_CPDMA_DST_OFFSET); 2960 } else { 2961 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2962 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2963 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2964 2965 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2966 QMAN_LDMA_SIZE_OFFSET); 2967 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2968 QMAN_LDMA_SRC_OFFSET); 2969 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2970 QMAN_LDMA_DST_OFFSET); 2971 2972 /* Configure RAZWI IRQ */ 2973 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2974 if (hdev->stop_on_err) 2975 dma_qm_err_cfg |= 2976 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2977 2978 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2979 2980 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2981 lower_32_bits(CFG_BASE + irq_handler_offset)); 2982 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2983 upper_32_bits(CFG_BASE + irq_handler_offset)); 2984 2985 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2986 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2987 dma_id); 2988 2989 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2990 QM_ARB_ERR_MSG_EN_MASK); 2991 2992 /* Set timeout to maximum */ 2993 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2994 2995 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2996 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2997 QMAN_INTERNAL_MAKE_TRUSTED); 2998 } 2999 3000 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3001 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3002 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3003 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3004 3005 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 3006 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 3007 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3008 mtr_base_ws_lo); 3009 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3010 mtr_base_ws_hi); 3011 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3012 so_base_ws_lo); 3013 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3014 so_base_ws_hi); 3015 } 3016} 3017 3018static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 3019{ 3020 struct gaudi_device *gaudi = hdev->asic_specific; 3021 struct gaudi_internal_qman_info *q; 3022 u64 qman_base_addr; 3023 int i, j, dma_id, internal_q_index; 3024 3025 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 3026 return; 3027 3028 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 3029 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 3030 3031 for (j = 0 ; j < QMAN_STREAMS ; j++) { 3032 /* 3033 * Add the CPU queue in order to get the correct queue 3034 * number as all internal queue are placed after it 3035 */ 3036 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 3037 3038 q = &gaudi->internal_qmans[internal_q_index]; 3039 qman_base_addr = (u64) q->pq_dma_addr; 3040 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 3041 qman_base_addr); 3042 } 3043 3044 /* Initializing lower CP for HBM DMA QMAN */ 3045 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 3046 3047 gaudi_init_dma_core(hdev, dma_id); 3048 3049 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 3050 } 3051 3052 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 3053} 3054 3055static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 3056 int qman_id, u64 qman_base_addr) 3057{ 3058 struct cpu_dyn_regs *dyn_regs = 3059 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3060 u32 mtr_base_lo, mtr_base_hi; 3061 u32 so_base_lo, so_base_hi; 3062 u32 irq_handler_offset; 3063 u32 q_off, mme_id; 3064 u32 
mme_qm_err_cfg; 3065 3066 mtr_base_lo = lower_32_bits(CFG_BASE + 3067 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3068 mtr_base_hi = upper_32_bits(CFG_BASE + 3069 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3070 so_base_lo = lower_32_bits(CFG_BASE + 3071 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3072 so_base_hi = upper_32_bits(CFG_BASE + 3073 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3074 3075 q_off = mme_offset + qman_id * 4; 3076 3077 if (qman_id < 4) { 3078 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 3079 lower_32_bits(qman_base_addr)); 3080 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 3081 upper_32_bits(qman_base_addr)); 3082 3083 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 3084 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 3085 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 3086 3087 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3088 QMAN_CPDMA_SIZE_OFFSET); 3089 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3090 QMAN_CPDMA_SRC_OFFSET); 3091 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3092 QMAN_CPDMA_DST_OFFSET); 3093 } else { 3094 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 3095 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3096 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 3097 3098 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3099 QMAN_LDMA_SIZE_OFFSET); 3100 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3101 QMAN_LDMA_SRC_OFFSET); 3102 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3103 QMAN_LDMA_DST_OFFSET); 3104 3105 /* Configure RAZWI IRQ */ 3106 mme_id = mme_offset / 3107 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 3108 3109 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3110 if (hdev->stop_on_err) 3111 mme_qm_err_cfg |= 3112 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3113 3114 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 3115 3116 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 3117 lower_32_bits(CFG_BASE + irq_handler_offset)); 3118 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 3119 upper_32_bits(CFG_BASE + irq_handler_offset)); 3120 3121 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 3122 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 3123 mme_id); 3124 3125 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 3126 QM_ARB_ERR_MSG_EN_MASK); 3127 3128 /* Set timeout to maximum */ 3129 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 3130 3131 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 3132 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 3133 QMAN_INTERNAL_MAKE_TRUSTED); 3134 } 3135 3136 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 3137 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 3138 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 3139 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 3140} 3141 3142static void gaudi_init_mme_qmans(struct hl_device *hdev) 3143{ 3144 struct gaudi_device *gaudi = hdev->asic_specific; 3145 struct gaudi_internal_qman_info *q; 3146 u64 qman_base_addr; 3147 u32 mme_offset; 3148 int i, internal_q_index; 3149 3150 if (gaudi->hw_cap_initialized & HW_CAP_MME) 3151 return; 3152 3153 /* 3154 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 3155 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 3156 */ 3157 3158 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3159 3160 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 3161 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 3162 q = &gaudi->internal_qmans[internal_q_index]; 3163 qman_base_addr = (u64) q->pq_dma_addr; 3164 
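			/*
			 * Iterations 0-3 program the four streams of the
			 * north-west MME QMAN (mme_offset initially points at
			 * mmMME2_QM); once i reaches 3 the offset is reset to
			 * 0 so the remaining streams are programmed on the
			 * south-west MME QMAN (mmMME0_QM), matching the
			 * queue-ID mapping noted above.
			 */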
gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 3165 qman_base_addr); 3166 if (i == 3) 3167 mme_offset = 0; 3168 } 3169 3170 /* Initializing lower CP for MME QMANs */ 3171 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3172 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 3173 gaudi_init_mme_qman(hdev, 0, 4, 0); 3174 3175 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3176 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3177 3178 gaudi->hw_cap_initialized |= HW_CAP_MME; 3179} 3180 3181static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 3182 int qman_id, u64 qman_base_addr) 3183{ 3184 struct cpu_dyn_regs *dyn_regs = 3185 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3186 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3187 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3188 u32 tpc_qm_err_cfg, irq_handler_offset; 3189 u32 q_off, tpc_id; 3190 3191 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3192 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3193 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3194 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3195 so_base_en_lo = lower_32_bits(CFG_BASE + 3196 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3197 so_base_en_hi = upper_32_bits(CFG_BASE + 3198 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3199 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3200 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3201 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3202 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3203 so_base_ws_lo = lower_32_bits(CFG_BASE + 3204 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3205 so_base_ws_hi = upper_32_bits(CFG_BASE + 3206 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3207 3208 q_off = tpc_offset + qman_id * 4; 3209 3210 tpc_id = tpc_offset / 3211 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3212 3213 if (qman_id < 4) { 3214 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3215 lower_32_bits(qman_base_addr)); 3216 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3217 upper_32_bits(qman_base_addr)); 3218 3219 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3220 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3221 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3222 3223 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3224 QMAN_CPDMA_SIZE_OFFSET); 3225 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3226 QMAN_CPDMA_SRC_OFFSET); 3227 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3228 QMAN_CPDMA_DST_OFFSET); 3229 } else { 3230 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3231 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3232 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3233 3234 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3235 QMAN_LDMA_SIZE_OFFSET); 3236 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3237 QMAN_LDMA_SRC_OFFSET); 3238 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3239 QMAN_LDMA_DST_OFFSET); 3240 3241 /* Configure RAZWI IRQ */ 3242 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3243 if (hdev->stop_on_err) 3244 tpc_qm_err_cfg |= 3245 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3246 3247 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3248 3249 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3250 lower_32_bits(CFG_BASE + irq_handler_offset)); 3251 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3252 upper_32_bits(CFG_BASE + irq_handler_offset)); 3253 3254 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3255 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3256 tpc_id); 3257 3258 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3259 QM_ARB_ERR_MSG_EN_MASK); 3260 3261 /* Set timeout to maximum */ 3262 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3263 3264 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3265 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3266 QMAN_INTERNAL_MAKE_TRUSTED); 3267 } 3268 3269 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3270 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3271 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3272 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3273 3274 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3275 if (tpc_id == 6) { 3276 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3277 mtr_base_ws_lo); 3278 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3279 mtr_base_ws_hi); 3280 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3281 so_base_ws_lo); 3282 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3283 so_base_ws_hi); 3284 } 3285} 3286 3287static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3288{ 3289 struct gaudi_device *gaudi = hdev->asic_specific; 3290 struct gaudi_internal_qman_info *q; 3291 u64 qman_base_addr; 3292 u32 so_base_hi, tpc_offset = 0; 3293 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3294 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3295 int i, tpc_id, internal_q_index; 3296 3297 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3298 return; 3299 3300 so_base_hi = upper_32_bits(CFG_BASE + 3301 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3302 3303 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3304 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3305 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3306 tpc_id * QMAN_STREAMS + i; 3307 q = &gaudi->internal_qmans[internal_q_index]; 3308 qman_base_addr = (u64) q->pq_dma_addr; 3309 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3310 qman_base_addr); 3311 3312 if (i == 3) { 3313 /* Initializing lower CP for TPC QMAN */ 3314 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3315 3316 /* Enable the QMAN and TPC channel */ 3317 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3318 QMAN_TPC_ENABLE); 3319 } 3320 } 3321 3322 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3323 so_base_hi); 3324 3325 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3326 3327 gaudi->hw_cap_initialized |= 3328 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3329 } 3330} 3331 3332static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3333 int qman_id, u64 qman_base_addr, int nic_id) 3334{ 3335 struct 
cpu_dyn_regs *dyn_regs = 3336 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3337 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3338 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3339 u32 nic_qm_err_cfg, irq_handler_offset; 3340 u32 q_off; 3341 3342 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3343 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3344 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3345 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3346 so_base_en_lo = lower_32_bits(CFG_BASE + 3347 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3348 so_base_en_hi = upper_32_bits(CFG_BASE + 3349 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3350 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3351 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3352 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3353 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3354 so_base_ws_lo = lower_32_bits(CFG_BASE + 3355 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3356 so_base_ws_hi = upper_32_bits(CFG_BASE + 3357 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3358 3359 q_off = nic_offset + qman_id * 4; 3360 3361 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3362 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3363 3364 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3365 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3366 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3367 3368 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3369 QMAN_LDMA_SIZE_OFFSET); 3370 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3371 QMAN_LDMA_SRC_OFFSET); 3372 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3373 QMAN_LDMA_DST_OFFSET); 3374 3375 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3376 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3377 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3378 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3379 3380 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3381 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3382 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3383 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3384 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3385 3386 if (qman_id == 0) { 3387 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3388 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3389 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3390 3391 /* Configure RAZWI IRQ */ 3392 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3393 if (hdev->stop_on_err) 3394 nic_qm_err_cfg |= 3395 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3396 3397 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3398 3399 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3400 lower_32_bits(CFG_BASE + irq_handler_offset)); 3401 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3402 upper_32_bits(CFG_BASE + irq_handler_offset)); 3403 3404 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3405 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3406 nic_id); 3407 3408 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3409 QM_ARB_ERR_MSG_EN_MASK); 3410 3411 /* Set timeout to maximum */ 3412 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3413 3414 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3415 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3416 QMAN_INTERNAL_MAKE_TRUSTED); 3417 } 3418} 3419 3420static void gaudi_init_nic_qmans(struct hl_device *hdev) 3421{ 3422 struct gaudi_device *gaudi = hdev->asic_specific; 3423 struct gaudi_internal_qman_info *q; 3424 u64 qman_base_addr; 3425 u32 nic_offset = 0; 3426 u32 nic_delta_between_qmans = 3427 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3428 u32 nic_delta_between_nics = 3429 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3430 int i, nic_id, internal_q_index; 3431 3432 if (!hdev->nic_ports_mask) 3433 return; 3434 3435 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3436 return; 3437 3438 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3439 3440 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3441 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3442 nic_offset += nic_delta_between_qmans; 3443 if (nic_id & 1) { 3444 nic_offset -= (nic_delta_between_qmans * 2); 3445 nic_offset += nic_delta_between_nics; 3446 } 3447 continue; 3448 } 3449 3450 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3451 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3452 nic_id * QMAN_STREAMS + i; 3453 q = &gaudi->internal_qmans[internal_q_index]; 3454 qman_base_addr = (u64) q->pq_dma_addr; 3455 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3456 qman_base_addr, nic_id); 3457 } 3458 3459 /* Enable the QMAN */ 3460 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3461 3462 nic_offset += nic_delta_between_qmans; 3463 if (nic_id & 1) { 3464 nic_offset -= (nic_delta_between_qmans * 2); 3465 nic_offset += nic_delta_between_nics; 3466 } 3467 3468 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3469 } 3470} 3471 3472static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3473{ 3474 struct gaudi_device *gaudi = hdev->asic_specific; 3475 3476 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3477 return; 3478 3479 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3480 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3481 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3482} 3483 3484static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3485{ 3486 struct gaudi_device *gaudi = hdev->asic_specific; 3487 3488 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3489 return; 3490 3491 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3492 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3493 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3494 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3495 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3496} 3497 3498static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3499{ 3500 struct gaudi_device *gaudi = hdev->asic_specific; 3501 3502 if (!(gaudi->hw_cap_initialized & 
HW_CAP_MME)) 3503 return; 3504 3505 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3506 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3507} 3508 3509static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3510{ 3511 struct gaudi_device *gaudi = hdev->asic_specific; 3512 u32 tpc_offset = 0; 3513 int tpc_id; 3514 3515 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3516 return; 3517 3518 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3519 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3520 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3521 } 3522} 3523 3524static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3525{ 3526 struct gaudi_device *gaudi = hdev->asic_specific; 3527 u32 nic_mask, nic_offset = 0; 3528 u32 nic_delta_between_qmans = 3529 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3530 u32 nic_delta_between_nics = 3531 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3532 int nic_id; 3533 3534 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3535 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3536 3537 if (gaudi->hw_cap_initialized & nic_mask) 3538 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3539 3540 nic_offset += nic_delta_between_qmans; 3541 if (nic_id & 1) { 3542 nic_offset -= (nic_delta_between_qmans * 2); 3543 nic_offset += nic_delta_between_nics; 3544 } 3545 } 3546} 3547 3548static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3549{ 3550 struct gaudi_device *gaudi = hdev->asic_specific; 3551 3552 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3553 return; 3554 3555 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3556 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3557 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3558 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3559} 3560 3561static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3562{ 3563 struct gaudi_device *gaudi = hdev->asic_specific; 3564 3565 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3566 return; 3567 3568 /* Stop CPs of HBM DMA QMANs */ 3569 3570 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3571 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3572 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3573 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3574 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3575} 3576 3577static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3578{ 3579 struct gaudi_device *gaudi = hdev->asic_specific; 3580 3581 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3582 return; 3583 3584 /* Stop CPs of MME QMANs */ 3585 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3586 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3587} 3588 3589static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3590{ 3591 struct gaudi_device *gaudi = hdev->asic_specific; 3592 3593 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3594 return; 3595 3596 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3597 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3598 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3599 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3600 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3601 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3602 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << 
TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3603 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3604} 3605 3606static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3607{ 3608 struct gaudi_device *gaudi = hdev->asic_specific; 3609 3610 /* Stop upper CPs of QMANs */ 3611 3612 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3613 WREG32(mmNIC0_QM0_GLBL_CFG1, 3614 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3615 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3616 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3617 3618 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3619 WREG32(mmNIC0_QM1_GLBL_CFG1, 3620 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3621 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3622 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3623 3624 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3625 WREG32(mmNIC1_QM0_GLBL_CFG1, 3626 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3627 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3628 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3629 3630 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3631 WREG32(mmNIC1_QM1_GLBL_CFG1, 3632 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3633 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3634 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3635 3636 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3637 WREG32(mmNIC2_QM0_GLBL_CFG1, 3638 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3639 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3640 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3641 3642 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3643 WREG32(mmNIC2_QM1_GLBL_CFG1, 3644 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3645 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3646 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3647 3648 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3649 WREG32(mmNIC3_QM0_GLBL_CFG1, 3650 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3651 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3652 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3653 3654 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3655 WREG32(mmNIC3_QM1_GLBL_CFG1, 3656 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3657 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3658 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3659 3660 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3661 WREG32(mmNIC4_QM0_GLBL_CFG1, 3662 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3663 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3664 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3665 3666 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3667 WREG32(mmNIC4_QM1_GLBL_CFG1, 3668 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3669 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3670 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3671} 3672 3673static void gaudi_pci_dma_stall(struct hl_device *hdev) 3674{ 3675 struct gaudi_device *gaudi = hdev->asic_specific; 3676 3677 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3678 return; 3679 3680 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3681 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3682 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3683} 3684 3685static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3686{ 3687 struct gaudi_device *gaudi = hdev->asic_specific; 3688 3689 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3690 return; 3691 3692 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3693 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3694 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3695 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3696 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3697} 3698 3699static void gaudi_mme_stall(struct hl_device *hdev) 3700{ 3701 struct gaudi_device *gaudi = hdev->asic_specific; 3702 3703 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3704 return; 3705 3706 /* WA for H3-1800 bug: do ACC and SBAB 
writes twice */ 3707 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3708 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3709 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3710 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3711 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3712 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3713 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3714 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3715 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3716 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3717 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3718 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3719 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3720 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3721 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3722 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3723} 3724 3725static void gaudi_tpc_stall(struct hl_device *hdev) 3726{ 3727 struct gaudi_device *gaudi = hdev->asic_specific; 3728 3729 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3730 return; 3731 3732 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3733 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3734 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3735 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3736 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3737 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3738 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3739 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3740} 3741 3742static void gaudi_disable_clock_gating(struct hl_device *hdev) 3743{ 3744 u32 qman_offset; 3745 int i; 3746 3747 if (hdev->asic_prop.fw_security_enabled) 3748 return; 3749 3750 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3751 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3752 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3753 3754 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3755 } 3756 3757 WREG32(mmMME0_QM_CGM_CFG, 0); 3758 WREG32(mmMME0_QM_CGM_CFG1, 0); 3759 WREG32(mmMME2_QM_CGM_CFG, 0); 3760 WREG32(mmMME2_QM_CGM_CFG1, 0); 3761 3762 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3763 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3764 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3765 3766 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3767 } 3768} 3769 3770static void gaudi_enable_timestamp(struct hl_device *hdev) 3771{ 3772 /* Disable the timestamp counter */ 3773 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3774 3775 /* Zero the lower/upper parts of the 64-bit counter */ 3776 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3777 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3778 3779 /* Enable the counter */ 3780 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3781} 3782 3783static void gaudi_disable_timestamp(struct hl_device *hdev) 3784{ 3785 /* Disable the timestamp counter */ 3786 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3787} 3788 3789static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3790{ 3791 u32 wait_timeout_ms; 3792 3793 if (hdev->pldm) 3794 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3795 else 3796 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3797 3798 if 
(fw_reset) 3799 goto skip_engines; 3800 3801 gaudi_stop_nic_qmans(hdev); 3802 gaudi_stop_mme_qmans(hdev); 3803 gaudi_stop_tpc_qmans(hdev); 3804 gaudi_stop_hbm_dma_qmans(hdev); 3805 gaudi_stop_pci_dma_qmans(hdev); 3806 3807 msleep(wait_timeout_ms); 3808 3809 gaudi_pci_dma_stall(hdev); 3810 gaudi_hbm_dma_stall(hdev); 3811 gaudi_tpc_stall(hdev); 3812 gaudi_mme_stall(hdev); 3813 3814 msleep(wait_timeout_ms); 3815 3816 gaudi_disable_nic_qmans(hdev); 3817 gaudi_disable_mme_qmans(hdev); 3818 gaudi_disable_tpc_qmans(hdev); 3819 gaudi_disable_hbm_dma_qmans(hdev); 3820 gaudi_disable_pci_dma_qmans(hdev); 3821 3822 gaudi_disable_timestamp(hdev); 3823 3824skip_engines: 3825 gaudi_disable_msi(hdev); 3826} 3827 3828static int gaudi_mmu_init(struct hl_device *hdev) 3829{ 3830 struct asic_fixed_properties *prop = &hdev->asic_prop; 3831 struct gaudi_device *gaudi = hdev->asic_specific; 3832 u64 hop0_addr; 3833 int rc, i; 3834 3835 if (!hdev->mmu_enable) 3836 return 0; 3837 3838 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3839 return 0; 3840 3841 for (i = 0 ; i < prop->max_asid ; i++) { 3842 hop0_addr = prop->mmu_pgt_addr + 3843 (i * prop->mmu_hop_table_size); 3844 3845 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3846 if (rc) { 3847 dev_err(hdev->dev, 3848 "failed to set hop0 addr for asid %d\n", i); 3849 goto err; 3850 } 3851 } 3852 3853 /* init MMU cache manage page */ 3854 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8); 3855 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40); 3856 3857 /* mem cache invalidation */ 3858 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3859 3860 hl_mmu_invalidate_cache(hdev, true, 0); 3861 3862 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3863 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3864 3865 WREG32(mmSTLB_HOP_CONFIGURATION, 3866 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440); 3867 3868 /* 3869 * The H/W expects the first PI after init to be 1. After wraparound 3870 * we'll write 0. 
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;

err:
	return rc;
}

static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}

static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * Set initial values here for a few specific dynamic registers, since
	 * they must be hard-coded before the first descriptor is read from
	 * the FW. In later stages of the protocol these values are refreshed
	 * automatically from the FW descriptor, so the data there is always
	 * up-to-date.
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu =
		cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
		cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
}

static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3942 GAUDI_PLDM_RESET_WAIT_MSEC : 3943 GAUDI_CPU_RESET_WAIT_MSEC; 3944} 3945 3946static void gaudi_init_firmware_loader(struct hl_device *hdev) 3947{ 3948 struct asic_fixed_properties *prop = &hdev->asic_prop; 3949 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3950 3951 /* fill common fields */ 3952 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3953 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3954 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3955 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3956 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3957 fw_loader->skip_bmc = !hdev->bmc_enable; 3958 fw_loader->sram_bar_id = SRAM_BAR_ID; 3959 fw_loader->dram_bar_id = HBM_BAR_ID; 3960 3961 if (prop->dynamic_fw_load) 3962 gaudi_init_dynamic_firmware_loader(hdev); 3963 else 3964 gaudi_init_static_firmware_loader(hdev); 3965} 3966 3967static int gaudi_init_cpu(struct hl_device *hdev) 3968{ 3969 struct gaudi_device *gaudi = hdev->asic_specific; 3970 int rc; 3971 3972 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3973 return 0; 3974 3975 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3976 return 0; 3977 3978 /* 3979 * The device CPU works with 40 bits addresses. 3980 * This register sets the extension to 50 bits. 3981 */ 3982 if (!hdev->asic_prop.fw_security_enabled) 3983 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3984 3985 rc = hl_fw_init_cpu(hdev); 3986 3987 if (rc) 3988 return rc; 3989 3990 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3991 3992 return 0; 3993} 3994 3995static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3996{ 3997 struct cpu_dyn_regs *dyn_regs = 3998 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3999 struct asic_fixed_properties *prop = &hdev->asic_prop; 4000 struct gaudi_device *gaudi = hdev->asic_specific; 4001 u32 status, irq_handler_offset; 4002 struct hl_eq *eq; 4003 struct hl_hw_queue *cpu_pq = 4004 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 4005 int err; 4006 4007 if (!hdev->cpu_queues_enable) 4008 return 0; 4009 4010 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4011 return 0; 4012 4013 eq = &hdev->event_queue; 4014 4015 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 4016 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 4017 4018 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 4019 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 4020 4021 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 4022 lower_32_bits(hdev->cpu_accessible_dma_address)); 4023 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 4024 upper_32_bits(hdev->cpu_accessible_dma_address)); 4025 4026 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 4027 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 4028 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 4029 4030 /* Used for EQ CI */ 4031 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 4032 4033 WREG32(mmCPU_IF_PF_PQ_PI, 0); 4034 4035 if (gaudi->multi_msi_mode) 4036 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 4037 else 4038 WREG32(mmCPU_IF_QUEUE_INIT, 4039 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 4040 4041 irq_handler_offset = prop->gic_interrupts_enable ? 
4042 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4043 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4044 4045 WREG32(irq_handler_offset, 4046 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4047 4048 err = hl_poll_timeout( 4049 hdev, 4050 mmCPU_IF_QUEUE_INIT, 4051 status, 4052 (status == PQ_INIT_STATUS_READY_FOR_HOST), 4053 1000, 4054 cpu_timeout); 4055 4056 if (err) { 4057 dev_err(hdev->dev, 4058 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 4059 return -EIO; 4060 } 4061 4062 /* update FW application security bits */ 4063 if (prop->fw_cpu_boot_dev_sts0_valid) 4064 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 4065 if (prop->fw_cpu_boot_dev_sts1_valid) 4066 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 4067 4068 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 4069 return 0; 4070} 4071 4072static void gaudi_pre_hw_init(struct hl_device *hdev) 4073{ 4074 /* Perform read from the device to make sure device is up */ 4075 RREG32(mmHW_STATE); 4076 4077 if (!hdev->asic_prop.fw_security_enabled) { 4078 /* Set the access through PCI bars (Linux driver only) as 4079 * secured 4080 */ 4081 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 4082 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 4083 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 4084 4085 /* Perform read to flush the waiting writes to ensure 4086 * configuration was set in the device 4087 */ 4088 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 4089 } 4090 4091 /* 4092 * Let's mark in the H/W that we have reached this point. We check 4093 * this value in the reset_before_init function to understand whether 4094 * we need to reset the chip before doing H/W init. This register is 4095 * cleared by the H/W upon H/W reset 4096 */ 4097 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 4098} 4099 4100static int gaudi_hw_init(struct hl_device *hdev) 4101{ 4102 struct gaudi_device *gaudi = hdev->asic_specific; 4103 int rc; 4104 4105 gaudi_pre_hw_init(hdev); 4106 4107 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 4108 * So we set it here and if anyone tries to move it later to 4109 * a different address, there will be an error 4110 */ 4111 if (hdev->asic_prop.iatu_done_by_fw) 4112 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 4113 4114 /* 4115 * Before pushing u-boot/linux to device, need to set the hbm bar to 4116 * base address of dram 4117 */ 4118 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 4119 dev_err(hdev->dev, 4120 "failed to map HBM bar to DRAM base address\n"); 4121 return -EIO; 4122 } 4123 4124 rc = gaudi_init_cpu(hdev); 4125 if (rc) { 4126 dev_err(hdev->dev, "failed to initialize CPU\n"); 4127 return rc; 4128 } 4129 4130 /* In case the clock gating was enabled in preboot we need to disable 4131 * it here before touching the MME/TPC registers. 
4132 */ 4133 gaudi_disable_clock_gating(hdev); 4134 4135 /* SRAM scrambler must be initialized after CPU is running from HBM */ 4136 gaudi_init_scrambler_sram(hdev); 4137 4138 /* This is here just in case we are working without CPU */ 4139 gaudi_init_scrambler_hbm(hdev); 4140 4141 gaudi_init_golden_registers(hdev); 4142 4143 rc = gaudi_mmu_init(hdev); 4144 if (rc) 4145 return rc; 4146 4147 gaudi_init_security(hdev); 4148 4149 gaudi_init_pci_dma_qmans(hdev); 4150 4151 gaudi_init_hbm_dma_qmans(hdev); 4152 4153 gaudi_init_mme_qmans(hdev); 4154 4155 gaudi_init_tpc_qmans(hdev); 4156 4157 gaudi_init_nic_qmans(hdev); 4158 4159 gaudi_enable_timestamp(hdev); 4160 4161 /* MSI must be enabled before CPU queues and NIC are initialized */ 4162 rc = gaudi_enable_msi(hdev); 4163 if (rc) 4164 goto disable_queues; 4165 4166 /* must be called after MSI was enabled */ 4167 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 4168 if (rc) { 4169 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 4170 rc); 4171 goto disable_msi; 4172 } 4173 4174 /* Perform read from the device to flush all configuration */ 4175 RREG32(mmHW_STATE); 4176 4177 return 0; 4178 4179disable_msi: 4180 gaudi_disable_msi(hdev); 4181disable_queues: 4182 gaudi_disable_mme_qmans(hdev); 4183 gaudi_disable_pci_dma_qmans(hdev); 4184 4185 return rc; 4186} 4187 4188static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4189{ 4190 struct cpu_dyn_regs *dyn_regs = 4191 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4192 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4193 struct gaudi_device *gaudi = hdev->asic_specific; 4194 bool driver_performs_reset; 4195 4196 if (!hard_reset) { 4197 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4198 return; 4199 } 4200 4201 if (hdev->pldm) { 4202 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4203 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4204 } else { 4205 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4206 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4207 } 4208 4209 if (fw_reset) { 4210 dev_dbg(hdev->dev, 4211 "Firmware performs HARD reset, going to wait %dms\n", 4212 reset_timeout_ms); 4213 4214 goto skip_reset; 4215 } 4216 4217 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4218 !hdev->asic_prop.hard_reset_done_by_fw); 4219 4220 /* Set device to handle FLR by H/W as we will put the device CPU to 4221 * halt mode 4222 */ 4223 if (driver_performs_reset) 4224 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4225 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4226 4227 /* If linux is loaded in the device CPU we need to communicate with it 4228 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4229 * registers in case of old F/Ws 4230 */ 4231 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4232 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4233 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4234 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4235 4236 WREG32(irq_handler_offset, 4237 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4238 4239 /* This is a hail-mary attempt to revive the card in the small chance that the 4240 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4241 * In that case, triggering reset through GIC won't help. We need to trigger the 4242 * reset as if Linux wasn't loaded. 4243 * 4244 * We do it only if the reset cause was HB, because that would be the indication 4245 * of such an event. 
4246 * 4247 * In case watchdog hasn't expired but we still got HB, then this won't do any 4248 * damage. 4249 */ 4250 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4251 if (hdev->asic_prop.hard_reset_done_by_fw) 4252 hl_fw_ask_hard_reset_without_linux(hdev); 4253 else 4254 hl_fw_ask_halt_machine_without_linux(hdev); 4255 } 4256 } else { 4257 if (hdev->asic_prop.hard_reset_done_by_fw) 4258 hl_fw_ask_hard_reset_without_linux(hdev); 4259 else 4260 hl_fw_ask_halt_machine_without_linux(hdev); 4261 } 4262 4263 if (driver_performs_reset) { 4264 4265 /* Configure the reset registers. Must be done as early as 4266 * possible in case we fail during H/W initialization 4267 */ 4268 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4269 (CFG_RST_H_DMA_MASK | 4270 CFG_RST_H_MME_MASK | 4271 CFG_RST_H_SM_MASK | 4272 CFG_RST_H_TPC_7_MASK)); 4273 4274 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4275 4276 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4277 (CFG_RST_H_HBM_MASK | 4278 CFG_RST_H_TPC_7_MASK | 4279 CFG_RST_H_NIC_MASK | 4280 CFG_RST_H_SM_MASK | 4281 CFG_RST_H_DMA_MASK | 4282 CFG_RST_H_MME_MASK | 4283 CFG_RST_H_CPU_MASK | 4284 CFG_RST_H_MMU_MASK)); 4285 4286 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4287 (CFG_RST_L_IF_MASK | 4288 CFG_RST_L_PSOC_MASK | 4289 CFG_RST_L_TPC_MASK)); 4290 4291 msleep(cpu_timeout_ms); 4292 4293 /* Tell ASIC not to re-initialize PCIe */ 4294 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4295 4296 /* Restart BTL/BLR upon hard-reset */ 4297 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4298 4299 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4300 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4301 4302 dev_dbg(hdev->dev, 4303 "Issued HARD reset command, going to wait %dms\n", 4304 reset_timeout_ms); 4305 } else { 4306 dev_dbg(hdev->dev, 4307 "Firmware performs HARD reset, going to wait %dms\n", 4308 reset_timeout_ms); 4309 } 4310 4311skip_reset: 4312 /* 4313 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4314 * itself is in reset. 
Need to wait until the reset is deasserted 4315 */ 4316 msleep(reset_timeout_ms); 4317 4318 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4319 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) 4320 dev_err(hdev->dev, 4321 "Timeout while waiting for device to reset 0x%x\n", 4322 status); 4323 4324 if (gaudi) { 4325 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4326 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4327 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4328 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4329 HW_CAP_HBM_SCRAMBLER); 4330 4331 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4332 4333 hdev->device_cpu_is_halted = false; 4334 } 4335} 4336 4337static int gaudi_suspend(struct hl_device *hdev) 4338{ 4339 int rc; 4340 4341 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); 4342 if (rc) 4343 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4344 4345 return rc; 4346} 4347 4348static int gaudi_resume(struct hl_device *hdev) 4349{ 4350 return gaudi_init_iatu(hdev); 4351} 4352 4353static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4354 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4355{ 4356 int rc; 4357 4358 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4359 VM_DONTCOPY | VM_NORESERVE; 4360 4361 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4362 (dma_addr - HOST_PHYS_BASE), size); 4363 if (rc) 4364 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4365 4366 return rc; 4367} 4368 4369static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4370{ 4371 struct cpu_dyn_regs *dyn_regs = 4372 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4373 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4374 struct gaudi_device *gaudi = hdev->asic_specific; 4375 bool invalid_queue = false; 4376 int dma_id; 4377 4378 switch (hw_queue_id) { 4379 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4380 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4381 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4382 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4383 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4384 break; 4385 4386 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4387 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4388 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4389 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4390 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4391 break; 4392 4393 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4394 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4395 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4396 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4397 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4398 break; 4399 4400 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4401 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4402 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4403 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4404 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4405 break; 4406 4407 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4408 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4409 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4410 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4411 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4412 break; 4413 4414 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4415 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4416 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4417 q_off = dma_qm_offset + ((hw_queue_id - 1) & 
0x3) * 4; 4418 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4419 break; 4420 4421 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4422 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4423 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4424 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4425 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4426 break; 4427 4428 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4429 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4430 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4431 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4432 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4433 break; 4434 4435 case GAUDI_QUEUE_ID_CPU_PQ: 4436 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4437 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4438 else 4439 invalid_queue = true; 4440 break; 4441 4442 case GAUDI_QUEUE_ID_MME_0_0: 4443 db_reg_offset = mmMME2_QM_PQ_PI_0; 4444 break; 4445 4446 case GAUDI_QUEUE_ID_MME_0_1: 4447 db_reg_offset = mmMME2_QM_PQ_PI_1; 4448 break; 4449 4450 case GAUDI_QUEUE_ID_MME_0_2: 4451 db_reg_offset = mmMME2_QM_PQ_PI_2; 4452 break; 4453 4454 case GAUDI_QUEUE_ID_MME_0_3: 4455 db_reg_offset = mmMME2_QM_PQ_PI_3; 4456 break; 4457 4458 case GAUDI_QUEUE_ID_MME_1_0: 4459 db_reg_offset = mmMME0_QM_PQ_PI_0; 4460 break; 4461 4462 case GAUDI_QUEUE_ID_MME_1_1: 4463 db_reg_offset = mmMME0_QM_PQ_PI_1; 4464 break; 4465 4466 case GAUDI_QUEUE_ID_MME_1_2: 4467 db_reg_offset = mmMME0_QM_PQ_PI_2; 4468 break; 4469 4470 case GAUDI_QUEUE_ID_MME_1_3: 4471 db_reg_offset = mmMME0_QM_PQ_PI_3; 4472 break; 4473 4474 case GAUDI_QUEUE_ID_TPC_0_0: 4475 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4476 break; 4477 4478 case GAUDI_QUEUE_ID_TPC_0_1: 4479 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4480 break; 4481 4482 case GAUDI_QUEUE_ID_TPC_0_2: 4483 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4484 break; 4485 4486 case GAUDI_QUEUE_ID_TPC_0_3: 4487 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4488 break; 4489 4490 case GAUDI_QUEUE_ID_TPC_1_0: 4491 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4492 break; 4493 4494 case GAUDI_QUEUE_ID_TPC_1_1: 4495 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4496 break; 4497 4498 case GAUDI_QUEUE_ID_TPC_1_2: 4499 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4500 break; 4501 4502 case GAUDI_QUEUE_ID_TPC_1_3: 4503 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4504 break; 4505 4506 case GAUDI_QUEUE_ID_TPC_2_0: 4507 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4508 break; 4509 4510 case GAUDI_QUEUE_ID_TPC_2_1: 4511 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4512 break; 4513 4514 case GAUDI_QUEUE_ID_TPC_2_2: 4515 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4516 break; 4517 4518 case GAUDI_QUEUE_ID_TPC_2_3: 4519 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4520 break; 4521 4522 case GAUDI_QUEUE_ID_TPC_3_0: 4523 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4524 break; 4525 4526 case GAUDI_QUEUE_ID_TPC_3_1: 4527 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4528 break; 4529 4530 case GAUDI_QUEUE_ID_TPC_3_2: 4531 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4532 break; 4533 4534 case GAUDI_QUEUE_ID_TPC_3_3: 4535 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4536 break; 4537 4538 case GAUDI_QUEUE_ID_TPC_4_0: 4539 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4540 break; 4541 4542 case GAUDI_QUEUE_ID_TPC_4_1: 4543 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4544 break; 4545 4546 case GAUDI_QUEUE_ID_TPC_4_2: 4547 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4548 break; 4549 4550 case GAUDI_QUEUE_ID_TPC_4_3: 4551 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4552 break; 4553 4554 case GAUDI_QUEUE_ID_TPC_5_0: 4555 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4556 break; 4557 4558 case GAUDI_QUEUE_ID_TPC_5_1: 4559 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4560 break; 4561 
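	/*
	 * Descriptive note (added for clarity): the remaining TPC queues below
	 * keep the same fixed mapping of stream N to mmTPCx_QM_PQ_PI_N. The
	 * NIC queue cases that follow them additionally require the matching
	 * HW_CAP_NICn bit to have been set during init; otherwise the queue is
	 * flagged invalid and no doorbell is rung.
	 */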
4562 case GAUDI_QUEUE_ID_TPC_5_2: 4563 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4564 break; 4565 4566 case GAUDI_QUEUE_ID_TPC_5_3: 4567 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4568 break; 4569 4570 case GAUDI_QUEUE_ID_TPC_6_0: 4571 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4572 break; 4573 4574 case GAUDI_QUEUE_ID_TPC_6_1: 4575 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4576 break; 4577 4578 case GAUDI_QUEUE_ID_TPC_6_2: 4579 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4580 break; 4581 4582 case GAUDI_QUEUE_ID_TPC_6_3: 4583 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4584 break; 4585 4586 case GAUDI_QUEUE_ID_TPC_7_0: 4587 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4588 break; 4589 4590 case GAUDI_QUEUE_ID_TPC_7_1: 4591 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4592 break; 4593 4594 case GAUDI_QUEUE_ID_TPC_7_2: 4595 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4596 break; 4597 4598 case GAUDI_QUEUE_ID_TPC_7_3: 4599 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4600 break; 4601 4602 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4603 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4604 invalid_queue = true; 4605 4606 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4607 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4608 break; 4609 4610 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4611 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4612 invalid_queue = true; 4613 4614 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4615 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4616 break; 4617 4618 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4619 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4620 invalid_queue = true; 4621 4622 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4623 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4624 break; 4625 4626 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4627 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4628 invalid_queue = true; 4629 4630 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4631 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4632 break; 4633 4634 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4635 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4636 invalid_queue = true; 4637 4638 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4639 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4640 break; 4641 4642 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4643 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4644 invalid_queue = true; 4645 4646 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4647 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4648 break; 4649 4650 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4651 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4652 invalid_queue = true; 4653 4654 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4655 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4656 break; 4657 4658 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4659 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4660 invalid_queue = true; 4661 4662 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4663 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4664 break; 4665 4666 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4667 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4668 invalid_queue = true; 4669 4670 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4671 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4672 break; 4673 4674 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4675 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4676 invalid_queue = true; 4677 4678 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4679 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4680 break; 4681 4682 default: 4683 invalid_queue = true; 4684 } 4685 4686 if (invalid_queue) { 4687 /* Should never get 
here */ 4688 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4689 hw_queue_id); 4690 return; 4691 } 4692 4693 db_value = pi; 4694 4695 /* ring the doorbell */ 4696 WREG32(db_reg_offset, db_value); 4697 4698 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4699 /* make sure device CPU will read latest data from host */ 4700 mb(); 4701 4702 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4703 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4704 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4705 4706 WREG32(irq_handler_offset, 4707 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4708 } 4709} 4710 4711static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4712 struct hl_bd *bd) 4713{ 4714 __le64 *pbd = (__le64 *) bd; 4715 4716 /* The QMANs are on the host memory so a simple copy suffice */ 4717 pqe[0] = pbd[0]; 4718 pqe[1] = pbd[1]; 4719} 4720 4721static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4722 dma_addr_t *dma_handle, gfp_t flags) 4723{ 4724 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4725 dma_handle, flags); 4726 4727 /* Shift to the device's base physical address of host memory */ 4728 if (kernel_addr) 4729 *dma_handle += HOST_PHYS_BASE; 4730 4731 return kernel_addr; 4732} 4733 4734static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4735 void *cpu_addr, dma_addr_t dma_handle) 4736{ 4737 /* Cancel the device's base physical address of host memory */ 4738 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4739 4740 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4741} 4742 4743static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4744{ 4745 struct asic_fixed_properties *prop = &hdev->asic_prop; 4746 u64 cur_addr = DRAM_BASE_ADDR_USER; 4747 u32 chunk_size, busy; 4748 int rc, dma_id; 4749 4750 while (cur_addr < prop->dram_end_address) { 4751 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4752 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4753 4754 chunk_size = 4755 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4756 4757 dev_dbg(hdev->dev, 4758 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4759 cur_addr, cur_addr + chunk_size); 4760 4761 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4762 lower_32_bits(val)); 4763 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4764 upper_32_bits(val)); 4765 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4766 lower_32_bits(cur_addr)); 4767 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4768 upper_32_bits(cur_addr)); 4769 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4770 chunk_size); 4771 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4772 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4773 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4774 4775 cur_addr += chunk_size; 4776 4777 if (cur_addr == prop->dram_end_address) 4778 break; 4779 } 4780 4781 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4782 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4783 4784 rc = hl_poll_timeout( 4785 hdev, 4786 mmDMA0_CORE_STS0 + dma_offset, 4787 busy, 4788 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4789 1000, 4790 HBM_SCRUBBING_TIMEOUT_US); 4791 4792 if (rc) { 4793 dev_err(hdev->dev, 4794 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4795 dma_id); 4796 return -EIO; 4797 } 4798 } 4799 } 4800 4801 return 0; 4802} 4803 4804static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) 4805{ 4806 struct asic_fixed_properties *prop = &hdev->asic_prop; 4807 int rc = 0; 4808 u64 val = 0; 4809 4810 if (!hdev->memory_scrub) 4811 
return 0; 4812 4813 if (!addr && !size) { 4814 /* Wait till device is idle */ 4815 rc = hl_poll_timeout( 4816 hdev, 4817 mmDMA0_CORE_STS0/* dummy */, 4818 val/* dummy */, 4819 (hdev->asic_funcs->is_device_idle(hdev, NULL, 4820 0, NULL)), 4821 1000, 4822 HBM_SCRUBBING_TIMEOUT_US); 4823 if (rc) { 4824 dev_err(hdev->dev, "waiting for idle timeout\n"); 4825 return -EIO; 4826 } 4827 4828 /* Scrub SRAM */ 4829 addr = prop->sram_user_base_address; 4830 size = hdev->pldm ? 0x10000 : 4831 (prop->sram_size - SRAM_USER_BASE_OFFSET); 4832 val = 0x7777777777777777ull; 4833 4834 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4835 if (rc) { 4836 dev_err(hdev->dev, 4837 "Failed to clear SRAM in mem scrub all\n"); 4838 return rc; 4839 } 4840 4841 /* Scrub HBM using all DMA channels in parallel */ 4842 rc = gaudi_scrub_device_dram(hdev, 0xdeadbeaf); 4843 if (rc) 4844 dev_err(hdev->dev, 4845 "Failed to clear HBM in mem scrub all\n"); 4846 } 4847 4848 return rc; 4849} 4850 4851static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4852 u32 queue_id, dma_addr_t *dma_handle, 4853 u16 *queue_len) 4854{ 4855 struct gaudi_device *gaudi = hdev->asic_specific; 4856 struct gaudi_internal_qman_info *q; 4857 4858 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4859 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4860 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4861 return NULL; 4862 } 4863 4864 q = &gaudi->internal_qmans[queue_id]; 4865 *dma_handle = q->pq_dma_addr; 4866 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4867 4868 return q->pq_kernel_addr; 4869} 4870 4871static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4872 u16 len, u32 timeout, u64 *result) 4873{ 4874 struct gaudi_device *gaudi = hdev->asic_specific; 4875 4876 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4877 if (result) 4878 *result = 0; 4879 return 0; 4880 } 4881 4882 if (!timeout) 4883 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4884 4885 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4886 timeout, result); 4887} 4888 4889static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4890{ 4891 struct packet_msg_prot *fence_pkt; 4892 dma_addr_t pkt_dma_addr; 4893 u32 fence_val, tmp, timeout_usec; 4894 dma_addr_t fence_dma_addr; 4895 u32 *fence_ptr; 4896 int rc; 4897 4898 if (hdev->pldm) 4899 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4900 else 4901 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4902 4903 fence_val = GAUDI_QMAN0_FENCE_VAL; 4904 4905 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, 4906 &fence_dma_addr); 4907 if (!fence_ptr) { 4908 dev_err(hdev->dev, 4909 "Failed to allocate memory for H/W queue %d testing\n", 4910 hw_queue_id); 4911 return -ENOMEM; 4912 } 4913 4914 *fence_ptr = 0; 4915 4916 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4917 sizeof(struct packet_msg_prot), 4918 GFP_KERNEL, &pkt_dma_addr); 4919 if (!fence_pkt) { 4920 dev_err(hdev->dev, 4921 "Failed to allocate packet for H/W queue %d testing\n", 4922 hw_queue_id); 4923 rc = -ENOMEM; 4924 goto free_fence_ptr; 4925 } 4926 4927 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4928 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4929 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4930 4931 fence_pkt->ctl = cpu_to_le32(tmp); 4932 fence_pkt->value = cpu_to_le32(fence_val); 4933 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4934 4935 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4936 sizeof(struct packet_msg_prot), 4937 pkt_dma_addr); 4938 if (rc) { 4939 
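		/*
		 * Descriptive note (added for clarity): sending the fence CB to
		 * the tested queue failed, so log the error and fall through to
		 * release the fence packet and the polling buffer allocated
		 * above.
		 */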
dev_err(hdev->dev, 4940 "Failed to send fence packet to H/W queue %d\n", 4941 hw_queue_id); 4942 goto free_pkt; 4943 } 4944 4945 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4946 1000, timeout_usec, true); 4947 4948 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4949 4950 if (rc == -ETIMEDOUT) { 4951 dev_err(hdev->dev, 4952 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4953 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4954 rc = -EIO; 4955 } 4956 4957free_pkt: 4958 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt, 4959 pkt_dma_addr); 4960free_fence_ptr: 4961 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr, 4962 fence_dma_addr); 4963 return rc; 4964} 4965 4966static int gaudi_test_cpu_queue(struct hl_device *hdev) 4967{ 4968 struct gaudi_device *gaudi = hdev->asic_specific; 4969 4970 /* 4971 * check capability here as send_cpu_message() won't update the result 4972 * value if no capability 4973 */ 4974 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4975 return 0; 4976 4977 return hl_fw_test_cpu_queue(hdev); 4978} 4979 4980static int gaudi_test_queues(struct hl_device *hdev) 4981{ 4982 int i, rc, ret_val = 0; 4983 4984 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4985 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4986 rc = gaudi_test_queue(hdev, i); 4987 if (rc) 4988 ret_val = -EINVAL; 4989 } 4990 } 4991 4992 rc = gaudi_test_cpu_queue(hdev); 4993 if (rc) 4994 ret_val = -EINVAL; 4995 4996 return ret_val; 4997} 4998 4999static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 5000 gfp_t mem_flags, dma_addr_t *dma_handle) 5001{ 5002 void *kernel_addr; 5003 5004 if (size > GAUDI_DMA_POOL_BLK_SIZE) 5005 return NULL; 5006 5007 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 5008 5009 /* Shift to the device's base physical address of host memory */ 5010 if (kernel_addr) 5011 *dma_handle += HOST_PHYS_BASE; 5012 5013 return kernel_addr; 5014} 5015 5016static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 5017 dma_addr_t dma_addr) 5018{ 5019 /* Cancel the device's base physical address of host memory */ 5020 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 5021 5022 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 5023} 5024 5025static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 5026 size_t size, dma_addr_t *dma_handle) 5027{ 5028 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 5029} 5030 5031static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 5032 size_t size, void *vaddr) 5033{ 5034 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 5035} 5036 5037static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 5038{ 5039 struct scatterlist *sg, *sg_next_iter; 5040 u32 count, dma_desc_cnt; 5041 u64 len, len_next; 5042 dma_addr_t addr, addr_next; 5043 5044 dma_desc_cnt = 0; 5045 5046 for_each_sgtable_dma_sg(sgt, sg, count) { 5047 len = sg_dma_len(sg); 5048 addr = sg_dma_address(sg); 5049 5050 if (len == 0) 5051 break; 5052 5053 while ((count + 1) < sgt->nents) { 5054 sg_next_iter = sg_next(sg); 5055 len_next = sg_dma_len(sg_next_iter); 5056 addr_next = sg_dma_address(sg_next_iter); 5057 5058 if (len_next == 0) 5059 break; 5060 5061 if ((addr + len == addr_next) && 5062 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5063 len += len_next; 5064 count++; 5065 sg = sg_next_iter; 5066 } else { 5067 break; 5068 } 5069 } 5070 5071 dma_desc_cnt++; 5072 } 5073 
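	/*
	 * Descriptive note (added for clarity): each coalesced run of
	 * contiguous DMA addresses counted above is emitted as a single
	 * LIN_DMA packet when the CB is patched, so the required descriptor
	 * list size is one packet_lin_dma per run.
	 */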
5074 return dma_desc_cnt * sizeof(struct packet_lin_dma); 5075} 5076 5077static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 5078 struct hl_cs_parser *parser, 5079 struct packet_lin_dma *user_dma_pkt, 5080 u64 addr, enum dma_data_direction dir) 5081{ 5082 struct hl_userptr *userptr; 5083 int rc; 5084 5085 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 5086 parser->job_userptr_list, &userptr)) 5087 goto already_pinned; 5088 5089 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 5090 if (!userptr) 5091 return -ENOMEM; 5092 5093 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 5094 userptr); 5095 if (rc) 5096 goto free_userptr; 5097 5098 list_add_tail(&userptr->job_node, parser->job_userptr_list); 5099 5100 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); 5101 if (rc) { 5102 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 5103 goto unpin_memory; 5104 } 5105 5106 userptr->dma_mapped = true; 5107 userptr->dir = dir; 5108 5109already_pinned: 5110 parser->patched_cb_size += 5111 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 5112 5113 return 0; 5114 5115unpin_memory: 5116 list_del(&userptr->job_node); 5117 hl_unpin_host_memory(hdev, userptr); 5118free_userptr: 5119 kfree(userptr); 5120 return rc; 5121} 5122 5123static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 5124 struct hl_cs_parser *parser, 5125 struct packet_lin_dma *user_dma_pkt, 5126 bool src_in_host) 5127{ 5128 enum dma_data_direction dir; 5129 bool skip_host_mem_pin = false, user_memset; 5130 u64 addr; 5131 int rc = 0; 5132 5133 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 5134 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5135 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5136 5137 if (src_in_host) { 5138 if (user_memset) 5139 skip_host_mem_pin = true; 5140 5141 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 5142 dir = DMA_TO_DEVICE; 5143 addr = le64_to_cpu(user_dma_pkt->src_addr); 5144 } else { 5145 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 5146 dir = DMA_FROM_DEVICE; 5147 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5148 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5149 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5150 } 5151 5152 if (skip_host_mem_pin) 5153 parser->patched_cb_size += sizeof(*user_dma_pkt); 5154 else 5155 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 5156 addr, dir); 5157 5158 return rc; 5159} 5160 5161static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 5162 struct hl_cs_parser *parser, 5163 struct packet_lin_dma *user_dma_pkt) 5164{ 5165 bool src_in_host = false; 5166 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5167 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5168 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5169 5170 dev_dbg(hdev->dev, "DMA packet details:\n"); 5171 dev_dbg(hdev->dev, "source == 0x%llx\n", 5172 le64_to_cpu(user_dma_pkt->src_addr)); 5173 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 5174 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 5175 5176 /* 5177 * Special handling for DMA with size 0. 
Bypass all validations 5178 * because no transactions will be done except for WR_COMP, which 5179 * is not a security issue 5180 */ 5181 if (!le32_to_cpu(user_dma_pkt->tsize)) { 5182 parser->patched_cb_size += sizeof(*user_dma_pkt); 5183 return 0; 5184 } 5185 5186 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5187 src_in_host = true; 5188 5189 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5190 src_in_host); 5191} 5192 5193static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5194 struct hl_cs_parser *parser, 5195 struct packet_load_and_exe *user_pkt) 5196{ 5197 u32 cfg; 5198 5199 cfg = le32_to_cpu(user_pkt->cfg); 5200 5201 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5202 dev_err(hdev->dev, 5203 "User not allowed to use Load and Execute\n"); 5204 return -EPERM; 5205 } 5206 5207 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5208 5209 return 0; 5210} 5211 5212static int gaudi_validate_cb(struct hl_device *hdev, 5213 struct hl_cs_parser *parser, bool is_mmu) 5214{ 5215 u32 cb_parsed_length = 0; 5216 int rc = 0; 5217 5218 parser->patched_cb_size = 0; 5219 5220 /* cb_user_size is more than 0 so loop will always be executed */ 5221 while (cb_parsed_length < parser->user_cb_size) { 5222 enum packet_id pkt_id; 5223 u16 pkt_size; 5224 struct gaudi_packet *user_pkt; 5225 5226 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5227 5228 pkt_id = (enum packet_id) ( 5229 (le64_to_cpu(user_pkt->header) & 5230 PACKET_HEADER_PACKET_ID_MASK) >> 5231 PACKET_HEADER_PACKET_ID_SHIFT); 5232 5233 if (!validate_packet_id(pkt_id)) { 5234 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5235 rc = -EINVAL; 5236 break; 5237 } 5238 5239 pkt_size = gaudi_packet_sizes[pkt_id]; 5240 cb_parsed_length += pkt_size; 5241 if (cb_parsed_length > parser->user_cb_size) { 5242 dev_err(hdev->dev, 5243 "packet 0x%x is out of CB boundary\n", pkt_id); 5244 rc = -EINVAL; 5245 break; 5246 } 5247 5248 switch (pkt_id) { 5249 case PACKET_MSG_PROT: 5250 dev_err(hdev->dev, 5251 "User not allowed to use MSG_PROT\n"); 5252 rc = -EPERM; 5253 break; 5254 5255 case PACKET_CP_DMA: 5256 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5257 rc = -EPERM; 5258 break; 5259 5260 case PACKET_STOP: 5261 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5262 rc = -EPERM; 5263 break; 5264 5265 case PACKET_WREG_BULK: 5266 dev_err(hdev->dev, 5267 "User not allowed to use WREG_BULK\n"); 5268 rc = -EPERM; 5269 break; 5270 5271 case PACKET_LOAD_AND_EXE: 5272 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5273 (struct packet_load_and_exe *) user_pkt); 5274 break; 5275 5276 case PACKET_LIN_DMA: 5277 parser->contains_dma_pkt = true; 5278 if (is_mmu) 5279 parser->patched_cb_size += pkt_size; 5280 else 5281 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5282 (struct packet_lin_dma *) user_pkt); 5283 break; 5284 5285 case PACKET_WREG_32: 5286 case PACKET_MSG_LONG: 5287 case PACKET_MSG_SHORT: 5288 case PACKET_REPEAT: 5289 case PACKET_FENCE: 5290 case PACKET_NOP: 5291 case PACKET_ARB_POINT: 5292 parser->patched_cb_size += pkt_size; 5293 break; 5294 5295 default: 5296 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5297 pkt_id); 5298 rc = -EINVAL; 5299 break; 5300 } 5301 5302 if (rc) 5303 break; 5304 } 5305 5306 /* 5307 * The new CB should have space at the end for two MSG_PROT packets: 5308 * 1. A packet that will act as a completion packet 5309 * 2. 
A packet that will generate MSI-X interrupt 5310 */ 5311 if (parser->completion) 5312 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2; 5313 5314 return rc; 5315} 5316 5317static int gaudi_patch_dma_packet(struct hl_device *hdev, 5318 struct hl_cs_parser *parser, 5319 struct packet_lin_dma *user_dma_pkt, 5320 struct packet_lin_dma *new_dma_pkt, 5321 u32 *new_dma_pkt_size) 5322{ 5323 struct hl_userptr *userptr; 5324 struct scatterlist *sg, *sg_next_iter; 5325 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5326 u64 len, len_next; 5327 dma_addr_t dma_addr, dma_addr_next; 5328 u64 device_memory_addr, addr; 5329 enum dma_data_direction dir; 5330 struct sg_table *sgt; 5331 bool src_in_host = false; 5332 bool skip_host_mem_pin = false; 5333 bool user_memset; 5334 5335 ctl = le32_to_cpu(user_dma_pkt->ctl); 5336 5337 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5338 src_in_host = true; 5339 5340 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5341 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5342 5343 if (src_in_host) { 5344 addr = le64_to_cpu(user_dma_pkt->src_addr); 5345 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5346 dir = DMA_TO_DEVICE; 5347 if (user_memset) 5348 skip_host_mem_pin = true; 5349 } else { 5350 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5351 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5352 dir = DMA_FROM_DEVICE; 5353 } 5354 5355 if ((!skip_host_mem_pin) && 5356 (!hl_userptr_is_pinned(hdev, addr, 5357 le32_to_cpu(user_dma_pkt->tsize), 5358 parser->job_userptr_list, &userptr))) { 5359 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5360 addr, user_dma_pkt->tsize); 5361 return -EFAULT; 5362 } 5363 5364 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5365 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5366 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5367 return 0; 5368 } 5369 5370 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5371 5372 sgt = userptr->sgt; 5373 dma_desc_cnt = 0; 5374 5375 for_each_sgtable_dma_sg(sgt, sg, count) { 5376 len = sg_dma_len(sg); 5377 dma_addr = sg_dma_address(sg); 5378 5379 if (len == 0) 5380 break; 5381 5382 while ((count + 1) < sgt->nents) { 5383 sg_next_iter = sg_next(sg); 5384 len_next = sg_dma_len(sg_next_iter); 5385 dma_addr_next = sg_dma_address(sg_next_iter); 5386 5387 if (len_next == 0) 5388 break; 5389 5390 if ((dma_addr + len == dma_addr_next) && 5391 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5392 len += len_next; 5393 count++; 5394 sg = sg_next_iter; 5395 } else { 5396 break; 5397 } 5398 } 5399 5400 ctl = le32_to_cpu(user_dma_pkt->ctl); 5401 if (likely(dma_desc_cnt)) 5402 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5403 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5404 new_dma_pkt->ctl = cpu_to_le32(ctl); 5405 new_dma_pkt->tsize = cpu_to_le32(len); 5406 5407 if (dir == DMA_TO_DEVICE) { 5408 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5409 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5410 } else { 5411 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5412 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5413 } 5414 5415 if (!user_memset) 5416 device_memory_addr += len; 5417 dma_desc_cnt++; 5418 new_dma_pkt++; 5419 } 5420 5421 if (!dma_desc_cnt) { 5422 dev_err(hdev->dev, 5423 "Error of 0 SG entries when patching DMA packet\n"); 5424 return -EFAULT; 5425 } 5426 5427 /* Fix the last dma packet - wrcomp must be as user set it */ 5428 new_dma_pkt--; 5429 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5430 5431 *new_dma_pkt_size = dma_desc_cnt 
* sizeof(struct packet_lin_dma); 5432 5433 return 0; 5434} 5435 5436static int gaudi_patch_cb(struct hl_device *hdev, 5437 struct hl_cs_parser *parser) 5438{ 5439 u32 cb_parsed_length = 0; 5440 u32 cb_patched_cur_length = 0; 5441 int rc = 0; 5442 5443 /* cb_user_size is more than 0 so loop will always be executed */ 5444 while (cb_parsed_length < parser->user_cb_size) { 5445 enum packet_id pkt_id; 5446 u16 pkt_size; 5447 u32 new_pkt_size = 0; 5448 struct gaudi_packet *user_pkt, *kernel_pkt; 5449 5450 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5451 kernel_pkt = parser->patched_cb->kernel_address + 5452 cb_patched_cur_length; 5453 5454 pkt_id = (enum packet_id) ( 5455 (le64_to_cpu(user_pkt->header) & 5456 PACKET_HEADER_PACKET_ID_MASK) >> 5457 PACKET_HEADER_PACKET_ID_SHIFT); 5458 5459 if (!validate_packet_id(pkt_id)) { 5460 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5461 rc = -EINVAL; 5462 break; 5463 } 5464 5465 pkt_size = gaudi_packet_sizes[pkt_id]; 5466 cb_parsed_length += pkt_size; 5467 if (cb_parsed_length > parser->user_cb_size) { 5468 dev_err(hdev->dev, 5469 "packet 0x%x is out of CB boundary\n", pkt_id); 5470 rc = -EINVAL; 5471 break; 5472 } 5473 5474 switch (pkt_id) { 5475 case PACKET_LIN_DMA: 5476 rc = gaudi_patch_dma_packet(hdev, parser, 5477 (struct packet_lin_dma *) user_pkt, 5478 (struct packet_lin_dma *) kernel_pkt, 5479 &new_pkt_size); 5480 cb_patched_cur_length += new_pkt_size; 5481 break; 5482 5483 case PACKET_MSG_PROT: 5484 dev_err(hdev->dev, 5485 "User not allowed to use MSG_PROT\n"); 5486 rc = -EPERM; 5487 break; 5488 5489 case PACKET_CP_DMA: 5490 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5491 rc = -EPERM; 5492 break; 5493 5494 case PACKET_STOP: 5495 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5496 rc = -EPERM; 5497 break; 5498 5499 case PACKET_WREG_32: 5500 case PACKET_WREG_BULK: 5501 case PACKET_MSG_LONG: 5502 case PACKET_MSG_SHORT: 5503 case PACKET_REPEAT: 5504 case PACKET_FENCE: 5505 case PACKET_NOP: 5506 case PACKET_ARB_POINT: 5507 case PACKET_LOAD_AND_EXE: 5508 memcpy(kernel_pkt, user_pkt, pkt_size); 5509 cb_patched_cur_length += pkt_size; 5510 break; 5511 5512 default: 5513 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5514 pkt_id); 5515 rc = -EINVAL; 5516 break; 5517 } 5518 5519 if (rc) 5520 break; 5521 } 5522 5523 return rc; 5524} 5525 5526static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5527 struct hl_cs_parser *parser) 5528{ 5529 u64 handle; 5530 u32 patched_cb_size; 5531 struct hl_cb *user_cb; 5532 int rc; 5533 5534 /* 5535 * The new CB should have space at the end for two MSG_PROT pkt: 5536 * 1. A packet that will act as a completion packet 5537 * 2. 
A packet that will generate MSI interrupt 5538 */ 5539 if (parser->completion) 5540 parser->patched_cb_size = parser->user_cb_size + 5541 sizeof(struct packet_msg_prot) * 2; 5542 else 5543 parser->patched_cb_size = parser->user_cb_size; 5544 5545 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5546 parser->patched_cb_size, false, false, 5547 &handle); 5548 5549 if (rc) { 5550 dev_err(hdev->dev, 5551 "Failed to allocate patched CB for DMA CS %d\n", 5552 rc); 5553 return rc; 5554 } 5555 5556 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5557 /* hl_cb_get should never fail */ 5558 if (!parser->patched_cb) { 5559 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5560 rc = -EFAULT; 5561 goto out; 5562 } 5563 5564 /* 5565 * The check that parser->user_cb_size <= parser->user_cb->size was done 5566 * in validate_queue_index(). 5567 */ 5568 memcpy(parser->patched_cb->kernel_address, 5569 parser->user_cb->kernel_address, 5570 parser->user_cb_size); 5571 5572 patched_cb_size = parser->patched_cb_size; 5573 5574 /* Validate patched CB instead of user CB */ 5575 user_cb = parser->user_cb; 5576 parser->user_cb = parser->patched_cb; 5577 rc = gaudi_validate_cb(hdev, parser, true); 5578 parser->user_cb = user_cb; 5579 5580 if (rc) { 5581 hl_cb_put(parser->patched_cb); 5582 goto out; 5583 } 5584 5585 if (patched_cb_size != parser->patched_cb_size) { 5586 dev_err(hdev->dev, "user CB size mismatch\n"); 5587 hl_cb_put(parser->patched_cb); 5588 rc = -EINVAL; 5589 goto out; 5590 } 5591 5592out: 5593 /* 5594 * Always call cb destroy here because we still have 1 reference 5595 * to it by calling cb_get earlier. After the job will be completed, 5596 * cb_put will release it, but here we want to remove it from the 5597 * idr 5598 */ 5599 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5600 5601 return rc; 5602} 5603 5604static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5605 struct hl_cs_parser *parser) 5606{ 5607 u64 handle; 5608 int rc; 5609 5610 rc = gaudi_validate_cb(hdev, parser, false); 5611 5612 if (rc) 5613 goto free_userptr; 5614 5615 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5616 parser->patched_cb_size, false, false, 5617 &handle); 5618 if (rc) { 5619 dev_err(hdev->dev, 5620 "Failed to allocate patched CB for DMA CS %d\n", rc); 5621 goto free_userptr; 5622 } 5623 5624 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5625 /* hl_cb_get should never fail here */ 5626 if (!parser->patched_cb) { 5627 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5628 rc = -EFAULT; 5629 goto out; 5630 } 5631 5632 rc = gaudi_patch_cb(hdev, parser); 5633 5634 if (rc) 5635 hl_cb_put(parser->patched_cb); 5636 5637out: 5638 /* 5639 * Always call cb destroy here because we still have 1 reference 5640 * to it by calling cb_get earlier. 
After the job will be completed, 5641 * cb_put will release it, but here we want to remove it from the 5642 * idr 5643 */ 5644 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5645 5646free_userptr: 5647 if (rc) 5648 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5649 return rc; 5650} 5651 5652static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5653 struct hl_cs_parser *parser) 5654{ 5655 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5656 struct gaudi_device *gaudi = hdev->asic_specific; 5657 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + 5658 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2)); 5659 5660 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5661 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) && 5662 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) { 5663 dev_err(hdev->dev, "h/w queue %d is disabled\n", 5664 parser->hw_queue_id); 5665 return -EINVAL; 5666 } 5667 5668 /* For internal queue jobs just check if CB address is valid */ 5669 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5670 parser->user_cb_size, 5671 asic_prop->sram_user_base_address, 5672 asic_prop->sram_end_address)) 5673 return 0; 5674 5675 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5676 parser->user_cb_size, 5677 asic_prop->dram_user_base_address, 5678 asic_prop->dram_end_address)) 5679 return 0; 5680 5681 /* PMMU and HPMMU addresses are equal, check only one of them */ 5682 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5683 parser->user_cb_size, 5684 asic_prop->pmmu.start_addr, 5685 asic_prop->pmmu.end_addr)) 5686 return 0; 5687 5688 dev_err(hdev->dev, 5689 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5690 parser->user_cb, parser->user_cb_size); 5691 5692 return -EFAULT; 5693} 5694 5695static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5696{ 5697 struct gaudi_device *gaudi = hdev->asic_specific; 5698 5699 if (parser->queue_type == QUEUE_TYPE_INT) 5700 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5701 5702 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5703 return gaudi_parse_cb_mmu(hdev, parser); 5704 else 5705 return gaudi_parse_cb_no_mmu(hdev, parser); 5706} 5707 5708static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, 5709 void *kernel_address, u32 len, 5710 u64 cq_addr, u32 cq_val, u32 msi_vec, 5711 bool eb) 5712{ 5713 struct gaudi_device *gaudi = hdev->asic_specific; 5714 struct packet_msg_prot *cq_pkt; 5715 u64 msi_addr; 5716 u32 tmp; 5717 5718 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5719 5720 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5721 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5722 5723 if (eb) 5724 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5725 5726 cq_pkt->ctl = cpu_to_le32(tmp); 5727 cq_pkt->value = cpu_to_le32(cq_val); 5728 cq_pkt->addr = cpu_to_le64(cq_addr); 5729 5730 cq_pkt++; 5731 5732 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5733 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5734 cq_pkt->ctl = cpu_to_le32(tmp); 5735 cq_pkt->value = cpu_to_le32(1); 5736 5737 if (gaudi->multi_msi_mode) 5738 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4; 5739 else 5740 msi_addr = mmPCIE_CORE_MSI_REQ; 5741 5742 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5743} 5744 5745static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5746{ 5747 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5748} 5749 5750static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5751 u32 size, u64 val) 
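/* Fill @size bytes of device memory at @addr with the value @val: a single LIN_DMA packet in memset mode is built in a kernel CB and submitted as a kernel-owned job on the DMA 0 QMAN via gaudi_send_job_on_qman0() */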
5752{ 5753 struct packet_lin_dma *lin_dma_pkt; 5754 struct hl_cs_job *job; 5755 u32 cb_size, ctl, err_cause; 5756 struct hl_cb *cb; 5757 int rc; 5758 5759 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5760 if (!cb) 5761 return -EFAULT; 5762 5763 lin_dma_pkt = cb->kernel_address; 5764 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5765 cb_size = sizeof(*lin_dma_pkt); 5766 5767 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5768 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5769 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5770 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5771 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5772 5773 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5774 lin_dma_pkt->src_addr = cpu_to_le64(val); 5775 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5776 lin_dma_pkt->tsize = cpu_to_le32(size); 5777 5778 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5779 if (!job) { 5780 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5781 rc = -ENOMEM; 5782 goto release_cb; 5783 } 5784 5785 /* Verify DMA is OK */ 5786 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5787 if (err_cause && !hdev->init_done) { 5788 dev_dbg(hdev->dev, 5789 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5790 err_cause); 5791 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5792 } 5793 5794 job->id = 0; 5795 job->user_cb = cb; 5796 atomic_inc(&job->user_cb->cs_cnt); 5797 job->user_cb_size = cb_size; 5798 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5799 job->patched_cb = job->user_cb; 5800 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5801 5802 hl_debugfs_add_job(hdev, job); 5803 5804 rc = gaudi_send_job_on_qman0(hdev, job); 5805 hl_debugfs_remove_job(hdev, job); 5806 kfree(job); 5807 atomic_dec(&cb->cs_cnt); 5808 5809 /* Verify DMA is OK */ 5810 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5811 if (err_cause) { 5812 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5813 rc = -EIO; 5814 if (!hdev->init_done) { 5815 dev_dbg(hdev->dev, 5816 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5817 err_cause); 5818 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5819 } 5820 } 5821 5822release_cb: 5823 hl_cb_put(cb); 5824 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5825 5826 return rc; 5827} 5828 5829static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5830 u32 num_regs, u32 val) 5831{ 5832 struct packet_msg_long *pkt; 5833 struct hl_cs_job *job; 5834 u32 cb_size, ctl; 5835 struct hl_cb *cb; 5836 int i, rc; 5837 5838 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5839 5840 if (cb_size > SZ_2M) { 5841 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5842 return -ENOMEM; 5843 } 5844 5845 cb = hl_cb_kernel_create(hdev, cb_size, false); 5846 if (!cb) 5847 return -EFAULT; 5848 5849 pkt = cb->kernel_address; 5850 5851 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5852 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5853 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5854 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5855 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5856 5857 for (i = 0; i < num_regs ; i++, pkt++) { 5858 pkt->ctl = cpu_to_le32(ctl); 5859 pkt->value = cpu_to_le32(val); 5860 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5861 } 5862 5863 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5864 if (!job) { 5865 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5866 rc = -ENOMEM; 5867 goto release_cb; 5868 } 5869 5870 job->id = 0; 5871 
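/* Reuse the CB as the patched CB; cb_size already reserves room for the MSG_PROT fence packet that gaudi_send_job_on_qman0() writes at the end of the CB */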
job->user_cb = cb; 5872 atomic_inc(&job->user_cb->cs_cnt); 5873 job->user_cb_size = cb_size; 5874 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5875 job->patched_cb = job->user_cb; 5876 job->job_cb_size = cb_size; 5877 5878 hl_debugfs_add_job(hdev, job); 5879 5880 rc = gaudi_send_job_on_qman0(hdev, job); 5881 hl_debugfs_remove_job(hdev, job); 5882 kfree(job); 5883 atomic_dec(&cb->cs_cnt); 5884 5885release_cb: 5886 hl_cb_put(cb); 5887 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5888 5889 return rc; 5890} 5891 5892static int gaudi_restore_sm_registers(struct hl_device *hdev) 5893{ 5894 u64 base_addr; 5895 u32 num_regs; 5896 int rc; 5897 5898 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5899 num_regs = NUM_OF_SOB_IN_BLOCK; 5900 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5901 if (rc) { 5902 dev_err(hdev->dev, "failed resetting SM registers"); 5903 return -ENOMEM; 5904 } 5905 5906 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5907 num_regs = NUM_OF_SOB_IN_BLOCK; 5908 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5909 if (rc) { 5910 dev_err(hdev->dev, "failed resetting SM registers"); 5911 return -ENOMEM; 5912 } 5913 5914 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5915 num_regs = NUM_OF_SOB_IN_BLOCK; 5916 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5917 if (rc) { 5918 dev_err(hdev->dev, "failed resetting SM registers"); 5919 return -ENOMEM; 5920 } 5921 5922 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5923 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5924 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5925 if (rc) { 5926 dev_err(hdev->dev, "failed resetting SM registers"); 5927 return -ENOMEM; 5928 } 5929 5930 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5931 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5932 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5933 if (rc) { 5934 dev_err(hdev->dev, "failed resetting SM registers"); 5935 return -ENOMEM; 5936 } 5937 5938 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5939 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5940 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5941 if (rc) { 5942 dev_err(hdev->dev, "failed resetting SM registers"); 5943 return -ENOMEM; 5944 } 5945 5946 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5947 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5948 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5949 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5950 if (rc) { 5951 dev_err(hdev->dev, "failed resetting SM registers"); 5952 return -ENOMEM; 5953 } 5954 5955 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5956 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5957 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5958 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5959 if (rc) { 5960 dev_err(hdev->dev, "failed resetting SM registers"); 5961 return -ENOMEM; 5962 } 5963 5964 return 0; 5965} 5966 5967static void gaudi_restore_dma_registers(struct hl_device *hdev) 5968{ 5969 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5970 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5971 int i; 5972 5973 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5974 u64 sob_addr = CFG_BASE + 5975 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5976 (i * sob_delta); 5977 u32 dma_offset = i * DMA_CORE_OFFSET; 5978 5979 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + 
dma_offset, 5980 lower_32_bits(sob_addr)); 5981 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5982 upper_32_bits(sob_addr)); 5983 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5984 5985 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5986 * modified by the user for SRAM reduction 5987 */ 5988 if (i > 1) 5989 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5990 0x00000001); 5991 } 5992} 5993 5994static void gaudi_restore_qm_registers(struct hl_device *hdev) 5995{ 5996 u32 qman_offset; 5997 int i; 5998 5999 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 6000 qman_offset = i * DMA_QMAN_OFFSET; 6001 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 6002 } 6003 6004 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 6005 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 6006 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 6007 } 6008 6009 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 6010 qman_offset = i * TPC_QMAN_OFFSET; 6011 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 6012 } 6013 6014 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 6015 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 6016 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 6017 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 6018 } 6019} 6020 6021static int gaudi_restore_user_registers(struct hl_device *hdev) 6022{ 6023 int rc; 6024 6025 rc = gaudi_restore_sm_registers(hdev); 6026 if (rc) 6027 return rc; 6028 6029 gaudi_restore_dma_registers(hdev); 6030 gaudi_restore_qm_registers(hdev); 6031 6032 return 0; 6033} 6034 6035static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 6036{ 6037 return 0; 6038} 6039 6040static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 6041{ 6042 struct asic_fixed_properties *prop = &hdev->asic_prop; 6043 struct gaudi_device *gaudi = hdev->asic_specific; 6044 u64 addr = prop->mmu_pgt_addr; 6045 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE; 6046 6047 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6048 return 0; 6049 6050 return gaudi_memset_device_memory(hdev, addr, size, 0); 6051} 6052 6053static void gaudi_restore_phase_topology(struct hl_device *hdev) 6054{ 6055 6056} 6057 6058static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 6059 u32 size_to_dma, dma_addr_t dma_addr) 6060{ 6061 u32 err_cause, val; 6062 u64 dma_offset; 6063 int rc; 6064 6065 dma_offset = dma_id * DMA_CORE_OFFSET; 6066 6067 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 6068 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 6069 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 6070 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 6071 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 6072 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 6073 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 6074 6075 rc = hl_poll_timeout( 6076 hdev, 6077 mmDMA0_CORE_STS0 + dma_offset, 6078 val, 6079 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 6080 0, 6081 1000000); 6082 6083 if (rc) { 6084 dev_err(hdev->dev, 6085 "DMA %d timed-out during reading of 0x%llx\n", 6086 dma_id, addr); 6087 return -EIO; 6088 } 6089 6090 /* Verify DMA is OK */ 6091 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6092 if (err_cause) { 6093 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 6094 dev_dbg(hdev->dev, 6095 "Clearing DMA0 engine from errors (cause 0x%x)\n", 6096 err_cause); 6097 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 6098 6099 return -EIO; 6100 } 6101 6102 return 0; 6103} 6104 6105static 
int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 6106 void *blob_addr) 6107{ 6108 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 6109 u32 qm_glbl_sts0, qm_cgm_sts; 6110 u64 dma_offset, qm_offset; 6111 dma_addr_t dma_addr; 6112 void *kernel_addr; 6113 bool is_eng_idle; 6114 int rc = 0, dma_id; 6115 6116 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( 6117 hdev, SZ_2M, 6118 &dma_addr, 6119 GFP_KERNEL | __GFP_ZERO); 6120 6121 if (!kernel_addr) 6122 return -ENOMEM; 6123 6124 hdev->asic_funcs->hw_queues_lock(hdev); 6125 6126 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 6127 dma_offset = dma_id * DMA_CORE_OFFSET; 6128 qm_offset = dma_id * DMA_QMAN_OFFSET; 6129 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6130 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6131 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6132 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6133 IS_DMA_IDLE(dma_core_sts0); 6134 6135 if (!is_eng_idle) { 6136 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 6137 dma_offset = dma_id * DMA_CORE_OFFSET; 6138 qm_offset = dma_id * DMA_QMAN_OFFSET; 6139 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6140 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6141 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6142 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6143 IS_DMA_IDLE(dma_core_sts0); 6144 6145 if (!is_eng_idle) { 6146 dev_err_ratelimited(hdev->dev, 6147 "Can't read via DMA because it is BUSY\n"); 6148 rc = -EAGAIN; 6149 goto out; 6150 } 6151 } 6152 6153 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 6154 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 6155 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 6156 6157 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6158 * using the compute ctx ASID, if exists. If not, use the kernel ctx 6159 * ASID 6160 */ 6161 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6162 6163 /* Verify DMA is OK */ 6164 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6165 if (err_cause) { 6166 dev_dbg(hdev->dev, 6167 "Clearing DMA0 engine from errors (cause 0x%x)\n", 6168 err_cause); 6169 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 6170 } 6171 6172 pos = 0; 6173 size_left = size; 6174 size_to_dma = SZ_2M; 6175 6176 while (size_left > 0) { 6177 6178 if (size_left < SZ_2M) 6179 size_to_dma = size_left; 6180 6181 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6182 dma_addr); 6183 if (rc) 6184 break; 6185 6186 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6187 6188 if (size_left <= SZ_2M) 6189 break; 6190 6191 pos += SZ_2M; 6192 addr += SZ_2M; 6193 size_left -= SZ_2M; 6194 } 6195 6196 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6197 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6198 * ASID 6199 */ 6200 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6201 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6202 6203 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6204 6205out: 6206 hdev->asic_funcs->hw_queues_unlock(hdev); 6207 6208 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, 6209 dma_addr); 6210 6211 return rc; 6212} 6213 6214static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6215{ 6216 struct gaudi_device *gaudi = hdev->asic_specific; 6217 6218 if (hdev->reset_info.hard_reset_pending) 6219 return U64_MAX; 6220 6221 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6222 (addr - gaudi->hbm_bar_cur_addr)); 6223} 6224 6225static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6226{ 6227 struct gaudi_device *gaudi = hdev->asic_specific; 6228 6229 if (hdev->reset_info.hard_reset_pending) 6230 return; 6231 6232 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6233 (addr - gaudi->hbm_bar_cur_addr)); 6234} 6235 6236void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6237{ 6238 /* mask to zero the MMBP and ASID bits */ 6239 WREG32_AND(reg, ~0x7FF); 6240 WREG32_OR(reg, asid); 6241} 6242 6243static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6244{ 6245 struct gaudi_device *gaudi = hdev->asic_specific; 6246 6247 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6248 return; 6249 6250 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6251 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6252 return; 6253 } 6254 6255 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6256 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6257 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6258 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6259 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6260 6261 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6262 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6263 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6264 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6265 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6266 6267 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6268 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6269 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6270 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6271 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6272 6273 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6274 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6275 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6276 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6277 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6278 6279 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6280 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6281 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6282 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6283 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6284 6285 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6286 gaudi_mmu_prepare_reg(hdev, 
mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6287 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6288 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6289 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6290 6291 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6292 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6293 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6294 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6295 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6296 6297 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6298 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6299 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6300 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6301 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6302 6303 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6304 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6305 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6306 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6307 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6308 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6309 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6310 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6311 6312 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6313 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6314 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6315 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6316 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6317 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6318 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6319 6320 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6321 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6322 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6323 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6324 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6325 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6326 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6327 6328 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6329 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6330 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6331 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6332 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6333 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6334 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6335 6336 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6337 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6338 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6339 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6340 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6341 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6342 
gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid); 6343 6344 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6345 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6346 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6347 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6348 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6349 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6350 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6351 6352 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6353 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6354 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6355 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6356 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6357 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6358 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6359 6360 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6361 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6362 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6363 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6364 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6365 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6366 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6367 6368 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6369 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6370 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6371 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6372 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6373 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6374 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6375 6376 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6377 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6378 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6379 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6380 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6381 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6382 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6383 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6384 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6385 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6386 6387 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6388 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6389 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6390 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6391 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6392 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6393 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6394 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6395 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6396 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6397 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6398 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6399 6400 
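/* NIC QMANs get the ASID only if the corresponding port was initialized, as reflected in hw_cap_initialized; uninitialized NICs are skipped */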
if (gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6401 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6402 asid); 6403 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6404 asid); 6405 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6406 asid); 6407 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6408 asid); 6409 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6410 asid); 6411 } 6412 6413 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6414 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6415 asid); 6416 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6417 asid); 6418 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6419 asid); 6420 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6421 asid); 6422 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6423 asid); 6424 } 6425 6426 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6427 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6428 asid); 6429 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6430 asid); 6431 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6432 asid); 6433 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6434 asid); 6435 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6436 asid); 6437 } 6438 6439 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6440 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6441 asid); 6442 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6443 asid); 6444 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6445 asid); 6446 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6447 asid); 6448 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6449 asid); 6450 } 6451 6452 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6453 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6454 asid); 6455 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6456 asid); 6457 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6458 asid); 6459 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6460 asid); 6461 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6462 asid); 6463 } 6464 6465 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6466 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6467 asid); 6468 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6469 asid); 6470 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6471 asid); 6472 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6473 asid); 6474 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6475 asid); 6476 } 6477 6478 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6479 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6480 asid); 6481 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6482 asid); 6483 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6484 asid); 6485 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6486 asid); 6487 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6488 asid); 6489 } 6490 6491 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6492 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6493 asid); 6494 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6495 asid); 6496 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6497 asid); 6498 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6499 asid); 6500 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6501 asid); 6502 } 6503 6504 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6505 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6506 asid); 6507 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6508 asid); 6509 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6510 asid); 6511 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6512 asid); 6513 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6514 asid); 6515 } 6516 6517 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6518 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6519 asid); 6520 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6521 asid); 6522 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6523 asid); 6524 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6525 asid); 6526 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6527 asid); 6528 } 6529 6530 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6531 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6532} 6533 6534static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6535 struct hl_cs_job *job) 6536{ 6537 struct packet_msg_prot *fence_pkt; 6538 u32 *fence_ptr; 6539 dma_addr_t fence_dma_addr; 6540 struct hl_cb *cb; 6541 u32 tmp, timeout, dma_offset; 6542 int rc; 6543 6544 if (hdev->pldm) 6545 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6546 else 6547 timeout = HL_DEVICE_TIMEOUT_USEC; 6548 6549 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 6550 dev_err_ratelimited(hdev->dev, 6551 "Can't send driver job on QMAN0 because the device is not idle\n"); 6552 return -EBUSY; 6553 } 6554 6555 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, 6556 &fence_dma_addr); 6557 if (!fence_ptr) { 6558 dev_err(hdev->dev, 6559 "Failed to allocate fence memory for QMAN0\n"); 6560 return -ENOMEM; 6561 } 6562 6563 cb = job->patched_cb; 6564 6565 fence_pkt = cb->kernel_address + 6566 job->job_cb_size - sizeof(struct packet_msg_prot); 6567 6568 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6569 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6570 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6571 6572 fence_pkt->ctl = cpu_to_le32(tmp); 6573 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6574 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6575 6576 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6577 6578 WREG32(mmDMA0_CORE_PROT + dma_offset, 6579 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6580 6581 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6582 job->job_cb_size, cb->bus_address); 6583 if (rc) { 6584 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6585 goto free_fence_ptr; 6586 } 6587 6588 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6589 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6590 timeout, true); 6591 6592 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6593 6594 if (rc == -ETIMEDOUT) { 6595 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6596 goto free_fence_ptr; 6597 } 6598 6599free_fence_ptr: 6600 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6601 6602 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr, 6603 fence_dma_addr); 6604 
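/* rc is 0 on success, -ETIMEDOUT if the fence value was never observed, or the error code returned by the queue submission */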
return rc; 6605} 6606 6607static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6608{ 6609 if (event_type >= GAUDI_EVENT_SIZE) 6610 goto event_not_supported; 6611 6612 if (!gaudi_irq_map_table[event_type].valid) 6613 goto event_not_supported; 6614 6615 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6616 6617 return; 6618 6619event_not_supported: 6620 snprintf(desc, size, "N/A"); 6621} 6622 6623static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6624 bool is_write, s32 *engine_id_1, 6625 s32 *engine_id_2) 6626{ 6627 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6628 6629 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6630 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6631 6632 switch (x_y) { 6633 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6634 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6635 dma_id[0] = 0; 6636 dma_id[1] = 2; 6637 break; 6638 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6639 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6640 dma_id[0] = 1; 6641 dma_id[1] = 3; 6642 break; 6643 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6644 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6645 dma_id[0] = 4; 6646 dma_id[1] = 6; 6647 break; 6648 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6649 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6650 dma_id[0] = 5; 6651 dma_id[1] = 7; 6652 break; 6653 default: 6654 goto unknown_initiator; 6655 } 6656 6657 for (i = 0 ; i < 2 ; i++) { 6658 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6659 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6660 } 6661 6662 switch (x_y) { 6663 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6664 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6665 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6666 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6667 return "DMA0"; 6668 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6669 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6670 return "DMA2"; 6671 } else { 6672 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6673 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6674 return "DMA0 or DMA2"; 6675 } 6676 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6677 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6678 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6679 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6680 return "DMA1"; 6681 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6682 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6683 return "DMA3"; 6684 } else { 6685 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6686 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6687 return "DMA1 or DMA3"; 6688 } 6689 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6690 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6691 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6692 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6693 return "DMA4"; 6694 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6695 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6696 return "DMA6"; 6697 } else { 6698 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6699 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6700 return "DMA4 or DMA6"; 6701 } 6702 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6703 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6704 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6705 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6706 return "DMA5"; 6707 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6708 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6709 return "DMA7"; 6710 } else { 6711 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6712 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6713 return "DMA5 or DMA7"; 6714 } 6715 } 6716 6717unknown_initiator: 6718 
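/* The x/y location did not match any DMA IF initiator */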
return "unknown initiator"; 6719} 6720 6721static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6722 u32 *engine_id_1, u32 *engine_id_2) 6723{ 6724 u32 val, x_y, axi_id; 6725 6726 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6727 RREG32(mmMMU_UP_RAZWI_READ_ID); 6728 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6729 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6730 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6731 RAZWI_INITIATOR_AXI_ID_SHIFT); 6732 6733 switch (x_y) { 6734 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6735 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6736 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6737 return "TPC0"; 6738 } 6739 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6740 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6741 return "NIC0"; 6742 } 6743 break; 6744 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6745 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6746 return "TPC1"; 6747 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6748 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6749 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6750 return "MME0"; 6751 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6752 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6753 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6754 return "MME1"; 6755 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6756 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6757 return "TPC2"; 6758 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6759 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6760 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6761 return "TPC3"; 6762 } 6763 /* PCI, CPU or PSOC does not have engine id*/ 6764 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6765 return "PCI"; 6766 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6767 return "CPU"; 6768 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6769 return "PSOC"; 6770 break; 6771 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6772 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6773 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6774 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6775 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6776 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6777 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6778 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6779 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6780 engine_id_1, engine_id_2); 6781 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6782 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6783 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6784 return "TPC4"; 6785 } 6786 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6787 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6788 return "NIC1"; 6789 } 6790 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6791 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6792 return "NIC2"; 6793 } 6794 break; 6795 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6796 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6797 return "TPC5"; 6798 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6799 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6800 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6801 return "MME2"; 6802 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6803 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6804 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6805 return "MME3"; 6806 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6807 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6808 return "TPC6"; 6809 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6810 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6811 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6812 return "TPC7"; 6813 } 6814 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6815 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6816 return 
"NIC4"; 6817 } 6818 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6819 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6820 return "NIC5"; 6821 } 6822 break; 6823 default: 6824 break; 6825 } 6826 6827 dev_err(hdev->dev, 6828 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6829 val, 6830 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6831 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6832 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6833 RAZWI_INITIATOR_AXI_ID_MASK); 6834 6835 return "unknown initiator"; 6836} 6837 6838static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1, 6839 u32 *engine_id_2) 6840{ 6841 6842 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6843 dev_err_ratelimited(hdev->dev, 6844 "RAZWI event caused by illegal write of %s\n", 6845 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6846 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6847 } 6848 6849 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6850 dev_err_ratelimited(hdev->dev, 6851 "RAZWI event caused by illegal read of %s\n", 6852 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6853 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6854 } 6855} 6856 6857static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type) 6858{ 6859 struct gaudi_device *gaudi = hdev->asic_specific; 6860 u32 val; 6861 6862 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6863 return; 6864 6865 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6866 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6867 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6868 *addr <<= 32; 6869 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6870 6871 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6872 *type = HL_RAZWI_PAGE_FAULT; 6873 6874 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6875 } 6876 6877 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6878 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6879 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6880 *addr <<= 32; 6881 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6882 6883 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6884 *type = HL_RAZWI_MMU_ACCESS_ERROR; 6885 6886 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6887 } 6888} 6889 6890/* 6891 * +-------------------+------------------------------------------------------+ 6892 * | Configuration Reg | Description | 6893 * | Address | | 6894 * +-------------------+------------------------------------------------------+ 6895 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6896 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6897 * | |0xF34 memory wrappers 63:32 | 6898 * | |0xF38 memory wrappers 95:64 | 6899 * | |0xF3C memory wrappers 127:96 | 6900 * +-------------------+------------------------------------------------------+ 6901 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6902 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6903 * | |0xF44 memory wrappers 63:32 | 6904 * | |0xF48 memory wrappers 95:64 | 6905 * | |0xF4C memory wrappers 127:96 | 6906 * +-------------------+------------------------------------------------------+ 6907 */ 6908static int gaudi_extract_ecc_info(struct hl_device *hdev, 6909 struct ecc_info_extract_params *params, u64 *ecc_address, 6910 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6911{ 6912 u32 i, num_mem_regs, reg, err_bit; 6913 u64 err_addr, err_word = 0; 6914 6915 num_mem_regs = params->num_memories / 32 + 6916 
((params->num_memories % 32) ? 1 : 0); 6917 6918 if (params->block_address >= CFG_BASE) 6919 params->block_address -= CFG_BASE; 6920 6921 if (params->derr) 6922 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6923 else 6924 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6925 6926 /* Set invalid wrapper index */ 6927 *memory_wrapper_idx = 0xFF; 6928 6929 /* Iterate through memory wrappers, a single bit must be set */ 6930 for (i = 0 ; i < num_mem_regs ; i++) { 6931 err_addr += i * 4; 6932 err_word = RREG32(err_addr); 6933 if (err_word) { 6934 err_bit = __ffs(err_word); 6935 *memory_wrapper_idx = err_bit + (32 * i); 6936 break; 6937 } 6938 } 6939 6940 if (*memory_wrapper_idx == 0xFF) { 6941 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6942 return -EINVAL; 6943 } 6944 6945 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6946 *memory_wrapper_idx); 6947 6948 *ecc_address = 6949 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6950 *ecc_syndrom = 6951 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6952 6953 /* Clear error indication */ 6954 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6955 if (params->derr) 6956 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6957 else 6958 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6959 6960 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6961 6962 return 0; 6963} 6964 6965/* 6966 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6967 * 6968 * @idx: the current pi/ci value 6969 * @q_len: the queue length (power of 2) 6970 * 6971 * @return the cyclically decremented index 6972 */ 6973static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6974{ 6975 u32 mask = q_len - 1; 6976 6977 /* 6978 * modular decrement is equivalent to adding (queue_size -1) 6979 * later we take LSBs to make sure the value is in the 6980 * range [0, queue_len - 1] 6981 */ 6982 return (idx + q_len - 1) & mask; 6983} 6984 6985/** 6986 * gaudi_print_sw_config_stream_data - print SW config stream data 6987 * 6988 * @hdev: pointer to the habanalabs device structure 6989 * @stream: the QMAN's stream 6990 * @qman_base: base address of QMAN registers block 6991 */ 6992static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6993 u64 qman_base) 6994{ 6995 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6996 u32 cq_ptr_lo_off, size; 6997 6998 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6999 7000 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 7001 stream * cq_ptr_lo_off; 7002 cq_ptr_hi = cq_ptr_lo + 7003 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 7004 cq_tsize = cq_ptr_lo + 7005 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 7006 7007 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 7008 size = RREG32(cq_tsize); 7009 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 7010 stream, cq_ptr, size); 7011} 7012 7013/** 7014 * gaudi_print_last_pqes_on_err - print last PQEs on error 7015 * 7016 * @hdev: pointer to the habanalabs device structure 7017 * @qid_base: first QID of the QMAN (out of 4 streams) 7018 * @stream: the QMAN's stream 7019 * @qman_base: base address of QMAN registers block 7020 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 7021 */ 7022static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 7023 u32 stream, u64 qman_base, 7024 bool pr_sw_conf) 7025{ 7026 u32 ci, qm_ci_stream_off, 
queue_len; 7027 struct hl_hw_queue *q; 7028 u64 pq_ci; 7029 int i; 7030 7031 q = &hdev->kernel_queues[qid_base + stream]; 7032 7033 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 7034 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 7035 stream * qm_ci_stream_off; 7036 7037 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 7038 q->int_queue_len : HL_QUEUE_LENGTH; 7039 7040 hdev->asic_funcs->hw_queues_lock(hdev); 7041 7042 if (pr_sw_conf) 7043 gaudi_print_sw_config_stream_data(hdev, stream, qman_base); 7044 7045 ci = RREG32(pq_ci); 7046 7047 /* we should start printing from ci - 1 */ 7048 ci = gaudi_queue_idx_dec(ci, queue_len); 7049 7050 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 7051 struct hl_bd *bd; 7052 u64 addr; 7053 u32 len; 7054 7055 bd = q->kernel_address; 7056 bd += ci; 7057 7058 len = le32_to_cpu(bd->len); 7059 /* len 0 means uninitialized entry - break */ 7060 if (!len) 7061 break; 7062 7063 addr = le64_to_cpu(bd->ptr); 7064 7065 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 7066 stream, ci, addr, len); 7067 7068 /* get previous ci, wrap if needed */ 7069 ci = gaudi_queue_idx_dec(ci, queue_len); 7070 } 7071 7072 hdev->asic_funcs->hw_queues_unlock(hdev); 7073} 7074 7075/** 7076 * print_qman_data_on_err - extract QMAN data on error 7077 * 7078 * @hdev: pointer to the habanalabs device structure 7079 * @qid_base: first QID of the QMAN (out of 4 streams) 7080 * @stream: the QMAN's stream 7081 * @qman_base: base address of QMAN registers block 7082 * 7083 * This function attempts to extract as much data as possible on QMAN error. 7084 * On upper CP print the SW config stream data and last 8 PQEs. 7085 * On lower CP print the SW config data and the last PQEs of ALL 4 upper CPs. 7086 */ 7087static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 7088 u32 stream, u64 qman_base) 7089{ 7090 u32 i; 7091 7092 if (stream != QMAN_STREAMS) { 7093 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, 7094 true); 7095 return; 7096 } 7097 7098 gaudi_print_sw_config_stream_data(hdev, stream, qman_base); 7099 7100 for (i = 0; i < QMAN_STREAMS; i++) 7101 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base, 7102 false); 7103} 7104 7105static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 7106 const char *qm_name, 7107 u64 qman_base, 7108 u32 qid_base) 7109{ 7110 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 7111 u64 glbl_sts_addr, arb_err_addr; 7112 char reg_desc[32]; 7113 7114 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 7115 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 7116 7117 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 7118 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7119 glbl_sts_clr_val = 0; 7120 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7121 7122 if (!glbl_sts_val) 7123 continue; 7124 7125 if (i == QMAN_STREAMS) 7126 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7127 else 7128 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7129 7130 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 7131 if (glbl_sts_val & BIT(j)) { 7132 dev_err_ratelimited(hdev->dev, 7133 "%s %s. 
err cause: %s\n", 7134 qm_name, reg_desc, 7135 gaudi_qman_error_cause[j]); 7136 glbl_sts_clr_val |= BIT(j); 7137 } 7138 } 7139 7140 /* Write 1 clear errors */ 7141 if (!hdev->stop_on_err) 7142 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 7143 else 7144 print_qman_data_on_err(hdev, qid_base, i, qman_base); 7145 } 7146 7147 arb_err_val = RREG32(arb_err_addr); 7148 7149 if (!arb_err_val) 7150 return; 7151 7152 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7153 if (arb_err_val & BIT(j)) { 7154 dev_err_ratelimited(hdev->dev, 7155 "%s ARB_ERR. err cause: %s\n", 7156 qm_name, 7157 gaudi_qman_arb_error_cause[j]); 7158 } 7159 } 7160} 7161 7162static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7163 struct hl_eq_sm_sei_data *sei_data) 7164{ 7165 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7166 7167 /* Flip the bits as the enum is ordered in the opposite way */ 7168 index = (index ^ 0x3) & 0x3; 7169 7170 switch (sei_data->sei_cause) { 7171 case SM_SEI_SO_OVERFLOW: 7172 dev_err_ratelimited(hdev->dev, 7173 "%s SEI Error: SOB Group %u overflow/underflow", 7174 gaudi_sync_manager_names[index], 7175 le32_to_cpu(sei_data->sei_log)); 7176 break; 7177 case SM_SEI_LBW_4B_UNALIGNED: 7178 dev_err_ratelimited(hdev->dev, 7179 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7180 gaudi_sync_manager_names[index], 7181 le32_to_cpu(sei_data->sei_log)); 7182 break; 7183 case SM_SEI_AXI_RESPONSE_ERR: 7184 dev_err_ratelimited(hdev->dev, 7185 "%s SEI Error: AXI ID %u response error", 7186 gaudi_sync_manager_names[index], 7187 le32_to_cpu(sei_data->sei_log)); 7188 break; 7189 default: 7190 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7191 le32_to_cpu(sei_data->sei_log)); 7192 break; 7193 } 7194} 7195 7196static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7197 struct hl_eq_ecc_data *ecc_data) 7198{ 7199 struct ecc_info_extract_params params; 7200 u64 ecc_address = 0, ecc_syndrom = 0; 7201 u8 index, memory_wrapper_idx = 0; 7202 bool extract_info_from_fw; 7203 int rc; 7204 7205 if (hdev->asic_prop.fw_security_enabled) { 7206 extract_info_from_fw = true; 7207 goto extract_ecc_info; 7208 } 7209 7210 switch (event_type) { 7211 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7212 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7213 extract_info_from_fw = true; 7214 break; 7215 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7216 index = event_type - GAUDI_EVENT_TPC0_SERR; 7217 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7218 params.num_memories = 90; 7219 params.derr = false; 7220 extract_info_from_fw = false; 7221 break; 7222 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7223 index = event_type - GAUDI_EVENT_TPC0_DERR; 7224 params.block_address = 7225 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7226 params.num_memories = 90; 7227 params.derr = true; 7228 extract_info_from_fw = false; 7229 break; 7230 case GAUDI_EVENT_MME0_ACC_SERR: 7231 case GAUDI_EVENT_MME1_ACC_SERR: 7232 case GAUDI_EVENT_MME2_ACC_SERR: 7233 case GAUDI_EVENT_MME3_ACC_SERR: 7234 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7235 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7236 params.num_memories = 128; 7237 params.derr = false; 7238 extract_info_from_fw = false; 7239 break; 7240 case GAUDI_EVENT_MME0_ACC_DERR: 7241 case GAUDI_EVENT_MME1_ACC_DERR: 7242 case GAUDI_EVENT_MME2_ACC_DERR: 7243 case GAUDI_EVENT_MME3_ACC_DERR: 7244 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7245 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7246 params.num_memories = 128; 7247 params.derr = true; 7248 extract_info_from_fw = false; 7249 break; 7250 case GAUDI_EVENT_MME0_SBAB_SERR: 7251 case GAUDI_EVENT_MME1_SBAB_SERR: 7252 case GAUDI_EVENT_MME2_SBAB_SERR: 7253 case GAUDI_EVENT_MME3_SBAB_SERR: 7254 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7255 params.block_address = 7256 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7257 params.num_memories = 33; 7258 params.derr = false; 7259 extract_info_from_fw = false; 7260 break; 7261 case GAUDI_EVENT_MME0_SBAB_DERR: 7262 case GAUDI_EVENT_MME1_SBAB_DERR: 7263 case GAUDI_EVENT_MME2_SBAB_DERR: 7264 case GAUDI_EVENT_MME3_SBAB_DERR: 7265 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7266 params.block_address = 7267 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7268 params.num_memories = 33; 7269 params.derr = true; 7270 extract_info_from_fw = false; 7271 break; 7272 default: 7273 return; 7274 } 7275 7276extract_ecc_info: 7277 if (extract_info_from_fw) { 7278 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7279 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7280 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7281 } else { 7282 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7283 &ecc_syndrom, &memory_wrapper_idx); 7284 if (rc) 7285 return; 7286 } 7287 7288 dev_err(hdev->dev, 7289 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7290 ecc_address, ecc_syndrom, memory_wrapper_idx); 7291} 7292 7293static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type) 7294{ 7295 u64 qman_base; 7296 char desc[32]; 7297 u32 qid_base; 7298 u8 index; 7299 7300 switch (event_type) { 7301 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7302 index = event_type - GAUDI_EVENT_TPC0_QM; 7303 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7304 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7305 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7306 break; 7307 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7308 index = event_type - GAUDI_EVENT_MME0_QM; 7309 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS; 7310 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7311 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7312 break; 7313 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7314 index = event_type - GAUDI_EVENT_DMA0_QM; 7315 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7316 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7317 if (index > 1) 7318 qid_base++; 7319 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7320 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7321 break; 7322 case GAUDI_EVENT_NIC0_QM0: 7323 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7324 qman_base = mmNIC0_QM0_BASE; 7325 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7326 break; 7327 case GAUDI_EVENT_NIC0_QM1: 7328 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7329 qman_base = mmNIC0_QM1_BASE; 7330 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7331 break; 7332 case GAUDI_EVENT_NIC1_QM0: 7333 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7334 qman_base = mmNIC1_QM0_BASE; 7335 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7336 break; 7337 case GAUDI_EVENT_NIC1_QM1: 7338 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7339 qman_base = mmNIC1_QM1_BASE; 7340 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7341 break; 7342 case GAUDI_EVENT_NIC2_QM0: 7343 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7344 qman_base = mmNIC2_QM0_BASE; 7345 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7346 break; 7347 case GAUDI_EVENT_NIC2_QM1: 7348 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7349 qman_base = mmNIC2_QM1_BASE; 7350 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7351 break; 7352 case GAUDI_EVENT_NIC3_QM0: 7353 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7354 qman_base = mmNIC3_QM0_BASE; 7355 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7356 break; 7357 case GAUDI_EVENT_NIC3_QM1: 7358 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7359 qman_base = mmNIC3_QM1_BASE; 7360 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7361 break; 7362 case GAUDI_EVENT_NIC4_QM0: 7363 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7364 qman_base = mmNIC4_QM0_BASE; 7365 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7366 break; 7367 case GAUDI_EVENT_NIC4_QM1: 7368 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7369 qman_base = mmNIC4_QM1_BASE; 7370 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7371 break; 7372 default: 7373 return; 7374 } 7375 7376 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base); 7377} 7378 7379static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7380 bool razwi) 7381{ 7382 u32 engine_id_1, engine_id_2; 7383 char desc[64] = ""; 7384 u64 razwi_addr = 0; 7385 u8 razwi_type; 7386 int rc; 7387 7388 /* 7389 * Init engine id by default as not valid and only if razwi initiated from engine with 7390 * engine id it will get valid value. 
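 * (These ids stay at U16_MAX when the RAZWI initiator cannot be mapped to an engine; the
 * non_engine_initiator flag assigned further down is derived from exactly that check.)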
7391 * Init razwi type to default, will be changed only if razwi caused by page fault of 7392 * MMU access error 7393 */ 7394 engine_id_1 = U16_MAX; 7395 engine_id_2 = U16_MAX; 7396 razwi_type = U8_MAX; 7397 7398 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7399 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7400 event_type, desc); 7401 7402 if (razwi) { 7403 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2); 7404 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); 7405 7406 /* In case it's the first razwi, save its parameters*/ 7407 rc = atomic_cmpxchg(&hdev->last_error.razwi.write_disable, 0, 1); 7408 if (!rc) { 7409 hdev->last_error.razwi.timestamp = ktime_get(); 7410 hdev->last_error.razwi.addr = razwi_addr; 7411 hdev->last_error.razwi.engine_id_1 = engine_id_1; 7412 hdev->last_error.razwi.engine_id_2 = engine_id_2; 7413 /* 7414 * If first engine id holds non valid value the razwi initiator 7415 * does not have engine id 7416 */ 7417 hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); 7418 hdev->last_error.razwi.type = razwi_type; 7419 7420 } 7421 } 7422} 7423 7424static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7425 struct cpucp_pkt_sync_err *sync_err) 7426{ 7427 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7428 7429 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", 7430 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); 7431} 7432 7433static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7434 struct hl_eq_fw_alive *fw_alive) 7435{ 7436 dev_err(hdev->dev, 7437 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7438 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? 
7439 "Minor" : "Critical", fw_alive->process_id, 7440 fw_alive->thread_id, fw_alive->uptime_seconds); 7441} 7442 7443static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7444 void *data) 7445{ 7446 char desc[64] = "", *type; 7447 struct eq_nic_sei_event *eq_nic_sei = data; 7448 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7449 7450 switch (eq_nic_sei->axi_error_cause) { 7451 case RXB: 7452 type = "RXB"; 7453 break; 7454 case RXE: 7455 type = "RXE"; 7456 break; 7457 case TXS: 7458 type = "TXS"; 7459 break; 7460 case TXE: 7461 type = "TXE"; 7462 break; 7463 case QPC_RESP: 7464 type = "QPC_RESP"; 7465 break; 7466 case NON_AXI_ERR: 7467 type = "NON_AXI_ERR"; 7468 break; 7469 case TMR: 7470 type = "TMR"; 7471 break; 7472 default: 7473 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7474 eq_nic_sei->axi_error_cause); 7475 type = "N/A"; 7476 break; 7477 } 7478 7479 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7480 eq_nic_sei->id); 7481 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7482 event_type, desc); 7483} 7484 7485static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) 7486{ 7487 /* GAUDI doesn't support any reset except hard-reset */ 7488 return -EPERM; 7489} 7490 7491static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7492 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7493{ 7494 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7495 int rc = 0; 7496 7497 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7498 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7499 if (!hbm_ecc_data) { 7500 dev_err(hdev->dev, "No FW ECC data"); 7501 return 0; 7502 } 7503 7504 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7505 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7506 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7507 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7508 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7509 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7510 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7511 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7512 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7513 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7514 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7515 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7516 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7517 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7518 7519 dev_err(hdev->dev, 7520 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7521 device, ch, wr_par, rd_par, ca_par, serr, derr); 7522 dev_err(hdev->dev, 7523 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7524 device, ch, hbm_ecc_data->first_addr, type, 7525 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7526 hbm_ecc_data->dec_cnt); 7527 return 0; 7528 } 7529 7530 if (hdev->asic_prop.fw_security_enabled) { 7531 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7532 return 0; 7533 } 7534 7535 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7536 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7537 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7538 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7539 if (val) { 7540 rc = -EIO; 7541 dev_err(hdev->dev, 7542 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7543 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7544 (val >> 2) & 0x1, (val >> 3) & 0x1, 7545 (val >> 4) & 0x1); 7546 7547 val2 = RREG32(base + ch 
* 0x1000 + 0x060); 7548 dev_err(hdev->dev, 7549 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7550 device, ch * 2, 7551 RREG32(base + ch * 0x1000 + 0x064), 7552 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7553 (val2 & 0xFF0000) >> 16, 7554 (val2 & 0xFF000000) >> 24); 7555 } 7556 7557 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7558 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7559 if (val) { 7560 rc = -EIO; 7561 dev_err(hdev->dev, 7562 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7563 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7564 (val >> 2) & 0x1, (val >> 3) & 0x1, 7565 (val >> 4) & 0x1); 7566 7567 val2 = RREG32(base + ch * 0x1000 + 0x070); 7568 dev_err(hdev->dev, 7569 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7570 device, ch * 2 + 1, 7571 RREG32(base + ch * 0x1000 + 0x074), 7572 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7573 (val2 & 0xFF0000) >> 16, 7574 (val2 & 0xFF000000) >> 24); 7575 } 7576 7577 /* Clear interrupts */ 7578 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7579 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7580 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7581 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7582 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7583 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7584 } 7585 7586 val = RREG32(base + 0x8F30); 7587 val2 = RREG32(base + 0x8F34); 7588 if (val | val2) { 7589 rc = -EIO; 7590 dev_err(hdev->dev, 7591 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7592 device, val, val2); 7593 } 7594 val = RREG32(base + 0x8F40); 7595 val2 = RREG32(base + 0x8F44); 7596 if (val | val2) { 7597 rc = -EIO; 7598 dev_err(hdev->dev, 7599 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7600 device, val, val2); 7601 } 7602 7603 return rc; 7604} 7605 7606static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7607{ 7608 switch (hbm_event_type) { 7609 case GAUDI_EVENT_HBM0_SPI_0: 7610 case GAUDI_EVENT_HBM0_SPI_1: 7611 return 0; 7612 case GAUDI_EVENT_HBM1_SPI_0: 7613 case GAUDI_EVENT_HBM1_SPI_1: 7614 return 1; 7615 case GAUDI_EVENT_HBM2_SPI_0: 7616 case GAUDI_EVENT_HBM2_SPI_1: 7617 return 2; 7618 case GAUDI_EVENT_HBM3_SPI_0: 7619 case GAUDI_EVENT_HBM3_SPI_1: 7620 return 3; 7621 default: 7622 break; 7623 } 7624 7625 /* Should never happen */ 7626 return 0; 7627} 7628 7629static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7630 char *interrupt_name) 7631{ 7632 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7633 bool soft_reset_required = false; 7634 7635 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7636 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7637 7638 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7639 if (tpc_interrupts_cause & BIT(i)) { 7640 dev_err_ratelimited(hdev->dev, 7641 "TPC%d_%s interrupt cause: %s\n", 7642 tpc_id, interrupt_name, 7643 gaudi_tpc_interrupts_cause[i]); 7644 /* If this is QM error, we need to soft-reset */ 7645 if (i == 15) 7646 soft_reset_required = true; 7647 } 7648 7649 /* Clear interrupts */ 7650 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7651 7652 return soft_reset_required; 7653} 7654 7655static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7656{ 7657 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7658} 7659 7660static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7661{ 7662 return 
(tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7663} 7664 7665static void gaudi_print_clk_change_info(struct hl_device *hdev, 7666 u16 event_type) 7667{ 7668 ktime_t zero_time = ktime_set(0, 0); 7669 7670 mutex_lock(&hdev->clk_throttling.lock); 7671 7672 switch (event_type) { 7673 case GAUDI_EVENT_FIX_POWER_ENV_S: 7674 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7675 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7676 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7677 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7678 dev_info_ratelimited(hdev->dev, 7679 "Clock throttling due to power consumption\n"); 7680 break; 7681 7682 case GAUDI_EVENT_FIX_POWER_ENV_E: 7683 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7684 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7685 dev_info_ratelimited(hdev->dev, 7686 "Power envelope is safe, back to optimal clock\n"); 7687 break; 7688 7689 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7690 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7691 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7692 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7693 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7694 dev_info_ratelimited(hdev->dev, 7695 "Clock throttling due to overheating\n"); 7696 break; 7697 7698 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7699 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7700 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7701 dev_info_ratelimited(hdev->dev, 7702 "Thermal envelope is safe, back to optimal clock\n"); 7703 break; 7704 7705 default: 7706 dev_err(hdev->dev, "Received invalid clock change event %d\n", 7707 event_type); 7708 break; 7709 } 7710 7711 mutex_unlock(&hdev->clk_throttling.lock); 7712} 7713 7714static void gaudi_handle_eqe(struct hl_device *hdev, 7715 struct hl_eq_entry *eq_entry) 7716{ 7717 struct gaudi_device *gaudi = hdev->asic_specific; 7718 u64 data = le64_to_cpu(eq_entry->data[0]); 7719 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7720 u32 fw_fatal_err_flag = 0; 7721 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7722 >> EQ_CTL_EVENT_TYPE_SHIFT); 7723 bool reset_required; 7724 u8 cause; 7725 int rc; 7726 7727 if (event_type >= GAUDI_EVENT_SIZE) { 7728 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7729 event_type, GAUDI_EVENT_SIZE - 1); 7730 return; 7731 } 7732 7733 gaudi->events_stat[event_type]++; 7734 gaudi->events_stat_aggregate[event_type]++; 7735 7736 switch (event_type) { 7737 case GAUDI_EVENT_PCIE_CORE_DERR: 7738 case GAUDI_EVENT_PCIE_IF_DERR: 7739 case GAUDI_EVENT_PCIE_PHY_DERR: 7740 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7741 case GAUDI_EVENT_MME0_ACC_DERR: 7742 case GAUDI_EVENT_MME0_SBAB_DERR: 7743 case GAUDI_EVENT_MME1_ACC_DERR: 7744 case GAUDI_EVENT_MME1_SBAB_DERR: 7745 case GAUDI_EVENT_MME2_ACC_DERR: 7746 case GAUDI_EVENT_MME2_SBAB_DERR: 7747 case GAUDI_EVENT_MME3_ACC_DERR: 7748 case GAUDI_EVENT_MME3_SBAB_DERR: 7749 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7750 fallthrough; 7751 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7752 case GAUDI_EVENT_PSOC_MEM_DERR: 7753 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7754 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7755 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7756 case GAUDI_EVENT_DMA_IF0_DERR ... 
GAUDI_EVENT_DMA_IF3_DERR: 7757 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7758 case GAUDI_EVENT_MMU_DERR: 7759 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7760 gaudi_print_irq_info(hdev, event_type, true); 7761 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7762 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7763 goto reset_device; 7764 7765 case GAUDI_EVENT_GIC500: 7766 case GAUDI_EVENT_AXI_ECC: 7767 case GAUDI_EVENT_L2_RAM_ECC: 7768 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: 7769 gaudi_print_irq_info(hdev, event_type, false); 7770 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7771 goto reset_device; 7772 7773 case GAUDI_EVENT_HBM0_SPI_0: 7774 case GAUDI_EVENT_HBM1_SPI_0: 7775 case GAUDI_EVENT_HBM2_SPI_0: 7776 case GAUDI_EVENT_HBM3_SPI_0: 7777 gaudi_print_irq_info(hdev, event_type, false); 7778 gaudi_hbm_read_interrupts(hdev, 7779 gaudi_hbm_event_to_dev(event_type), 7780 &eq_entry->hbm_ecc_data); 7781 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7782 goto reset_device; 7783 7784 case GAUDI_EVENT_HBM0_SPI_1: 7785 case GAUDI_EVENT_HBM1_SPI_1: 7786 case GAUDI_EVENT_HBM2_SPI_1: 7787 case GAUDI_EVENT_HBM3_SPI_1: 7788 gaudi_print_irq_info(hdev, event_type, false); 7789 gaudi_hbm_read_interrupts(hdev, 7790 gaudi_hbm_event_to_dev(event_type), 7791 &eq_entry->hbm_ecc_data); 7792 hl_fw_unmask_irq(hdev, event_type); 7793 break; 7794 7795 case GAUDI_EVENT_TPC0_DEC: 7796 case GAUDI_EVENT_TPC1_DEC: 7797 case GAUDI_EVENT_TPC2_DEC: 7798 case GAUDI_EVENT_TPC3_DEC: 7799 case GAUDI_EVENT_TPC4_DEC: 7800 case GAUDI_EVENT_TPC5_DEC: 7801 case GAUDI_EVENT_TPC6_DEC: 7802 case GAUDI_EVENT_TPC7_DEC: 7803 gaudi_print_irq_info(hdev, event_type, true); 7804 reset_required = gaudi_tpc_read_interrupts(hdev, 7805 tpc_dec_event_to_tpc_id(event_type), 7806 "AXI_SLV_DEC_Error"); 7807 if (reset_required) { 7808 dev_err(hdev->dev, "reset required due to %s\n", 7809 gaudi_irq_map_table[event_type].name); 7810 7811 hl_device_reset(hdev, 0); 7812 } else { 7813 hl_fw_unmask_irq(hdev, event_type); 7814 } 7815 break; 7816 7817 case GAUDI_EVENT_TPC0_KRN_ERR: 7818 case GAUDI_EVENT_TPC1_KRN_ERR: 7819 case GAUDI_EVENT_TPC2_KRN_ERR: 7820 case GAUDI_EVENT_TPC3_KRN_ERR: 7821 case GAUDI_EVENT_TPC4_KRN_ERR: 7822 case GAUDI_EVENT_TPC5_KRN_ERR: 7823 case GAUDI_EVENT_TPC6_KRN_ERR: 7824 case GAUDI_EVENT_TPC7_KRN_ERR: 7825 gaudi_print_irq_info(hdev, event_type, true); 7826 reset_required = gaudi_tpc_read_interrupts(hdev, 7827 tpc_krn_event_to_tpc_id(event_type), 7828 "KRN_ERR"); 7829 if (reset_required) { 7830 dev_err(hdev->dev, "reset required due to %s\n", 7831 gaudi_irq_map_table[event_type].name); 7832 7833 hl_device_reset(hdev, 0); 7834 } else { 7835 hl_fw_unmask_irq(hdev, event_type); 7836 } 7837 break; 7838 7839 case GAUDI_EVENT_PCIE_CORE_SERR: 7840 case GAUDI_EVENT_PCIE_IF_SERR: 7841 case GAUDI_EVENT_PCIE_PHY_SERR: 7842 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7843 case GAUDI_EVENT_MME0_ACC_SERR: 7844 case GAUDI_EVENT_MME0_SBAB_SERR: 7845 case GAUDI_EVENT_MME1_ACC_SERR: 7846 case GAUDI_EVENT_MME1_SBAB_SERR: 7847 case GAUDI_EVENT_MME2_ACC_SERR: 7848 case GAUDI_EVENT_MME2_SBAB_SERR: 7849 case GAUDI_EVENT_MME3_ACC_SERR: 7850 case GAUDI_EVENT_MME3_SBAB_SERR: 7851 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7852 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7853 case GAUDI_EVENT_PSOC_MEM_SERR: 7854 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7855 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7856 case GAUDI_EVENT_NIC0_SERR ... 
GAUDI_EVENT_NIC4_SERR: 7857 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7858 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7859 fallthrough; 7860 case GAUDI_EVENT_MMU_SERR: 7861 gaudi_print_irq_info(hdev, event_type, true); 7862 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7863 hl_fw_unmask_irq(hdev, event_type); 7864 break; 7865 7866 case GAUDI_EVENT_PCIE_DEC: 7867 case GAUDI_EVENT_MME0_WBC_RSP: 7868 case GAUDI_EVENT_MME0_SBAB0_RSP: 7869 case GAUDI_EVENT_MME1_WBC_RSP: 7870 case GAUDI_EVENT_MME1_SBAB0_RSP: 7871 case GAUDI_EVENT_MME2_WBC_RSP: 7872 case GAUDI_EVENT_MME2_SBAB0_RSP: 7873 case GAUDI_EVENT_MME3_WBC_RSP: 7874 case GAUDI_EVENT_MME3_SBAB0_RSP: 7875 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7876 case GAUDI_EVENT_PSOC_AXI_DEC: 7877 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7878 case GAUDI_EVENT_MMU_PAGE_FAULT: 7879 case GAUDI_EVENT_MMU_WR_PERM: 7880 case GAUDI_EVENT_RAZWI_OR_ADC: 7881 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7882 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7883 fallthrough; 7884 case GAUDI_EVENT_NIC0_QM0: 7885 case GAUDI_EVENT_NIC0_QM1: 7886 case GAUDI_EVENT_NIC1_QM0: 7887 case GAUDI_EVENT_NIC1_QM1: 7888 case GAUDI_EVENT_NIC2_QM0: 7889 case GAUDI_EVENT_NIC2_QM1: 7890 case GAUDI_EVENT_NIC3_QM0: 7891 case GAUDI_EVENT_NIC3_QM1: 7892 case GAUDI_EVENT_NIC4_QM0: 7893 case GAUDI_EVENT_NIC4_QM1: 7894 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7895 gaudi_print_irq_info(hdev, event_type, true); 7896 gaudi_handle_qman_err(hdev, event_type); 7897 hl_fw_unmask_irq(hdev, event_type); 7898 break; 7899 7900 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7901 gaudi_print_irq_info(hdev, event_type, true); 7902 gaudi_handle_qman_err(hdev, event_type); 7903 hl_fw_unmask_irq(hdev, event_type); 7904 7905 /* In TPC QM event, notify on TPC assertion. While there isn't 7906 * a specific event for assertion yet, the FW generates QM event. 7907 * The SW upper layer will inspect an internal mapped area to indicate 7908 * if the event is a tpc assertion or tpc QM. 7909 */ 7910 hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT); 7911 break; 7912 7913 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7914 gaudi_print_irq_info(hdev, event_type, true); 7915 goto reset_device; 7916 7917 case GAUDI_EVENT_TPC0_BMON_SPMU: 7918 case GAUDI_EVENT_TPC1_BMON_SPMU: 7919 case GAUDI_EVENT_TPC2_BMON_SPMU: 7920 case GAUDI_EVENT_TPC3_BMON_SPMU: 7921 case GAUDI_EVENT_TPC4_BMON_SPMU: 7922 case GAUDI_EVENT_TPC5_BMON_SPMU: 7923 case GAUDI_EVENT_TPC6_BMON_SPMU: 7924 case GAUDI_EVENT_TPC7_BMON_SPMU: 7925 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7926 gaudi_print_irq_info(hdev, event_type, false); 7927 hl_fw_unmask_irq(hdev, event_type); 7928 break; 7929 7930 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: 7931 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7932 hl_fw_unmask_irq(hdev, event_type); 7933 break; 7934 7935 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7936 gaudi_print_irq_info(hdev, event_type, false); 7937 gaudi_print_sm_sei_info(hdev, event_type, 7938 &eq_entry->sm_sei_data); 7939 rc = hl_state_dump(hdev); 7940 if (rc) 7941 dev_err(hdev->dev, 7942 "Error during system state dump %d\n", rc); 7943 hl_fw_unmask_irq(hdev, event_type); 7944 break; 7945 7946 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7947 break; 7948 7949 case GAUDI_EVENT_FIX_POWER_ENV_S ... 
GAUDI_EVENT_FIX_THERMAL_ENV_E: 7950 gaudi_print_clk_change_info(hdev, event_type); 7951 hl_fw_unmask_irq(hdev, event_type); 7952 break; 7953 7954 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7955 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7956 dev_err(hdev->dev, 7957 "Received high temp H/W interrupt %d (cause %d)\n", 7958 event_type, cause); 7959 break; 7960 7961 case GAUDI_EVENT_DEV_RESET_REQ: 7962 gaudi_print_irq_info(hdev, event_type, false); 7963 goto reset_device; 7964 7965 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7966 gaudi_print_irq_info(hdev, event_type, false); 7967 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7968 goto reset_device; 7969 7970 case GAUDI_EVENT_FW_ALIVE_S: 7971 gaudi_print_irq_info(hdev, event_type, false); 7972 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7973 goto reset_device; 7974 7975 default: 7976 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7977 event_type); 7978 break; 7979 } 7980 7981 return; 7982 7983reset_device: 7984 if (hdev->asic_prop.fw_security_enabled) 7985 hl_device_reset(hdev, HL_DRV_RESET_HARD 7986 | HL_DRV_RESET_BYPASS_REQ_TO_FW 7987 | fw_fatal_err_flag); 7988 else if (hdev->hard_reset_on_fw_events) 7989 hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag); 7990 else 7991 hl_fw_unmask_irq(hdev, event_type); 7992} 7993 7994static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, 7995 u32 *size) 7996{ 7997 struct gaudi_device *gaudi = hdev->asic_specific; 7998 7999 if (aggregate) { 8000 *size = (u32) sizeof(gaudi->events_stat_aggregate); 8001 return gaudi->events_stat_aggregate; 8002 } 8003 8004 *size = (u32) sizeof(gaudi->events_stat); 8005 return gaudi->events_stat; 8006} 8007 8008static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, 8009 u32 flags) 8010{ 8011 struct gaudi_device *gaudi = hdev->asic_specific; 8012 u32 status, timeout_usec; 8013 int rc; 8014 8015 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 8016 hdev->reset_info.hard_reset_pending) 8017 return 0; 8018 8019 if (hdev->pldm) 8020 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 8021 else 8022 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 8023 8024 /* L0 & L1 invalidation */ 8025 WREG32(mmSTLB_INV_PS, 3); 8026 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 8027 WREG32(mmSTLB_INV_PS, 2); 8028 8029 rc = hl_poll_timeout( 8030 hdev, 8031 mmSTLB_INV_PS, 8032 status, 8033 !status, 8034 1000, 8035 timeout_usec); 8036 8037 WREG32(mmSTLB_INV_SET, 0); 8038 8039 return rc; 8040} 8041 8042static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 8043 bool is_hard, u32 flags, 8044 u32 asid, u64 va, u64 size) 8045{ 8046 /* Treat as invalidate all because there is no range invalidation 8047 * in Gaudi 8048 */ 8049 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 8050} 8051 8052static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, 8053 u32 asid, u64 phys_addr) 8054{ 8055 u32 status, timeout_usec; 8056 int rc; 8057 8058 if (hdev->pldm) 8059 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 8060 else 8061 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 8062 8063 WREG32(MMU_ASID, asid); 8064 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 8065 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 8066 WREG32(MMU_BUSY, 0x80000000); 8067 8068 rc = hl_poll_timeout( 8069 hdev, 8070 MMU_BUSY, 8071 status, 8072 !(status & 0x80000000), 8073 1000, 8074 timeout_usec); 8075 8076 if (rc) { 8077 dev_err(hdev->dev, 8078 "Timeout during MMU hop0 config of asid %d\n", 
asid); 8079 return rc; 8080 } 8081 8082 return 0; 8083} 8084 8085static int gaudi_send_heartbeat(struct hl_device *hdev) 8086{ 8087 struct gaudi_device *gaudi = hdev->asic_specific; 8088 8089 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8090 return 0; 8091 8092 return hl_fw_send_heartbeat(hdev); 8093} 8094 8095static int gaudi_cpucp_info_get(struct hl_device *hdev) 8096{ 8097 struct gaudi_device *gaudi = hdev->asic_specific; 8098 struct asic_fixed_properties *prop = &hdev->asic_prop; 8099 int rc; 8100 8101 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8102 return 0; 8103 8104 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8105 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8106 mmCPU_BOOT_ERR1); 8107 if (rc) 8108 return rc; 8109 8110 if (!strlen(prop->cpucp_info.card_name)) 8111 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8112 CARD_NAME_MAX_LEN); 8113 8114 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8115 8116 set_default_power_values(hdev); 8117 8118 return 0; 8119} 8120 8121static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, 8122 u8 mask_len, struct seq_file *s) 8123{ 8124 struct gaudi_device *gaudi = hdev->asic_specific; 8125 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8126 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8127 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8128 unsigned long *mask = (unsigned long *)mask_arr; 8129 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8130 bool is_idle = true, is_eng_idle, is_slave; 8131 u64 offset; 8132 int i, dma_id, port; 8133 8134 if (s) 8135 seq_puts(s, 8136 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8137 "--- ------- ------------ ---------- -------------\n"); 8138 8139 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8140 dma_id = gaudi_dma_assignment[i]; 8141 offset = dma_id * DMA_QMAN_OFFSET; 8142 8143 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8144 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8145 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8146 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8147 IS_DMA_IDLE(dma_core_sts0); 8148 is_idle &= is_eng_idle; 8149 8150 if (mask && !is_eng_idle) 8151 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8152 if (s) 8153 seq_printf(s, fmt, dma_id, 8154 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8155 qm_cgm_sts, dma_core_sts0); 8156 } 8157 8158 if (s) 8159 seq_puts(s, 8160 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8161 "--- ------- ------------ ---------- ----------\n"); 8162 8163 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8164 offset = i * TPC_QMAN_OFFSET; 8165 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8166 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8167 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8168 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8169 IS_TPC_IDLE(tpc_cfg_sts); 8170 is_idle &= is_eng_idle; 8171 8172 if (mask && !is_eng_idle) 8173 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8174 if (s) 8175 seq_printf(s, fmt, i, 8176 is_eng_idle ? 
"Y" : "N", 8177 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8178 } 8179 8180 if (s) 8181 seq_puts(s, 8182 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8183 "--- ------- ------------ ---------- -----------\n"); 8184 8185 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8186 offset = i * MME_QMAN_OFFSET; 8187 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8188 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8189 8190 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8191 is_slave = i % 2; 8192 if (!is_slave) { 8193 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8194 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8195 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8196 } 8197 8198 is_idle &= is_eng_idle; 8199 8200 if (mask && !is_eng_idle) 8201 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8202 if (s) { 8203 if (!is_slave) 8204 seq_printf(s, fmt, i, 8205 is_eng_idle ? "Y" : "N", 8206 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8207 else 8208 seq_printf(s, mme_slave_fmt, i, 8209 is_eng_idle ? "Y" : "N", "-", 8210 "-", mme_arch_sts); 8211 } 8212 } 8213 8214 if (s) 8215 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8216 "--- ------- ------------ ----------\n"); 8217 8218 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8219 offset = i * NIC_MACRO_QMAN_OFFSET; 8220 port = 2 * i; 8221 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8222 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8223 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8224 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8225 is_idle &= is_eng_idle; 8226 8227 if (mask && !is_eng_idle) 8228 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8229 if (s) 8230 seq_printf(s, nic_fmt, port, 8231 is_eng_idle ? "Y" : "N", 8232 qm_glbl_sts0, qm_cgm_sts); 8233 } 8234 8235 port = 2 * i + 1; 8236 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8237 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8238 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8239 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8240 is_idle &= is_eng_idle; 8241 8242 if (mask && !is_eng_idle) 8243 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8244 if (s) 8245 seq_printf(s, nic_fmt, port, 8246 is_eng_idle ? 
"Y" : "N", 8247 qm_glbl_sts0, qm_cgm_sts); 8248 } 8249 } 8250 8251 if (s) 8252 seq_puts(s, "\n"); 8253 8254 return is_idle; 8255} 8256 8257static void gaudi_hw_queues_lock(struct hl_device *hdev) 8258 __acquires(&gaudi->hw_queues_lock) 8259{ 8260 struct gaudi_device *gaudi = hdev->asic_specific; 8261 8262 spin_lock(&gaudi->hw_queues_lock); 8263} 8264 8265static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8266 __releases(&gaudi->hw_queues_lock) 8267{ 8268 struct gaudi_device *gaudi = hdev->asic_specific; 8269 8270 spin_unlock(&gaudi->hw_queues_lock); 8271} 8272 8273static u32 gaudi_get_pci_id(struct hl_device *hdev) 8274{ 8275 return hdev->pdev->device; 8276} 8277 8278static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8279 size_t max_size) 8280{ 8281 struct gaudi_device *gaudi = hdev->asic_specific; 8282 8283 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8284 return 0; 8285 8286 return hl_fw_get_eeprom_data(hdev, data, max_size); 8287} 8288 8289static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8290{ 8291 struct gaudi_device *gaudi = hdev->asic_specific; 8292 8293 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8294 return 0; 8295 8296 return hl_fw_get_monitor_dump(hdev, data); 8297} 8298 8299/* 8300 * this function should be used only during initialization and/or after reset, 8301 * when there are no active users. 8302 */ 8303static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8304{ 8305 u64 kernel_timeout; 8306 u32 status, offset; 8307 int rc; 8308 8309 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8310 8311 if (hdev->pldm) 8312 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8313 else 8314 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8315 8316 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8317 lower_32_bits(tpc_kernel)); 8318 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8319 upper_32_bits(tpc_kernel)); 8320 8321 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8322 lower_32_bits(tpc_kernel)); 8323 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8324 upper_32_bits(tpc_kernel)); 8325 /* set a valid LUT pointer, content is of no significance */ 8326 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8327 lower_32_bits(tpc_kernel)); 8328 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8329 upper_32_bits(tpc_kernel)); 8330 8331 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8332 lower_32_bits(CFG_BASE + 8333 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8334 8335 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8336 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8337 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8338 /* wait a bit for the engine to start executing */ 8339 usleep_range(1000, 1500); 8340 8341 /* wait until engine has finished executing */ 8342 rc = hl_poll_timeout( 8343 hdev, 8344 mmTPC0_CFG_STATUS + offset, 8345 status, 8346 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8347 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8348 1000, 8349 kernel_timeout); 8350 8351 if (rc) { 8352 dev_err(hdev->dev, 8353 "Timeout while waiting for TPC%d icache prefetch\n", 8354 tpc_id); 8355 return -EIO; 8356 } 8357 8358 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8359 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8360 8361 /* wait a bit for the engine to start executing */ 8362 usleep_range(1000, 1500); 8363 8364 /* wait until engine has finished executing */ 8365 rc = hl_poll_timeout( 8366 hdev, 8367 mmTPC0_CFG_STATUS + offset, 8368 status, 8369 (status & 
TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8370 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8371 1000, 8372 kernel_timeout); 8373 8374 if (rc) { 8375 dev_err(hdev->dev, 8376 "Timeout while waiting for TPC%d vector pipe\n", 8377 tpc_id); 8378 return -EIO; 8379 } 8380 8381 rc = hl_poll_timeout( 8382 hdev, 8383 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8384 status, 8385 (status == 0), 8386 1000, 8387 kernel_timeout); 8388 8389 if (rc) { 8390 dev_err(hdev->dev, 8391 "Timeout while waiting for TPC%d kernel to execute\n", 8392 tpc_id); 8393 return -EIO; 8394 } 8395 8396 return 0; 8397} 8398 8399static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8400 struct hl_ctx *ctx) 8401{ 8402 struct gaudi_device *gaudi = hdev->asic_specific; 8403 int min_alloc_order, rc, collective_cb_size; 8404 8405 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8406 return 0; 8407 8408 hdev->internal_cb_pool_virt_addr = 8409 hdev->asic_funcs->asic_dma_alloc_coherent(hdev, 8410 HOST_SPACE_INTERNAL_CB_SZ, 8411 &hdev->internal_cb_pool_dma_addr, 8412 GFP_KERNEL | __GFP_ZERO); 8413 8414 if (!hdev->internal_cb_pool_virt_addr) 8415 return -ENOMEM; 8416 8417 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8418 sizeof(struct packet_fence); 8419 min_alloc_order = ilog2(collective_cb_size); 8420 8421 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8422 if (!hdev->internal_cb_pool) { 8423 dev_err(hdev->dev, 8424 "Failed to create internal CB pool\n"); 8425 rc = -ENOMEM; 8426 goto free_internal_cb_pool; 8427 } 8428 8429 rc = gen_pool_add(hdev->internal_cb_pool, 8430 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8431 HOST_SPACE_INTERNAL_CB_SZ, -1); 8432 if (rc) { 8433 dev_err(hdev->dev, 8434 "Failed to add memory to internal CB pool\n"); 8435 rc = -EFAULT; 8436 goto destroy_internal_cb_pool; 8437 } 8438 8439 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8440 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8441 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8442 8443 if (!hdev->internal_cb_va_base) { 8444 rc = -ENOMEM; 8445 goto destroy_internal_cb_pool; 8446 } 8447 8448 mutex_lock(&ctx->mmu_lock); 8449 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8450 hdev->internal_cb_pool_dma_addr, 8451 HOST_SPACE_INTERNAL_CB_SZ); 8452 8453 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8454 mutex_unlock(&ctx->mmu_lock); 8455 8456 if (rc) 8457 goto unreserve_internal_cb_pool; 8458 8459 return 0; 8460 8461unreserve_internal_cb_pool: 8462 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8463 HOST_SPACE_INTERNAL_CB_SZ); 8464destroy_internal_cb_pool: 8465 gen_pool_destroy(hdev->internal_cb_pool); 8466free_internal_cb_pool: 8467 hdev->asic_funcs->asic_dma_free_coherent(hdev, 8468 HOST_SPACE_INTERNAL_CB_SZ, 8469 hdev->internal_cb_pool_virt_addr, 8470 hdev->internal_cb_pool_dma_addr); 8471 8472 return rc; 8473} 8474 8475static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8476 struct hl_ctx *ctx) 8477{ 8478 struct gaudi_device *gaudi = hdev->asic_specific; 8479 8480 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8481 return; 8482 8483 mutex_lock(&ctx->mmu_lock); 8484 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8485 HOST_SPACE_INTERNAL_CB_SZ); 8486 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8487 HOST_SPACE_INTERNAL_CB_SZ); 8488 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8489 mutex_unlock(&ctx->mmu_lock); 8490 8491 gen_pool_destroy(hdev->internal_cb_pool); 8492 8493 hdev->asic_funcs->asic_dma_free_coherent(hdev, 8494 HOST_SPACE_INTERNAL_CB_SZ, 8495 
hdev->internal_cb_pool_virt_addr, 8496 hdev->internal_cb_pool_dma_addr); 8497} 8498 8499static int gaudi_ctx_init(struct hl_ctx *ctx) 8500{ 8501 int rc; 8502 8503 if (ctx->asid == HL_KERNEL_ASID_ID) 8504 return 0; 8505 8506 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8507 if (rc) 8508 return rc; 8509 8510 rc = gaudi_restore_user_registers(ctx->hdev); 8511 if (rc) 8512 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8513 8514 return rc; 8515} 8516 8517static void gaudi_ctx_fini(struct hl_ctx *ctx) 8518{ 8519 if (ctx->asid == HL_KERNEL_ASID_ID) 8520 return; 8521 8522 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8523} 8524 8525static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8526{ 8527 return gaudi_cq_assignment[cq_idx]; 8528} 8529 8530static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8531{ 8532 return sizeof(struct packet_msg_short) + 8533 sizeof(struct packet_msg_prot) * 2; 8534} 8535 8536static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8537{ 8538 return sizeof(struct packet_msg_short) * 4 + 8539 sizeof(struct packet_fence) + 8540 sizeof(struct packet_msg_prot) * 2; 8541} 8542 8543static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8544{ 8545 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8546} 8547 8548static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8549 u32 size, bool eb) 8550{ 8551 struct hl_cb *cb = (struct hl_cb *) data; 8552 struct packet_msg_short *pkt; 8553 u32 value, ctl, pkt_size = sizeof(*pkt); 8554 8555 pkt = cb->kernel_address + size; 8556 memset(pkt, 0, pkt_size); 8557 8558 /* Inc by 1, Mode ADD */ 8559 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8560 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8561 8562 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8563 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8564 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8565 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8566 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8567 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8568 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8569 8570 pkt->value = cpu_to_le32(value); 8571 pkt->ctl = cpu_to_le32(ctl); 8572 8573 return size + pkt_size; 8574} 8575 8576static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8577 u16 addr) 8578{ 8579 u32 ctl, pkt_size = sizeof(*pkt); 8580 8581 memset(pkt, 0, pkt_size); 8582 8583 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8584 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8585 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8586 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8587 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8588 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8589 8590 pkt->value = cpu_to_le32(value); 8591 pkt->ctl = cpu_to_le32(ctl); 8592 8593 return pkt_size; 8594} 8595 8596static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8597 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8598 u16 sob_val, u16 mon_id) 8599{ 8600 u64 monitor_base; 8601 u32 ctl, value, pkt_size = sizeof(*pkt); 8602 u16 msg_addr_offset; 8603 u8 mask; 8604 8605 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8606 dev_err(hdev->dev, 8607 "sob_base %u (mask %#x) is not valid\n", 8608 sob_base, sob_mask); 8609 return 0; 8610 } 8611 8612 /* 8613 * monitor_base should be the content of the base0 address registers, 8614 
* so it will be added to the msg short offsets 8615 */ 8616 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8617 8618 msg_addr_offset = 8619 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8620 monitor_base; 8621 8622 memset(pkt, 0, pkt_size); 8623 8624 /* Monitor config packet: bind the monitor to a sync object */ 8625 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8626 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8627 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8628 0); /* GREATER OR EQUAL*/ 8629 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8630 8631 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8632 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8633 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8634 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8635 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8636 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8637 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8638 8639 pkt->value = cpu_to_le32(value); 8640 pkt->ctl = cpu_to_le32(ctl); 8641 8642 return pkt_size; 8643} 8644 8645static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8646{ 8647 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8648 8649 memset(pkt, 0, pkt_size); 8650 8651 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8652 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8653 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8654 8655 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8656 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8657 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8658 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8659 8660 pkt->cfg = cpu_to_le32(cfg); 8661 pkt->ctl = cpu_to_le32(ctl); 8662 8663 return pkt_size; 8664} 8665 8666static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8667{ 8668 u32 offset, nic_index; 8669 8670 switch (queue_id) { 8671 case GAUDI_QUEUE_ID_DMA_0_0: 8672 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8673 break; 8674 case GAUDI_QUEUE_ID_DMA_0_1: 8675 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8676 break; 8677 case GAUDI_QUEUE_ID_DMA_0_2: 8678 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8679 break; 8680 case GAUDI_QUEUE_ID_DMA_0_3: 8681 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8682 break; 8683 case GAUDI_QUEUE_ID_DMA_1_0: 8684 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8685 break; 8686 case GAUDI_QUEUE_ID_DMA_1_1: 8687 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8688 break; 8689 case GAUDI_QUEUE_ID_DMA_1_2: 8690 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8691 break; 8692 case GAUDI_QUEUE_ID_DMA_1_3: 8693 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8694 break; 8695 case GAUDI_QUEUE_ID_DMA_5_0: 8696 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8697 break; 8698 case GAUDI_QUEUE_ID_DMA_5_1: 8699 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8700 break; 8701 case GAUDI_QUEUE_ID_DMA_5_2: 8702 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8703 break; 8704 case GAUDI_QUEUE_ID_DMA_5_3: 8705 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8706 break; 8707 case GAUDI_QUEUE_ID_TPC_7_0: 8708 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8709 break; 8710 case GAUDI_QUEUE_ID_TPC_7_1: 8711 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8712 break; 8713 case GAUDI_QUEUE_ID_TPC_7_2: 8714 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8715 break; 8716 case GAUDI_QUEUE_ID_TPC_7_3: 8717 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8718 break; 8719 case GAUDI_QUEUE_ID_NIC_0_0: 8720 case GAUDI_QUEUE_ID_NIC_1_0: 8721 case GAUDI_QUEUE_ID_NIC_2_0: 
8722 case GAUDI_QUEUE_ID_NIC_3_0: 8723 case GAUDI_QUEUE_ID_NIC_4_0: 8724 case GAUDI_QUEUE_ID_NIC_5_0: 8725 case GAUDI_QUEUE_ID_NIC_6_0: 8726 case GAUDI_QUEUE_ID_NIC_7_0: 8727 case GAUDI_QUEUE_ID_NIC_8_0: 8728 case GAUDI_QUEUE_ID_NIC_9_0: 8729 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8730 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8731 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8732 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8733 break; 8734 case GAUDI_QUEUE_ID_NIC_0_1: 8735 case GAUDI_QUEUE_ID_NIC_1_1: 8736 case GAUDI_QUEUE_ID_NIC_2_1: 8737 case GAUDI_QUEUE_ID_NIC_3_1: 8738 case GAUDI_QUEUE_ID_NIC_4_1: 8739 case GAUDI_QUEUE_ID_NIC_5_1: 8740 case GAUDI_QUEUE_ID_NIC_6_1: 8741 case GAUDI_QUEUE_ID_NIC_7_1: 8742 case GAUDI_QUEUE_ID_NIC_8_1: 8743 case GAUDI_QUEUE_ID_NIC_9_1: 8744 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8745 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8746 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8747 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8748 break; 8749 case GAUDI_QUEUE_ID_NIC_0_2: 8750 case GAUDI_QUEUE_ID_NIC_1_2: 8751 case GAUDI_QUEUE_ID_NIC_2_2: 8752 case GAUDI_QUEUE_ID_NIC_3_2: 8753 case GAUDI_QUEUE_ID_NIC_4_2: 8754 case GAUDI_QUEUE_ID_NIC_5_2: 8755 case GAUDI_QUEUE_ID_NIC_6_2: 8756 case GAUDI_QUEUE_ID_NIC_7_2: 8757 case GAUDI_QUEUE_ID_NIC_8_2: 8758 case GAUDI_QUEUE_ID_NIC_9_2: 8759 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2; 8760 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 + 8761 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8762 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8763 break; 8764 case GAUDI_QUEUE_ID_NIC_0_3: 8765 case GAUDI_QUEUE_ID_NIC_1_3: 8766 case GAUDI_QUEUE_ID_NIC_2_3: 8767 case GAUDI_QUEUE_ID_NIC_3_3: 8768 case GAUDI_QUEUE_ID_NIC_4_3: 8769 case GAUDI_QUEUE_ID_NIC_5_3: 8770 case GAUDI_QUEUE_ID_NIC_6_3: 8771 case GAUDI_QUEUE_ID_NIC_7_3: 8772 case GAUDI_QUEUE_ID_NIC_8_3: 8773 case GAUDI_QUEUE_ID_NIC_9_3: 8774 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2; 8775 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 + 8776 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8777 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8778 break; 8779 default: 8780 return -EINVAL; 8781 } 8782 8783 *addr = CFG_BASE + offset; 8784 8785 return 0; 8786} 8787 8788static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr) 8789{ 8790 u64 monitor_base; 8791 u32 size = 0; 8792 u16 msg_addr_offset; 8793 8794 /* 8795 * monitor_base should be the content of the base0 address registers, 8796 * so it will be added to the msg short offsets 8797 */ 8798 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8799 8800 /* First monitor config packet: low address of the sync */ 8801 msg_addr_offset = 8802 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) - 8803 monitor_base; 8804 8805 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr, 8806 msg_addr_offset); 8807 8808 /* Second monitor config packet: high address of the sync */ 8809 msg_addr_offset = 8810 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) - 8811 monitor_base; 8812 8813 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), 8814 msg_addr_offset); 8815 8816 /* 8817 * Third monitor config packet: the payload, i.e. 
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
				prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
			hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
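/*
 * State-dump helpers. The sync-to-engine map ties a sync object to the engine
 * that signals it; the hash key is derived from the engine's SO configuration
 * register value, as described in the comment inside the function below.
 */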
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
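/*
 * Formats a single monitor entry for the state dump: monitor id and optional
 * name, the armed sync group id, the mask and pending bits rendered as binary
 * strings, the target value, the payload address/data and the list of sync
 * objects covered by the mask.
 */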
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
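/*
 * Seeds the state-dump infrastructure: hashes the static SOB-id and
 * monitor-id to name tables for lookup, and plugs in the Gaudi-specific
 * properties, sync manager names and callbacks defined above.
 */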
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			 &gaudi_so_id_to_str[i].node,
			 gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			 &gaudi_monitor_id_to_str[i].node,
			 gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info)
{
	/* set 0 since multiple pages are not supported */
	info->page_order_bitmask = 0;
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL, /* sysfs attribute arrays must be NULL terminated */
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}
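/*
 * ASIC-specific dispatch table. The functions above are static, so common
 * habanalabs code reaches them only through hdev->asic_funcs, which
 * gaudi_set_asic_funcs() at the end of this file points at this structure.
 */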
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.is_valid_dram_page_size = NULL,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.get_valid_dram_page_orders = gaudi_get_valid_dram_page_orders,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}