cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gaudi.c (279049B)


      1// SPDX-License-Identifier: GPL-2.0
      2
      3/*
      4 * Copyright 2016-2022 HabanaLabs, Ltd.
      5 * All Rights Reserved.
      6 */
      7
      8#include "gaudiP.h"
      9#include "../include/hw_ip/mmu/mmu_general.h"
     10#include "../include/hw_ip/mmu/mmu_v1_1.h"
     11#include "../include/gaudi/gaudi_masks.h"
     12#include "../include/gaudi/gaudi_fw_if.h"
     13#include "../include/gaudi/gaudi_reg_map.h"
     14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
     15
     16#include <linux/module.h>
     17#include <linux/pci.h>
     18#include <linux/firmware.h>
     19#include <linux/hwmon.h>
     20#include <linux/iommu.h>
     21#include <linux/seq_file.h>
     22
     23/*
     24 * Gaudi security scheme:
     25 *
     26 * 1. Host is protected by:
     27 *        - Range registers
     28 *        - MMU
     29 *
     30 * 2. DDR is protected by:
     31 *        - Range registers (protect the first 512MB)
     32 *
     33 * 3. Configuration is protected by:
     34 *        - Range registers
     35 *        - Protection bits
     36 *
     37 * MMU is always enabled.
     38 *
      39 * QMAN DMA channels 0,1 (PCI DMA):
     40 *     - DMA is not secured.
     41 *     - PQ and CQ are secured.
      42 *     - CP is secured: The driver needs to parse the CB but WREG should be
      43 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
      44 *                      never secured.
     45 *
     46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
      47 * channel 0 to be secured, execute the DMA and change it back to non-secured.
     48 * Currently, the driver doesn't use the DMA while there are compute jobs
     49 * running.
     50 *
     51 * The current use cases for the driver to use the DMA are:
     52 *     - Clear SRAM on context switch (happens on context switch when device is
     53 *       idle)
     54 *     - MMU page tables area clear (happens on init)
     55 *
     56 * QMAN DMA 2-7, TPC, MME, NIC:
     57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
     58 * CQ, CP and the engine are not secured
     59 *
     60 */
     61
     62#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
     63#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
     64#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
     65
     66#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
     67
     68#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
     69#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
     70#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
     71#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
     72
     73#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
     74#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
     75#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
     76#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
     77#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
     78#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
     79#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
     80#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
     81#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
     82
     83#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
     84
     85#define GAUDI_MAX_STRING_LEN		20
     86
     87#define GAUDI_CB_POOL_CB_CNT		512
     88#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
     89
     90#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
     91
     92#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
     93
     94#define GAUDI_NUM_OF_QM_ERR_CAUSE	16
     95
     96#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
     97
     98#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
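/* For reference: 0xEE6B27FF is 3,999,999,999 ticks, so the 8-second comment
 * corresponds to a watchdog tick rate of roughly 500 MHz.
 */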
     99
    100#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
    101		BIT(GAUDI_ENGINE_ID_MME_0) |\
    102		BIT(GAUDI_ENGINE_ID_MME_2) |\
    103		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
    104
    105#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
    106
    107#define GAUDI_PLL_MAX 10
    108
    109#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
    110
    111#define MONITOR_SOB_STRING_SIZE		256
    112
    113static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
    114	GAUDI_QUEUE_ID_DMA_0_0,
    115	GAUDI_QUEUE_ID_DMA_0_1,
    116	GAUDI_QUEUE_ID_DMA_0_2,
    117	GAUDI_QUEUE_ID_DMA_0_3,
    118	GAUDI_QUEUE_ID_DMA_1_0,
    119	GAUDI_QUEUE_ID_DMA_1_1,
    120	GAUDI_QUEUE_ID_DMA_1_2,
    121	GAUDI_QUEUE_ID_DMA_1_3
    122};
    123
    124static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
    125		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
    126		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
    127		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
    128		"gaudi cpu eq"
    129};
    130
    131static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
    132	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
    133	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
    134	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
    135	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
    136	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
    137	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
    138	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
    139	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
    140};
    141
    142static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
    143	[0] = GAUDI_QUEUE_ID_DMA_0_0,
    144	[1] = GAUDI_QUEUE_ID_DMA_0_1,
    145	[2] = GAUDI_QUEUE_ID_DMA_0_2,
    146	[3] = GAUDI_QUEUE_ID_DMA_0_3,
    147	[4] = GAUDI_QUEUE_ID_DMA_1_0,
    148	[5] = GAUDI_QUEUE_ID_DMA_1_1,
    149	[6] = GAUDI_QUEUE_ID_DMA_1_2,
    150	[7] = GAUDI_QUEUE_ID_DMA_1_3,
    151};
    152
    153static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
    154	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
    155	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
    156	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
    157	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
    158	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
    159	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
    160	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
    161	[PACKET_FENCE]		= sizeof(struct packet_fence),
    162	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
    163	[PACKET_NOP]		= sizeof(struct packet_nop),
    164	[PACKET_STOP]		= sizeof(struct packet_stop),
    165	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
    166	[PACKET_WAIT]		= sizeof(struct packet_wait),
    167	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
    168};
    169
    170static inline bool validate_packet_id(enum packet_id id)
    171{
    172	switch (id) {
    173	case PACKET_WREG_32:
    174	case PACKET_WREG_BULK:
    175	case PACKET_MSG_LONG:
    176	case PACKET_MSG_SHORT:
    177	case PACKET_CP_DMA:
    178	case PACKET_REPEAT:
    179	case PACKET_MSG_PROT:
    180	case PACKET_FENCE:
    181	case PACKET_LIN_DMA:
    182	case PACKET_NOP:
    183	case PACKET_STOP:
    184	case PACKET_ARB_POINT:
    185	case PACKET_WAIT:
    186	case PACKET_LOAD_AND_EXE:
    187		return true;
    188	default:
    189		return false;
    190	}
    191}
    192
    193static const char * const
    194gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
    195	"tpc_address_exceed_slm",
    196	"tpc_div_by_0",
    197	"tpc_spu_mac_overflow",
    198	"tpc_spu_addsub_overflow",
    199	"tpc_spu_abs_overflow",
    200	"tpc_spu_fp_dst_nan_inf",
    201	"tpc_spu_fp_dst_denorm",
    202	"tpc_vpu_mac_overflow",
    203	"tpc_vpu_addsub_overflow",
    204	"tpc_vpu_abs_overflow",
    205	"tpc_vpu_fp_dst_nan_inf",
    206	"tpc_vpu_fp_dst_denorm",
    207	"tpc_assertions",
    208	"tpc_illegal_instruction",
    209	"tpc_pc_wrap_around",
    210	"tpc_qm_sw_err",
    211	"tpc_hbw_rresp_err",
    212	"tpc_hbw_bresp_err",
    213	"tpc_lbw_rresp_err",
    214	"tpc_lbw_bresp_err"
    215};
    216
    217static const char * const
    218gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
    219	"PQ AXI HBW error",
    220	"CQ AXI HBW error",
    221	"CP AXI HBW error",
    222	"CP error due to undefined OPCODE",
    223	"CP encountered STOP OPCODE",
    224	"CP AXI LBW error",
    225	"CP WRREG32 or WRBULK returned error",
    226	"N/A",
    227	"FENCE 0 inc over max value and clipped",
    228	"FENCE 1 inc over max value and clipped",
    229	"FENCE 2 inc over max value and clipped",
    230	"FENCE 3 inc over max value and clipped",
    231	"FENCE 0 dec under min value and clipped",
    232	"FENCE 1 dec under min value and clipped",
    233	"FENCE 2 dec under min value and clipped",
    234	"FENCE 3 dec under min value and clipped"
    235};
    236
    237static const char * const
    238gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
    239	"Choice push while full error",
    240	"Choice Q watchdog error",
    241	"MSG AXI LBW returned with error"
    242};
    243
    244enum gaudi_sm_sei_cause {
    245	GAUDI_SM_SEI_SO_OVERFLOW,
    246	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
    247	GAUDI_SM_SEI_AXI_RESPONSE_ERR
    248};
    249
    250static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
    251	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
    252	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
    253	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
    254	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
    255	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
    256	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
    257	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
    258	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
    259	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
    260	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
    261	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
    262	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
    263	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
    264	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
    265	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
    266	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
    267	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
    268	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
    269	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
    270	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
    271	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
    272	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
    273	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
    274	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
    275	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
    276	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
    277	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
    278	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
    279	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
    280	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
    281	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
    282	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
    283	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
    284	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
    285	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
    286	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
    287	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
    288	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
    289	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
    290	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
    291	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
    292	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
    293	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
    294	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
    295	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
    296	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
    297	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
    298	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
    299	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
    300	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
    301	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
    302	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
    303	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
    304	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
    305	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
    306	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
    307	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
    308	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
    309	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
    310	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
    311	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
    312	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
    313	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
    314	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
    315	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
    316	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
    317	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
    318	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
    319	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
    320	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
    321	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
    322	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
    323	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
    324	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
    325	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
    326	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
    327	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
    328	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
    329	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
    330	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
    331	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
    332	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
    333	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
    334	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
    335	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
    336	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
    337	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
    338	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
    339	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
    340	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
    341	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
    342	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
    343	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
    344	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
    345	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
    346	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
    347	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
    348	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
    349	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
    350	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
    351	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
    352	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
    353	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
    354	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
    355	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
    356	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
    357	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
    358	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
    359	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
    360	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
    361	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
    362	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
    363	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
    364};
    365
    366static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
    367	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
    368	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
    369	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
    370	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
    371	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
    372	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
    373	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
    374	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
    375	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
    376	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
    377	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
    378	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
    379	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
    380	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
    381	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
    382	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
    383	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
    384	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
    385	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
    386	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
    387	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
    388	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
    389	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
    390	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
    391	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
    392	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
    393	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
    394};
    395
    396static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
    397	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
    398	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
    399	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
    400	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
    401	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
    402	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
    403	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
    404	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
    405	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
    406	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
    407	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
    408};
    409
    410static s64 gaudi_state_dump_specs_props[] = {
    411	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
    412	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
    413	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
    414	[SP_MON_OBJ_WR_ADDR_LOW] =
    415		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
    416	[SP_MON_OBJ_WR_ADDR_HIGH] =
    417		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
    418	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
    419	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
    420	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
    421	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
    422	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
    423	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
    424	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
    425	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
    426	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
    427	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
    428	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
    429	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
    430	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
    431	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
    432	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
    433	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
    434	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
    435	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
    436	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
    437	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
    438	[SP_FENCE0_CNT_OFFSET] =
    439		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
    440	[SP_FENCE0_RDATA_OFFSET] =
    441		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
    442	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
    443	[SP_NUM_CORES] = 1,
    444};
    445
    446/* The order here is opposite to the order of the indexing in the h/w.
    447 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
    448 */
    449static const char * const gaudi_sync_manager_names[] = {
    450	"SYNC_MGR_E_N",
    451	"SYNC_MGR_W_N",
    452	"SYNC_MGR_E_S",
    453	"SYNC_MGR_W_S",
    454	NULL
    455};
    456
    457struct ecc_info_extract_params {
    458	u64 block_address;
    459	u32 num_memories;
    460	bool derr;
    461};
    462
    463static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
    464								u64 phys_addr);
    465static int gaudi_send_job_on_qman0(struct hl_device *hdev,
    466					struct hl_cs_job *job);
    467static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
    468					u32 size, u64 val);
    469static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
    470					u32 num_regs, u32 val);
    471static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
    472				u32 tpc_id);
    473static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
    474static int gaudi_cpucp_info_get(struct hl_device *hdev);
    475static void gaudi_disable_clock_gating(struct hl_device *hdev);
    476static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
    477static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
    478				u32 size, bool eb);
    479static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
    480				struct hl_gen_wait_properties *prop);
    481static inline enum hl_collective_mode
    482get_collective_mode(struct hl_device *hdev, u32 queue_id)
    483{
    484	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
    485		return HL_COLLECTIVE_MASTER;
    486
    487	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
    488			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
    489		return HL_COLLECTIVE_SLAVE;
    490
    491	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
    492			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
    493		return HL_COLLECTIVE_SLAVE;
    494
    495	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
    496			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
    497		return HL_COLLECTIVE_SLAVE;
    498
    499	return HL_COLLECTIVE_NOT_SUPPORTED;
    500}
    501
    502static inline void set_default_power_values(struct hl_device *hdev)
    503{
    504	struct asic_fixed_properties *prop = &hdev->asic_prop;
    505
    506	if (hdev->card_type == cpucp_card_type_pmc) {
    507		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
    508
    509		if (prop->fw_security_enabled)
    510			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
    511		else
    512			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
    513	} else {
    514		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
    515		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
    516	}
    517}
    518
    519static int gaudi_set_fixed_properties(struct hl_device *hdev)
    520{
    521	struct asic_fixed_properties *prop = &hdev->asic_prop;
    522	u32 num_sync_stream_queues = 0;
    523	int i;
    524
    525	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
    526	prop->hw_queues_props = kcalloc(prop->max_queues,
    527			sizeof(struct hw_queue_properties),
    528			GFP_KERNEL);
    529
    530	if (!prop->hw_queues_props)
    531		return -ENOMEM;
    532
    533	for (i = 0 ; i < prop->max_queues ; i++) {
    534		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
    535			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
    536			prop->hw_queues_props[i].driver_only = 0;
    537			prop->hw_queues_props[i].supports_sync_stream = 1;
    538			prop->hw_queues_props[i].cb_alloc_flags =
    539				CB_ALLOC_KERNEL;
    540			num_sync_stream_queues++;
    541		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
    542			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
    543			prop->hw_queues_props[i].driver_only = 1;
    544			prop->hw_queues_props[i].supports_sync_stream = 0;
    545			prop->hw_queues_props[i].cb_alloc_flags =
    546				CB_ALLOC_KERNEL;
    547		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
    548			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
    549			prop->hw_queues_props[i].driver_only = 0;
    550			prop->hw_queues_props[i].supports_sync_stream = 0;
    551			prop->hw_queues_props[i].cb_alloc_flags =
    552				CB_ALLOC_USER;
    553
    554		}
    555		prop->hw_queues_props[i].collective_mode =
    556						get_collective_mode(hdev, i);
    557	}
    558
    559	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
    560	prop->host_base_address = HOST_PHYS_BASE;
    561	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
    562	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
    563	prop->collective_first_sob = 0;
    564	prop->collective_first_mon = 0;
    565
    566	/* 2 SOBs per internal queue stream are reserved for collective */
    567	prop->sync_stream_first_sob =
    568			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
    569			* QMAN_STREAMS * HL_RSVD_SOBS;
    570
     571	/* 1 monitor per internal queue stream is reserved for collective
    572	 * 2 monitors per external queue stream are reserved for collective
    573	 */
    574	prop->sync_stream_first_mon =
    575			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
    576			(NUMBER_OF_EXT_HW_QUEUES * 2);
    577
    578	prop->dram_base_address = DRAM_PHYS_BASE;
    579	prop->dram_size = GAUDI_HBM_SIZE_32GB;
    580	prop->dram_end_address = prop->dram_base_address +
    581					prop->dram_size;
    582	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
    583
    584	prop->sram_base_address = SRAM_BASE_ADDR;
    585	prop->sram_size = SRAM_SIZE;
    586	prop->sram_end_address = prop->sram_base_address +
    587					prop->sram_size;
    588	prop->sram_user_base_address = prop->sram_base_address +
    589					SRAM_USER_BASE_OFFSET;
    590
    591	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
    592	if (hdev->pldm)
    593		prop->mmu_pgt_size = 0x800000; /* 8MB */
    594	else
    595		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
    596	prop->mmu_pte_size = HL_PTE_SIZE;
    597	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
    598	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
    599	prop->dram_page_size = PAGE_SIZE_2MB;
    600	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
    601	prop->dram_supports_virtual_memory = false;
    602
    603	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
    604	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
    605	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
    606	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
    607	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
    608	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
    609	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
    610	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
    611	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
    612	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
    613	prop->pmmu.start_addr = VA_HOST_SPACE_START;
    614	prop->pmmu.end_addr =
    615			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
    616	prop->pmmu.page_size = PAGE_SIZE_4KB;
    617	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
    618	prop->pmmu.last_mask = LAST_MASK;
    619	/* TODO: will be duplicated until implementing per-MMU props */
    620	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
    621	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
    622
     623	/* PMMU and HPMMU are the same except for the page size */
    624	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
    625	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
    626
    627	/* shifts and masks are the same in PMMU and DMMU */
    628	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
    629	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
    630	prop->dmmu.end_addr = VA_HOST_SPACE_END;
    631	prop->dmmu.page_size = PAGE_SIZE_2MB;
    632
    633	prop->cfg_size = CFG_SIZE;
    634	prop->max_asid = MAX_ASID;
    635	prop->num_of_events = GAUDI_EVENT_SIZE;
    636	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
    637
    638	set_default_power_values(hdev);
    639
    640	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
    641	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
    642
    643	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
    644	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
    645
    646	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
    647					CARD_NAME_MAX_LEN);
    648
    649	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
    650
    651	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
    652			prop->sync_stream_first_sob +
    653			(num_sync_stream_queues * HL_RSVD_SOBS);
    654	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
    655			prop->sync_stream_first_mon +
    656			(num_sync_stream_queues * HL_RSVD_MONS);
    657
    658	prop->first_available_user_msix_interrupt = USHRT_MAX;
    659
    660	for (i = 0 ; i < HL_MAX_DCORES ; i++)
    661		prop->first_available_cq[i] = USHRT_MAX;
    662
    663	prop->fw_cpu_boot_dev_sts0_valid = false;
    664	prop->fw_cpu_boot_dev_sts1_valid = false;
    665	prop->hard_reset_done_by_fw = false;
    666	prop->gic_interrupts_enable = true;
    667
    668	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
    669
    670	prop->clk_pll_index = HL_GAUDI_MME_PLL;
    671	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
    672
    673	prop->use_get_power_for_reset_history = true;
    674
    675	prop->configurable_stop_on_err = true;
    676
    677	prop->set_max_power_on_device_init = true;
    678
    679	prop->dma_mask = 48;
    680
    681	return 0;
    682}
    683
    684static int gaudi_pci_bars_map(struct hl_device *hdev)
    685{
    686	static const char * const name[] = {"SRAM", "CFG", "HBM"};
    687	bool is_wc[3] = {false, false, true};
    688	int rc;
    689
    690	rc = hl_pci_bars_map(hdev, name, is_wc);
    691	if (rc)
    692		return rc;
    693
    694	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
    695			(CFG_BASE - SPI_FLASH_BASE_ADDR);
    696
    697	return 0;
    698}
    699
    700static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
    701{
    702	struct gaudi_device *gaudi = hdev->asic_specific;
    703	struct hl_inbound_pci_region pci_region;
    704	u64 old_addr = addr;
    705	int rc;
    706
    707	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
    708		return old_addr;
    709
    710	if (hdev->asic_prop.iatu_done_by_fw)
    711		return U64_MAX;
    712
    713	/* Inbound Region 2 - Bar 4 - Point to HBM */
    714	pci_region.mode = PCI_BAR_MATCH_MODE;
    715	pci_region.bar = HBM_BAR_ID;
    716	pci_region.addr = addr;
    717	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
    718	if (rc)
    719		return U64_MAX;
    720
    721	if (gaudi) {
    722		old_addr = gaudi->hbm_bar_cur_addr;
    723		gaudi->hbm_bar_cur_addr = addr;
    724	}
    725
    726	return old_addr;
    727}
    728
    729static int gaudi_init_iatu(struct hl_device *hdev)
    730{
    731	struct hl_inbound_pci_region inbound_region;
    732	struct hl_outbound_pci_region outbound_region;
    733	int rc;
    734
    735	if (hdev->asic_prop.iatu_done_by_fw)
    736		return 0;
    737
    738	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
    739	inbound_region.mode = PCI_BAR_MATCH_MODE;
    740	inbound_region.bar = SRAM_BAR_ID;
    741	inbound_region.addr = SRAM_BASE_ADDR;
    742	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
    743	if (rc)
    744		goto done;
    745
    746	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
    747	inbound_region.mode = PCI_BAR_MATCH_MODE;
    748	inbound_region.bar = CFG_BAR_ID;
    749	inbound_region.addr = SPI_FLASH_BASE_ADDR;
    750	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
    751	if (rc)
    752		goto done;
    753
    754	/* Inbound Region 2 - Bar 4 - Point to HBM */
    755	inbound_region.mode = PCI_BAR_MATCH_MODE;
    756	inbound_region.bar = HBM_BAR_ID;
    757	inbound_region.addr = DRAM_PHYS_BASE;
    758	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
    759	if (rc)
    760		goto done;
    761
    762	/* Outbound Region 0 - Point to Host */
    763	outbound_region.addr = HOST_PHYS_BASE;
    764	outbound_region.size = HOST_PHYS_SIZE;
    765	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
    766
    767done:
    768	return rc;
    769}
    770
    771static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
    772{
    773	return RREG32(mmHW_STATE);
    774}
    775
    776static int gaudi_early_init(struct hl_device *hdev)
    777{
    778	struct asic_fixed_properties *prop = &hdev->asic_prop;
    779	struct pci_dev *pdev = hdev->pdev;
    780	u32 fw_boot_status;
    781	int rc;
    782
    783	rc = gaudi_set_fixed_properties(hdev);
    784	if (rc) {
    785		dev_err(hdev->dev, "Failed setting fixed properties\n");
    786		return rc;
    787	}
    788
    789	/* Check BAR sizes */
    790	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
    791		dev_err(hdev->dev,
    792			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
    793			SRAM_BAR_ID,
    794			(unsigned long long) pci_resource_len(pdev,
    795							SRAM_BAR_ID),
    796			SRAM_BAR_SIZE);
    797		rc = -ENODEV;
    798		goto free_queue_props;
    799	}
    800
    801	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
    802		dev_err(hdev->dev,
    803			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
    804			CFG_BAR_ID,
    805			(unsigned long long) pci_resource_len(pdev,
    806								CFG_BAR_ID),
    807			CFG_BAR_SIZE);
    808		rc = -ENODEV;
    809		goto free_queue_props;
    810	}
    811
    812	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
    813	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
    814
    815	/* If FW security is enabled at this point it means no access to ELBI */
    816	if (hdev->asic_prop.fw_security_enabled) {
    817		hdev->asic_prop.iatu_done_by_fw = true;
    818
    819		/*
     820		 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
     821		 * the decision can only be taken based on PCI ID security.
    822		 */
    823		hdev->asic_prop.gic_interrupts_enable = false;
    824		goto pci_init;
    825	}
    826
    827	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
    828				&fw_boot_status);
    829	if (rc)
    830		goto free_queue_props;
    831
    832	/* Check whether FW is configuring iATU */
    833	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
    834			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
    835		hdev->asic_prop.iatu_done_by_fw = true;
    836
    837pci_init:
    838	rc = hl_pci_init(hdev);
    839	if (rc)
    840		goto free_queue_props;
    841
     842	/* Before continuing with the initialization, we need to read the preboot
    843	 * version to determine whether we run with a security-enabled firmware
    844	 */
    845	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
    846					mmCPU_BOOT_DEV_STS0,
    847					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
    848					mmCPU_BOOT_ERR1,
    849					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
    850	if (rc) {
    851		if (hdev->reset_on_preboot_fail)
    852			hdev->asic_funcs->hw_fini(hdev, true, false);
    853		goto pci_fini;
    854	}
    855
    856	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
    857		dev_info(hdev->dev,
    858			"H/W state is dirty, must reset before initializing\n");
    859		hdev->asic_funcs->hw_fini(hdev, true, false);
    860	}
    861
    862	return 0;
    863
    864pci_fini:
    865	hl_pci_fini(hdev);
    866free_queue_props:
    867	kfree(hdev->asic_prop.hw_queues_props);
    868	return rc;
    869}
    870
    871static int gaudi_early_fini(struct hl_device *hdev)
    872{
    873	kfree(hdev->asic_prop.hw_queues_props);
    874	hl_pci_fini(hdev);
    875
    876	return 0;
    877}
    878
    879/**
    880 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
    881 *
    882 * @hdev: pointer to hl_device structure
    883 *
    884 */
    885static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
    886{
    887	struct asic_fixed_properties *prop = &hdev->asic_prop;
    888	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
    889	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
    890	int rc;
    891
    892	if (hdev->asic_prop.fw_security_enabled) {
    893		struct gaudi_device *gaudi = hdev->asic_specific;
    894
    895		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
    896			return 0;
    897
    898		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
    899
    900		if (rc)
    901			return rc;
    902
    903		freq = pll_freq_arr[2];
    904	} else {
    905		/* Backward compatibility */
    906		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
    907		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
    908		nr = RREG32(mmPSOC_CPU_PLL_NR);
    909		nf = RREG32(mmPSOC_CPU_PLL_NF);
    910		od = RREG32(mmPSOC_CPU_PLL_OD);
    911
    912		if (div_sel == DIV_SEL_REF_CLK ||
    913				div_sel == DIV_SEL_DIVIDED_REF) {
    914			if (div_sel == DIV_SEL_REF_CLK)
    915				freq = PLL_REF_CLK;
    916			else
    917				freq = PLL_REF_CLK / (div_fctr + 1);
    918		} else if (div_sel == DIV_SEL_PLL_CLK ||
    919			div_sel == DIV_SEL_DIVIDED_PLL) {
    920			pll_clk = PLL_REF_CLK * (nf + 1) /
    921					((nr + 1) * (od + 1));
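			/* e.g. assuming PLL_REF_CLK = 50 (MHz) with nf = 99, nr = 0
			 * and od = 1: pll_clk = 50 * 100 / (1 * 2) = 2500 MHz
			 */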
    922			if (div_sel == DIV_SEL_PLL_CLK)
    923				freq = pll_clk;
    924			else
    925				freq = pll_clk / (div_fctr + 1);
    926		} else {
    927			dev_warn(hdev->dev,
     928				"Received invalid div select value: %d\n",
    929				div_sel);
    930			freq = 0;
    931		}
    932	}
    933
    934	prop->psoc_timestamp_frequency = freq;
    935	prop->psoc_pci_pll_nr = nr;
    936	prop->psoc_pci_pll_nf = nf;
    937	prop->psoc_pci_pll_od = od;
    938	prop->psoc_pci_pll_div_factor = div_fctr;
    939
    940	return 0;
    941}
    942
    943static int _gaudi_init_tpc_mem(struct hl_device *hdev,
    944		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
    945{
    946	struct asic_fixed_properties *prop = &hdev->asic_prop;
    947	struct packet_lin_dma *init_tpc_mem_pkt;
    948	struct hl_cs_job *job;
    949	struct hl_cb *cb;
    950	u64 dst_addr;
    951	u32 cb_size, ctl;
    952	u8 tpc_id;
    953	int rc;
    954
    955	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
    956	if (!cb)
    957		return -EFAULT;
    958
    959	init_tpc_mem_pkt = cb->kernel_address;
    960	cb_size = sizeof(*init_tpc_mem_pkt);
    961	memset(init_tpc_mem_pkt, 0, cb_size);
    962
    963	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
    964
    965	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
    966	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
    967	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
    968	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
    969
    970	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
    971
    972	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
    973	dst_addr = (prop->sram_user_base_address &
    974			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
    975			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
    976	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
    977
    978	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
    979	if (!job) {
    980		dev_err(hdev->dev, "Failed to allocate a new job\n");
    981		rc = -ENOMEM;
    982		goto release_cb;
    983	}
    984
    985	job->id = 0;
    986	job->user_cb = cb;
    987	atomic_inc(&job->user_cb->cs_cnt);
    988	job->user_cb_size = cb_size;
    989	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
    990	job->patched_cb = job->user_cb;
    991	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
    992
    993	hl_debugfs_add_job(hdev, job);
    994
    995	rc = gaudi_send_job_on_qman0(hdev, job);
    996
    997	if (rc)
    998		goto free_job;
    999
   1000	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
   1001		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
   1002		if (rc)
   1003			break;
   1004	}
   1005
   1006free_job:
   1007	hl_userptr_delete_list(hdev, &job->userptr_list);
   1008	hl_debugfs_remove_job(hdev, job);
   1009	kfree(job);
   1010	atomic_dec(&cb->cs_cnt);
   1011
   1012release_cb:
   1013	hl_cb_put(cb);
   1014	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
   1015
   1016	return rc;
   1017}
   1018
   1019/*
   1020 * gaudi_init_tpc_mem() - Initialize TPC memories.
   1021 * @hdev: Pointer to hl_device structure.
   1022 *
   1023 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
   1024 *
   1025 * Return: 0 for success, negative value for error.
   1026 */
   1027static int gaudi_init_tpc_mem(struct hl_device *hdev)
   1028{
   1029	const struct firmware *fw;
   1030	size_t fw_size;
   1031	void *cpu_addr;
   1032	dma_addr_t dma_handle;
   1033	int rc, count = 5;
   1034
   1035again:
   1036	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
   1037	if (rc == -EINTR && count-- > 0) {
   1038		msleep(50);
   1039		goto again;
   1040	}
   1041
   1042	if (rc) {
   1043		dev_err(hdev->dev, "Failed to load firmware file %s\n",
   1044				GAUDI_TPC_FW_FILE);
   1045		goto out;
   1046	}
   1047
   1048	fw_size = fw->size;
   1049	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
   1050			&dma_handle, GFP_KERNEL | __GFP_ZERO);
   1051	if (!cpu_addr) {
   1052		dev_err(hdev->dev,
   1053			"Failed to allocate %zu of dma memory for TPC kernel\n",
   1054			fw_size);
   1055		rc = -ENOMEM;
   1056		goto out;
   1057	}
   1058
   1059	memcpy(cpu_addr, fw->data, fw_size);
   1060
   1061	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
   1062
   1063	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
   1064			dma_handle);
   1065
   1066out:
   1067	release_firmware(fw);
   1068	return rc;
   1069}
   1070
   1071static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
   1072{
   1073	struct gaudi_device *gaudi = hdev->asic_specific;
   1074	struct gaudi_collective_properties *prop = &gaudi->collective_props;
   1075	struct hl_hw_queue *q;
   1076	u32 i, sob_id, sob_group_id, queue_id;
   1077
   1078	/* Iterate through SOB groups and assign a SOB for each slave queue */
   1079	sob_group_id =
   1080		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
   1081	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
   1082
   1083	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
   1084	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
   1085		q = &hdev->kernel_queues[queue_id + (4 * i)];
   1086		q->sync_stream_prop.collective_sob_id = sob_id + i;
   1087	}
   1088
   1089	/* Both DMA5 and TPC7 use the same resources since only a single
    1090	 * engine needs to participate in the reduction process
   1091	 */
   1092	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
   1093	q = &hdev->kernel_queues[queue_id];
   1094	q->sync_stream_prop.collective_sob_id =
   1095			sob_id + NIC_NUMBER_OF_ENGINES;
   1096
   1097	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
   1098	q = &hdev->kernel_queues[queue_id];
   1099	q->sync_stream_prop.collective_sob_id =
   1100			sob_id + NIC_NUMBER_OF_ENGINES;
   1101}
   1102
   1103static void gaudi_sob_group_hw_reset(struct kref *ref)
   1104{
   1105	struct gaudi_hw_sob_group *hw_sob_group =
   1106		container_of(ref, struct gaudi_hw_sob_group, kref);
   1107	struct hl_device *hdev = hw_sob_group->hdev;
   1108	int i;
   1109
   1110	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
   1111		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
   1112			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
   1113
   1114	kref_init(&hw_sob_group->kref);
   1115}
   1116
   1117static void gaudi_sob_group_reset_error(struct kref *ref)
   1118{
   1119	struct gaudi_hw_sob_group *hw_sob_group =
   1120		container_of(ref, struct gaudi_hw_sob_group, kref);
   1121	struct hl_device *hdev = hw_sob_group->hdev;
   1122
   1123	dev_crit(hdev->dev,
   1124		"SOB release shouldn't be called here, base_sob_id: %d\n",
   1125		hw_sob_group->base_sob_id);
   1126}
   1127
   1128static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
   1129{
   1130	struct gaudi_collective_properties *prop;
   1131	int i;
   1132
   1133	prop = &gaudi->collective_props;
   1134
   1135	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
   1136
   1137	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
   1138		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
   1139			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
   1140					BIT(i % HL_MAX_SOBS_PER_MONITOR);
   1141	/* Set collective engine bit */
   1142	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
   1143				BIT(i % HL_MAX_SOBS_PER_MONITOR);
   1144}
   1145
   1146static int gaudi_collective_init(struct hl_device *hdev)
   1147{
   1148	u32 i, sob_id, reserved_sobs_per_group;
   1149	struct gaudi_collective_properties *prop;
   1150	struct gaudi_device *gaudi;
   1151
   1152	gaudi = hdev->asic_specific;
   1153	prop = &gaudi->collective_props;
   1154	sob_id = hdev->asic_prop.collective_first_sob;
   1155
   1156	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
   1157	reserved_sobs_per_group =
   1158		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
   1159
   1160	/* Init SOB groups */
   1161	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
   1162		prop->hw_sob_group[i].hdev = hdev;
   1163		prop->hw_sob_group[i].base_sob_id = sob_id;
   1164		sob_id += reserved_sobs_per_group;
   1165		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
   1166	}
   1167
   1168	for (i = 0 ; i < QMAN_STREAMS; i++) {
   1169		prop->next_sob_group_val[i] = 1;
   1170		prop->curr_sob_group_idx[i] = 0;
   1171		gaudi_collective_map_sobs(hdev, i);
   1172	}
   1173
   1174	gaudi_collective_mstr_sob_mask_set(gaudi);
   1175
   1176	return 0;
   1177}
   1178
   1179static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
   1180{
   1181	struct gaudi_device *gaudi = hdev->asic_specific;
   1182	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
   1183
   1184	kref_put(&cprop->hw_sob_group[sob_group].kref,
   1185					gaudi_sob_group_hw_reset);
   1186}
   1187
   1188static void gaudi_collective_master_init_job(struct hl_device *hdev,
   1189		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
   1190{
   1191	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
   1192	struct gaudi_collective_properties *cprop;
   1193	struct hl_gen_wait_properties wait_prop;
   1194	struct hl_sync_stream_properties *prop;
   1195	struct gaudi_device *gaudi;
   1196
   1197	gaudi = hdev->asic_specific;
   1198	cprop = &gaudi->collective_props;
   1199	queue_id = job->hw_queue_id;
   1200	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
   1201
   1202	master_sob_base =
   1203		cprop->hw_sob_group[sob_group_offset].base_sob_id;
   1204	master_monitor = prop->collective_mstr_mon_id[0];
   1205
   1206	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
   1207
   1208	dev_dbg(hdev->dev,
   1209		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
   1210		master_sob_base, cprop->mstr_sob_mask[0],
   1211		cprop->next_sob_group_val[stream],
   1212		master_monitor, queue_id);
   1213
   1214	wait_prop.data = (void *) job->patched_cb;
   1215	wait_prop.sob_base = master_sob_base;
   1216	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
   1217	wait_prop.sob_val = cprop->next_sob_group_val[stream];
   1218	wait_prop.mon_id = master_monitor;
   1219	wait_prop.q_idx = queue_id;
   1220	wait_prop.size = cb_size;
   1221	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
   1222
   1223	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
   1224	master_monitor = prop->collective_mstr_mon_id[1];
   1225
   1226	dev_dbg(hdev->dev,
   1227		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
   1228		master_sob_base, cprop->mstr_sob_mask[1],
   1229		cprop->next_sob_group_val[stream],
   1230		master_monitor, queue_id);
   1231
   1232	wait_prop.sob_base = master_sob_base;
   1233	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
   1234	wait_prop.mon_id = master_monitor;
   1235	wait_prop.size = cb_size;
   1236	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
   1237}
   1238
   1239static void gaudi_collective_slave_init_job(struct hl_device *hdev,
   1240		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
   1241{
   1242	struct hl_gen_wait_properties wait_prop;
   1243	struct hl_sync_stream_properties *prop;
   1244	u32 queue_id, cb_size = 0;
   1245
   1246	queue_id = job->hw_queue_id;
   1247	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
   1248
   1249	if (job->cs->encaps_signals) {
    1250		/* use the encaps signal handle stored earlier in the flow
    1251		 * and set the SOB information from the encaps
    1252		 * signal handle
   1253		 */
   1254		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
   1255						cs_cmpl);
   1256
   1257		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
   1258				job->cs->sequence,
   1259				cs_cmpl->hw_sob->sob_id,
   1260				cs_cmpl->sob_val);
   1261	}
   1262
   1263	/* Add to wait CBs using slave monitor */
   1264	wait_prop.data = (void *) job->user_cb;
   1265	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
   1266	wait_prop.sob_mask = 0x1;
   1267	wait_prop.sob_val = cs_cmpl->sob_val;
   1268	wait_prop.mon_id = prop->collective_slave_mon_id;
   1269	wait_prop.q_idx = queue_id;
   1270	wait_prop.size = cb_size;
   1271
   1272	dev_dbg(hdev->dev,
   1273		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
   1274		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
   1275		prop->collective_slave_mon_id, queue_id);
   1276
   1277	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
   1278
   1279	dev_dbg(hdev->dev,
   1280		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
   1281		prop->collective_sob_id, queue_id);
   1282
   1283	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
   1284			prop->collective_sob_id, cb_size, false);
   1285}
   1286
   1287static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
   1288{
   1289	struct hl_cs_compl *signal_cs_cmpl =
   1290		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
   1291	struct hl_cs_compl *cs_cmpl =
   1292		container_of(cs->fence, struct hl_cs_compl, base_fence);
   1293	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
   1294	struct gaudi_collective_properties *cprop;
   1295	u32 stream, queue_id, sob_group_offset;
   1296	struct gaudi_device *gaudi;
   1297	struct hl_device *hdev;
   1298	struct hl_cs_job *job;
   1299	struct hl_ctx *ctx;
   1300
   1301	ctx = cs->ctx;
   1302	hdev = ctx->hdev;
   1303	gaudi = hdev->asic_specific;
   1304	cprop = &gaudi->collective_props;
   1305
   1306	if (cs->encaps_signals) {
   1307		cs_cmpl->hw_sob = handle->hw_sob;
   1308		/* at this checkpoint we only need the hw_sob pointer
    1309		 * for the completion check before starting to go over the jobs
    1310		 * of the master/slaves; the sob_value will be taken later on
    1311		 * in gaudi_collective_slave_init_job, depending on each
    1312		 * job's wait offset value.
   1313		 */
   1314		cs_cmpl->sob_val = 0;
   1315	} else {
   1316		/* copy the SOB id and value of the signal CS */
   1317		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
   1318		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
   1319	}
   1320
   1321	/* check again if the signal cs already completed.
   1322	 * if yes then don't send any wait cs since the hw_sob
   1323	 * could be in reset already. if signal is not completed
   1324	 * then get refcount to hw_sob to prevent resetting the sob
   1325	 * while wait cs is not submitted.
   1326	 * note that this check is protected by two locks,
   1327	 * hw queue lock and completion object lock,
   1328	 * and the same completion object lock also protects
   1329	 * the hw_sob reset handler function.
    1330	 * The hw_queue lock prevents the hw_sob refcount value, which is
    1331	 * changed by the signal/wait flows, from going out of sync.
   1332	 */
   1333	spin_lock(&signal_cs_cmpl->lock);
   1334
   1335	if (completion_done(&cs->signal_fence->completion)) {
   1336		spin_unlock(&signal_cs_cmpl->lock);
   1337		return -EINVAL;
   1338	}
   1339	/* Increment kref since all slave queues are now waiting on it */
   1340	kref_get(&cs_cmpl->hw_sob->kref);
   1341
   1342	spin_unlock(&signal_cs_cmpl->lock);
   1343
   1344	/* Calculate the stream from collective master queue (1st job) */
   1345	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
   1346	stream = job->hw_queue_id % 4;
   1347	sob_group_offset =
   1348		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
   1349
   1350	list_for_each_entry(job, &cs->job_list, cs_node) {
   1351		queue_id = job->hw_queue_id;
   1352
   1353		if (hdev->kernel_queues[queue_id].collective_mode ==
   1354				HL_COLLECTIVE_MASTER)
   1355			gaudi_collective_master_init_job(hdev, job, stream,
   1356						sob_group_offset);
   1357		else
   1358			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
   1359	}
   1360
   1361	cs_cmpl->sob_group = sob_group_offset;
   1362
   1363	/* Handle sob group kref and wraparound */
   1364	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
   1365	cprop->next_sob_group_val[stream]++;
   1366
   1367	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
   1368		/*
   1369		 * Decrement as we reached the max value.
   1370		 * The release function won't be called here as we've
   1371		 * just incremented the refcount.
   1372		 */
   1373		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
   1374				gaudi_sob_group_reset_error);
   1375		cprop->next_sob_group_val[stream] = 1;
   1376		/* only two SOBs are currently in use */
   1377		cprop->curr_sob_group_idx[stream] =
   1378			(cprop->curr_sob_group_idx[stream] + 1) &
   1379							(HL_RSVD_SOBS - 1);
   1380
   1381		gaudi_collective_map_sobs(hdev, stream);
   1382
   1383		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
   1384				cprop->curr_sob_group_idx[stream], stream);
   1385	}
   1386
   1387	mb();
   1388	hl_fence_put(cs->signal_fence);
   1389	cs->signal_fence = NULL;
   1390
   1391	return 0;
   1392}
   1393
   1394static int gaudi_collective_wait_create_job(struct hl_device *hdev,
   1395		struct hl_ctx *ctx, struct hl_cs *cs,
   1396		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
   1397		u32 encaps_signal_offset)
   1398{
   1399	struct hw_queue_properties *hw_queue_prop;
   1400	struct hl_cs_counters_atomic *cntr;
   1401	struct hl_cs_job *job;
   1402	struct hl_cb *cb;
   1403	u32 cb_size;
   1404	bool patched_cb;
   1405
   1406	cntr = &hdev->aggregated_cs_counters;
   1407
   1408	if (mode == HL_COLLECTIVE_MASTER) {
   1409		/* CB size of collective master queue contains
   1410		 * 4 msg short packets for monitor 1 configuration
   1411		 * 1 fence packet
   1412		 * 4 msg short packets for monitor 2 configuration
   1413		 * 1 fence packet
   1414		 * 2 msg prot packets for completion and MSI-X
   1415		 */
   1416		cb_size = sizeof(struct packet_msg_short) * 8 +
   1417				sizeof(struct packet_fence) * 2 +
   1418				sizeof(struct packet_msg_prot) * 2;
   1419		patched_cb = true;
   1420	} else {
   1421		/* CB size of collective slave queues contains
   1422		 * 4 msg short packets for monitor configuration
   1423		 * 1 fence packet
   1424		 * 1 additional msg short packet for sob signal
   1425		 */
   1426		cb_size = sizeof(struct packet_msg_short) * 5 +
   1427				sizeof(struct packet_fence);
   1428		patched_cb = false;
   1429	}
   1430
   1431	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
   1432	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
   1433	if (!job) {
   1434		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
   1435		atomic64_inc(&cntr->out_of_mem_drop_cnt);
   1436		dev_err(hdev->dev, "Failed to allocate a new job\n");
   1437		return -ENOMEM;
   1438	}
   1439
    1440	/* Allocate an internal mapped CB for non-patched CBs */
   1441	cb = hl_cb_kernel_create(hdev, cb_size,
   1442			hdev->mmu_enable && !patched_cb);
   1443	if (!cb) {
   1444		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
   1445		atomic64_inc(&cntr->out_of_mem_drop_cnt);
   1446		kfree(job);
   1447		return -EFAULT;
   1448	}
   1449
   1450	job->id = 0;
   1451	job->cs = cs;
   1452	job->user_cb = cb;
   1453	atomic_inc(&job->user_cb->cs_cnt);
   1454	job->user_cb_size = cb_size;
   1455	job->hw_queue_id = queue_id;
   1456
    1457	/* since it's guaranteed to have only one chunk in the collective wait
   1458	 * cs, we can use this chunk to set the encapsulated signal offset
   1459	 * in the jobs.
   1460	 */
   1461	if (cs->encaps_signals)
   1462		job->encaps_sig_wait_offset = encaps_signal_offset;
   1463
    1464	/*
    1465	 * No need for parsing, the user CB is already the patched CB.
    1466	 * We call hl_cb_destroy() for two reasons: we don't need the CB
    1467	 * in the CB idr anymore, and we must decrement its refcount as
    1468	 * it was incremented inside hl_cb_kernel_create().
    1469	 */
   1470	if (patched_cb)
   1471		job->patched_cb = job->user_cb;
   1472	else
   1473		job->patched_cb = NULL;
   1474
   1475	job->job_cb_size = job->user_cb_size;
   1476	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
   1477
    1478	/* Increment refcount since for external queues we get a completion */
   1479	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
   1480		cs_get(cs);
   1481
   1482	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
   1483
   1484	list_add_tail(&job->cs_node, &cs->job_list);
   1485
   1486	hl_debugfs_add_job(hdev, job);
   1487
   1488	return 0;
   1489}
   1490
   1491static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
   1492		struct hl_ctx *ctx, struct hl_cs *cs,
   1493		u32 wait_queue_id, u32 collective_engine_id,
   1494		u32 encaps_signal_offset)
   1495{
   1496	struct gaudi_device *gaudi = hdev->asic_specific;
   1497	struct hw_queue_properties *hw_queue_prop;
   1498	u32 queue_id, collective_queue, num_jobs;
   1499	u32 stream, nic_queue, nic_idx = 0;
   1500	bool skip;
   1501	int i, rc = 0;
   1502
   1503	/* Verify wait queue id is configured as master */
   1504	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
    1505	if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
   1506		dev_err(hdev->dev,
   1507			"Queue %d is not configured as collective master\n",
   1508			wait_queue_id);
   1509		return -EINVAL;
   1510	}
   1511
   1512	/* Verify engine id is supported */
   1513	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
   1514			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
   1515		dev_err(hdev->dev,
   1516			"Collective wait does not support engine %u\n",
   1517			collective_engine_id);
   1518		return -EINVAL;
   1519	}
   1520
   1521	stream = wait_queue_id % 4;
   1522
   1523	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
   1524		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
   1525	else
   1526		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
   1527
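        		/* One job for the collective master queue plus one slave job per SOB
        		 * in the group (the NIC engines and the reduction engine); disabled
        		 * NICs are skipped below.
        		 */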
   1528	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
   1529	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
   1530
    1531	/* The first job goes to the collective master queue; it will wait for
    1532	 * the collective slave queues to finish execution.
    1533	 * The synchronization is done using two monitors:
    1534	 * the first monitor for NICs 0-7, the second monitor for NICs 8-9 and
    1535	 * the reduction engine (DMA5/TPC7).
    1536	 *
    1537	 * The rest of the jobs go to the collective slave queues, which will
    1538	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
    1539	 */
   1540	for (i = 0 ; i < num_jobs ; i++) {
   1541		if (i == 0) {
   1542			queue_id = wait_queue_id;
   1543			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
   1544				HL_COLLECTIVE_MASTER, queue_id,
   1545				wait_queue_id, encaps_signal_offset);
   1546		} else {
   1547			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
   1548				if (gaudi->hw_cap_initialized &
   1549					BIT(HW_CAP_NIC_SHIFT + nic_idx))
   1550					skip = false;
   1551				else
   1552					skip = true;
   1553
   1554				queue_id = nic_queue;
   1555				nic_queue += 4;
   1556				nic_idx++;
   1557
   1558				if (skip)
   1559					continue;
   1560			} else {
   1561				queue_id = collective_queue;
   1562			}
   1563
   1564			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
   1565				HL_COLLECTIVE_SLAVE, queue_id,
   1566				wait_queue_id, encaps_signal_offset);
   1567		}
   1568
   1569		if (rc)
   1570			return rc;
   1571	}
   1572
   1573	return rc;
   1574}
   1575
   1576static int gaudi_late_init(struct hl_device *hdev)
   1577{
   1578	struct gaudi_device *gaudi = hdev->asic_specific;
   1579	int rc;
   1580
   1581	rc = gaudi->cpucp_info_get(hdev);
   1582	if (rc) {
   1583		dev_err(hdev->dev, "Failed to get cpucp info\n");
   1584		return rc;
   1585	}
   1586
   1587	if ((hdev->card_type == cpucp_card_type_pci) &&
   1588			(hdev->nic_ports_mask & 0x3)) {
   1589		dev_info(hdev->dev,
   1590			"PCI card detected, only 8 ports are enabled\n");
   1591		hdev->nic_ports_mask &= ~0x3;
   1592
   1593		/* Stop and disable unused NIC QMANs */
   1594		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   1595					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   1596					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   1597
   1598		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   1599					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   1600					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   1601
   1602		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
   1603		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
   1604
   1605		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
   1606	}
   1607
   1608	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
   1609	if (rc) {
   1610		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
   1611		return rc;
   1612	}
   1613
   1614	/* Scrub both SRAM and DRAM */
   1615	rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
   1616	if (rc)
   1617		goto disable_pci_access;
   1618
   1619	rc = gaudi_fetch_psoc_frequency(hdev);
   1620	if (rc) {
   1621		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
   1622		goto disable_pci_access;
   1623	}
   1624
   1625	rc = gaudi_mmu_clear_pgt_range(hdev);
   1626	if (rc) {
   1627		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
   1628		goto disable_pci_access;
   1629	}
   1630
   1631	rc = gaudi_init_tpc_mem(hdev);
   1632	if (rc) {
   1633		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
   1634		goto disable_pci_access;
   1635	}
   1636
   1637	rc = gaudi_collective_init(hdev);
   1638	if (rc) {
   1639		dev_err(hdev->dev, "Failed to init collective\n");
   1640		goto disable_pci_access;
   1641	}
   1642
   1643	/* We only support a single ASID for the user, so for the sake of optimization, just
   1644	 * initialize the ASID one time during device initialization with the fixed value of 1
   1645	 */
   1646	gaudi_mmu_prepare(hdev, 1);
   1647
   1648	hl_fw_set_pll_profile(hdev);
   1649
   1650	return 0;
   1651
   1652disable_pci_access:
   1653	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
   1654
   1655	return rc;
   1656}
   1657
   1658static void gaudi_late_fini(struct hl_device *hdev)
   1659{
   1660	const struct hwmon_channel_info **channel_info_arr;
   1661	int i = 0;
   1662
   1663	if (!hdev->hl_chip_info->info)
   1664		return;
   1665
   1666	channel_info_arr = hdev->hl_chip_info->info;
   1667
   1668	while (channel_info_arr[i]) {
   1669		kfree(channel_info_arr[i]->config);
   1670		kfree(channel_info_arr[i]);
   1671		i++;
   1672	}
   1673
   1674	kfree(channel_info_arr);
   1675
   1676	hdev->hl_chip_info->info = NULL;
   1677}
   1678
   1679static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
   1680{
   1681	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
   1682	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
   1683	int i, j, rc = 0;
   1684
    1685	/*
    1686	 * The device CPU works with 40-bit addresses, and bit 39 must be set
    1687	 * to '1' when accessing the host.
    1688	 * Bits 49:39 of the full host address are saved for a later
    1689	 * configuration of the HW to perform the extension to 50 bits.
    1690	 * Because there is a single HW register that holds the extension bits,
    1691	 * these bits must be identical across the entire allocated range.
    1692	 */
   1693
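        		/* Retry the allocation until we get a range whose MSBs are constant,
        		 * i.e. the buffer does not cross a GAUDI_CPU_PCI_MSB_ADDR boundary.
        		 * All other attempts are freed at the end of this function.
        		 */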
   1694	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
   1695		virt_addr_arr[i] =
   1696			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
   1697						HL_CPU_ACCESSIBLE_MEM_SIZE,
   1698						&dma_addr_arr[i],
   1699						GFP_KERNEL | __GFP_ZERO);
   1700		if (!virt_addr_arr[i]) {
   1701			rc = -ENOMEM;
   1702			goto free_dma_mem_arr;
   1703		}
   1704
   1705		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
   1706		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
   1707				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
   1708			break;
   1709	}
   1710
   1711	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
   1712		dev_err(hdev->dev,
    1713			"MSBs of CPU accessible DMA memory are not identical across the entire range\n");
   1714		rc = -EFAULT;
   1715		goto free_dma_mem_arr;
   1716	}
   1717
   1718	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
   1719	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
   1720	hdev->cpu_pci_msb_addr =
   1721		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
   1722
   1723	if (!hdev->asic_prop.fw_security_enabled)
   1724		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
   1725
   1726free_dma_mem_arr:
   1727	for (j = 0 ; j < i ; j++)
   1728		hdev->asic_funcs->asic_dma_free_coherent(hdev,
   1729						HL_CPU_ACCESSIBLE_MEM_SIZE,
   1730						virt_addr_arr[j],
   1731						dma_addr_arr[j]);
   1732
   1733	return rc;
   1734}
   1735
   1736static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
   1737{
   1738	struct gaudi_device *gaudi = hdev->asic_specific;
   1739	struct gaudi_internal_qman_info *q;
   1740	u32 i;
   1741
   1742	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
   1743		q = &gaudi->internal_qmans[i];
   1744		if (!q->pq_kernel_addr)
   1745			continue;
   1746		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
   1747							q->pq_kernel_addr,
   1748							q->pq_dma_addr);
   1749	}
   1750}
   1751
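        	/*
        	 * Allocate the PQ buffers for all internal (on-device) queues. The PQ
        	 * size is chosen per engine type (HBM DMA, MME, TPC or NIC).
        	 */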
   1752static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
   1753{
   1754	struct gaudi_device *gaudi = hdev->asic_specific;
   1755	struct gaudi_internal_qman_info *q;
   1756	int rc, i;
   1757
   1758	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
   1759		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
   1760			continue;
   1761
   1762		q = &gaudi->internal_qmans[i];
   1763
   1764		switch (i) {
   1765		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
   1766			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
   1767			break;
   1768		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
   1769			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
   1770			break;
   1771		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
   1772			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
   1773			break;
   1774		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
   1775			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
   1776			break;
   1777		default:
    1778			dev_err(hdev->dev, "Bad internal queue index %d\n", i);
   1779			rc = -EINVAL;
   1780			goto free_internal_qmans_pq_mem;
   1781		}
   1782
   1783		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
   1784						hdev, q->pq_size,
   1785						&q->pq_dma_addr,
   1786						GFP_KERNEL | __GFP_ZERO);
   1787		if (!q->pq_kernel_addr) {
   1788			rc = -ENOMEM;
   1789			goto free_internal_qmans_pq_mem;
   1790		}
   1791	}
   1792
   1793	return 0;
   1794
   1795free_internal_qmans_pq_mem:
   1796	gaudi_free_internal_qmans_pq_mem(hdev);
   1797	return rc;
   1798}
   1799
   1800static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
   1801{
   1802	struct asic_fixed_properties *prop = &hdev->asic_prop;
   1803	struct pci_mem_region *region;
   1804
   1805	/* CFG */
   1806	region = &hdev->pci_mem_region[PCI_REGION_CFG];
   1807	region->region_base = CFG_BASE;
   1808	region->region_size = CFG_SIZE;
   1809	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
   1810	region->bar_size = CFG_BAR_SIZE;
   1811	region->bar_id = CFG_BAR_ID;
   1812	region->used = 1;
   1813
   1814	/* SRAM */
   1815	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
   1816	region->region_base = SRAM_BASE_ADDR;
   1817	region->region_size = SRAM_SIZE;
   1818	region->offset_in_bar = 0;
   1819	region->bar_size = SRAM_BAR_SIZE;
   1820	region->bar_id = SRAM_BAR_ID;
   1821	region->used = 1;
   1822
   1823	/* DRAM */
   1824	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
   1825	region->region_base = DRAM_PHYS_BASE;
   1826	region->region_size = hdev->asic_prop.dram_size;
   1827	region->offset_in_bar = 0;
   1828	region->bar_size = prop->dram_pci_bar_size;
   1829	region->bar_id = HBM_BAR_ID;
   1830	region->used = 1;
   1831
   1832	/* SP SRAM */
   1833	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
   1834	region->region_base = PSOC_SCRATCHPAD_ADDR;
   1835	region->region_size = PSOC_SCRATCHPAD_SIZE;
   1836	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
   1837	region->bar_size = CFG_BAR_SIZE;
   1838	region->bar_id = CFG_BAR_ID;
   1839	region->used = 1;
   1840}
   1841
   1842static int gaudi_sw_init(struct hl_device *hdev)
   1843{
   1844	struct gaudi_device *gaudi;
   1845	u32 i, event_id = 0;
   1846	int rc;
   1847
   1848	/* Allocate device structure */
   1849	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
   1850	if (!gaudi)
   1851		return -ENOMEM;
   1852
   1853	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
   1854		if (gaudi_irq_map_table[i].valid) {
   1855			if (event_id == GAUDI_EVENT_SIZE) {
   1856				dev_err(hdev->dev,
   1857					"Event array exceeds the limit of %u events\n",
   1858					GAUDI_EVENT_SIZE);
   1859				rc = -EINVAL;
   1860				goto free_gaudi_device;
   1861			}
   1862
   1863			gaudi->events[event_id++] =
   1864					gaudi_irq_map_table[i].fc_id;
   1865		}
   1866	}
   1867
   1868	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
   1869
   1870	hdev->asic_specific = gaudi;
   1871
   1872	/* Create DMA pool for small allocations */
   1873	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
   1874			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
   1875	if (!hdev->dma_pool) {
   1876		dev_err(hdev->dev, "failed to create DMA pool\n");
   1877		rc = -ENOMEM;
   1878		goto free_gaudi_device;
   1879	}
   1880
   1881	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
   1882	if (rc)
   1883		goto free_dma_pool;
   1884
   1885	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
   1886	if (!hdev->cpu_accessible_dma_pool) {
   1887		dev_err(hdev->dev,
   1888			"Failed to create CPU accessible DMA pool\n");
   1889		rc = -ENOMEM;
   1890		goto free_cpu_dma_mem;
   1891	}
   1892
   1893	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
   1894				(uintptr_t) hdev->cpu_accessible_dma_mem,
   1895				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
   1896	if (rc) {
   1897		dev_err(hdev->dev,
   1898			"Failed to add memory to CPU accessible DMA pool\n");
   1899		rc = -EFAULT;
   1900		goto free_cpu_accessible_dma_pool;
   1901	}
   1902
   1903	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
   1904	if (rc)
   1905		goto free_cpu_accessible_dma_pool;
   1906
   1907	spin_lock_init(&gaudi->hw_queues_lock);
   1908
   1909	hdev->supports_sync_stream = true;
   1910	hdev->supports_coresight = true;
   1911	hdev->supports_staged_submission = true;
   1912	hdev->supports_wait_for_multi_cs = true;
   1913
   1914	hdev->asic_funcs->set_pci_memory_regions(hdev);
   1915	hdev->stream_master_qid_arr =
   1916				hdev->asic_funcs->get_stream_master_qid_arr();
   1917	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
   1918
   1919	return 0;
   1920
   1921free_cpu_accessible_dma_pool:
   1922	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
   1923free_cpu_dma_mem:
   1924	if (!hdev->asic_prop.fw_security_enabled)
   1925		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
   1926					hdev->cpu_pci_msb_addr);
   1927	hdev->asic_funcs->asic_dma_free_coherent(hdev,
   1928			HL_CPU_ACCESSIBLE_MEM_SIZE,
   1929			hdev->cpu_accessible_dma_mem,
   1930			hdev->cpu_accessible_dma_address);
   1931free_dma_pool:
   1932	dma_pool_destroy(hdev->dma_pool);
   1933free_gaudi_device:
   1934	kfree(gaudi);
   1935	return rc;
   1936}
   1937
   1938static int gaudi_sw_fini(struct hl_device *hdev)
   1939{
   1940	struct gaudi_device *gaudi = hdev->asic_specific;
   1941
   1942	gaudi_free_internal_qmans_pq_mem(hdev);
   1943
   1944	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
   1945
   1946	if (!hdev->asic_prop.fw_security_enabled)
   1947		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
   1948					hdev->cpu_pci_msb_addr);
   1949
   1950	hdev->asic_funcs->asic_dma_free_coherent(hdev,
   1951			HL_CPU_ACCESSIBLE_MEM_SIZE,
   1952			hdev->cpu_accessible_dma_mem,
   1953			hdev->cpu_accessible_dma_address);
   1954
   1955	dma_pool_destroy(hdev->dma_pool);
   1956
   1957	kfree(gaudi);
   1958
   1959	return 0;
   1960}
   1961
   1962static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
   1963{
   1964	struct hl_device *hdev = arg;
   1965	int i;
   1966
   1967	if (hdev->disabled)
   1968		return IRQ_HANDLED;
   1969
   1970	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
   1971		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
   1972
   1973	hl_irq_handler_eq(irq, &hdev->event_queue);
   1974
   1975	return IRQ_HANDLED;
   1976}
   1977
   1978/*
   1979 * For backward compatibility, new MSI interrupts should be set after the
   1980 * existing CPU and NIC interrupts.
   1981 */
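        	/*
        	 * Vectors below GAUDI_EVENT_QUEUE_MSI_IDX (and the CPU EQ vector itself)
        	 * map 1:1 to their index, while any newer interrupt is placed after the
        	 * NIC interrupts, i.e. at (nr + NIC_NUMBER_OF_ENGINES + 1).
        	 */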
   1982static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
   1983				bool cpu_eq)
   1984{
   1985	int msi_vec;
   1986
   1987	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
   1988		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
   1989				GAUDI_EVENT_QUEUE_MSI_IDX);
   1990
   1991	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
   1992			(nr + NIC_NUMBER_OF_ENGINES + 1);
   1993
   1994	return pci_irq_vector(hdev->pdev, msi_vec);
   1995}
   1996
   1997static int gaudi_enable_msi_single(struct hl_device *hdev)
   1998{
   1999	int rc, irq;
   2000
   2001	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
   2002
   2003	irq = gaudi_pci_irq_vector(hdev, 0, false);
   2004	rc = request_irq(irq, gaudi_irq_handler_single, 0,
   2005			"gaudi single msi", hdev);
   2006	if (rc)
   2007		dev_err(hdev->dev,
   2008			"Failed to request single MSI IRQ\n");
   2009
   2010	return rc;
   2011}
   2012
   2013static int gaudi_enable_msi_multi(struct hl_device *hdev)
   2014{
   2015	int cq_cnt = hdev->asic_prop.completion_queues_count;
   2016	int rc, i, irq_cnt_init, irq;
   2017
   2018	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
   2019		irq = gaudi_pci_irq_vector(hdev, i, false);
   2020		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
   2021				&hdev->completion_queue[i]);
   2022		if (rc) {
    2023			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
   2024			goto free_irqs;
   2025		}
   2026	}
   2027
   2028	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
   2029	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
   2030				&hdev->event_queue);
   2031	if (rc) {
    2032		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
   2033		goto free_irqs;
   2034	}
   2035
   2036	return 0;
   2037
   2038free_irqs:
   2039	for (i = 0 ; i < irq_cnt_init ; i++)
   2040		free_irq(gaudi_pci_irq_vector(hdev, i, false),
   2041				&hdev->completion_queue[i]);
   2042	return rc;
   2043}
   2044
   2045static int gaudi_enable_msi(struct hl_device *hdev)
   2046{
   2047	struct gaudi_device *gaudi = hdev->asic_specific;
   2048	int rc;
   2049
   2050	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
   2051		return 0;
   2052
   2053	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
   2054	if (rc < 0) {
   2055		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
   2056		return rc;
   2057	}
   2058
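        		/* Only a single MSI vector is requested above (min = max = 1), so in
        		 * practice the multi-MSI branch below is not taken with this call.
        		 */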
   2059	if (rc < NUMBER_OF_INTERRUPTS) {
   2060		gaudi->multi_msi_mode = false;
   2061		rc = gaudi_enable_msi_single(hdev);
   2062	} else {
   2063		gaudi->multi_msi_mode = true;
   2064		rc = gaudi_enable_msi_multi(hdev);
   2065	}
   2066
   2067	if (rc)
   2068		goto free_pci_irq_vectors;
   2069
   2070	gaudi->hw_cap_initialized |= HW_CAP_MSI;
   2071
   2072	return 0;
   2073
   2074free_pci_irq_vectors:
   2075	pci_free_irq_vectors(hdev->pdev);
   2076	return rc;
   2077}
   2078
   2079static void gaudi_sync_irqs(struct hl_device *hdev)
   2080{
   2081	struct gaudi_device *gaudi = hdev->asic_specific;
   2082	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
   2083
   2084	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
   2085		return;
   2086
   2087	/* Wait for all pending IRQs to be finished */
   2088	if (gaudi->multi_msi_mode) {
   2089		for (i = 0 ; i < cq_cnt ; i++)
   2090			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
   2091
   2092		synchronize_irq(gaudi_pci_irq_vector(hdev,
   2093						GAUDI_EVENT_QUEUE_MSI_IDX,
   2094						true));
   2095	} else {
   2096		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
   2097	}
   2098}
   2099
   2100static void gaudi_disable_msi(struct hl_device *hdev)
   2101{
   2102	struct gaudi_device *gaudi = hdev->asic_specific;
   2103	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
   2104
   2105	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
   2106		return;
   2107
   2108	gaudi_sync_irqs(hdev);
   2109
   2110	if (gaudi->multi_msi_mode) {
   2111		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
   2112						true);
   2113		free_irq(irq, &hdev->event_queue);
   2114
   2115		for (i = 0 ; i < cq_cnt ; i++) {
   2116			irq = gaudi_pci_irq_vector(hdev, i, false);
   2117			free_irq(irq, &hdev->completion_queue[i]);
   2118		}
   2119	} else {
   2120		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
   2121	}
   2122
   2123	pci_free_irq_vectors(hdev->pdev);
   2124
   2125	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
   2126}
   2127
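        	/*
        	 * SRAM scrambling is configured by the driver only when the FW does not
        	 * own security, the FW has not already enabled it, and scrambling is
        	 * enabled for this device; otherwise the function returns early.
        	 */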
   2128static void gaudi_init_scrambler_sram(struct hl_device *hdev)
   2129{
   2130	struct gaudi_device *gaudi = hdev->asic_specific;
   2131
   2132	if (hdev->asic_prop.fw_security_enabled)
   2133		return;
   2134
   2135	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
   2136						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
   2137		return;
   2138
   2139	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
   2140		return;
   2141
   2142	if (!hdev->sram_scrambler_enable)
   2143		return;
   2144
   2145	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
   2146			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2147	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
   2148			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2149	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
   2150			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2151	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
   2152			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2153	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
   2154			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2155	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
   2156			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2157	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
   2158			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2159	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
   2160			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2161
   2162	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
   2163			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2164	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
   2165			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2166	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
   2167			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2168	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
   2169			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2170	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
   2171			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2172	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
   2173			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2174	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
   2175			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2176	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
   2177			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
   2178
   2179	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
   2180			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2181	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
   2182			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2183	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
   2184			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2185	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
   2186			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2187	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
   2188			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2189	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
   2190			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2191	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
   2192			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2193	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
   2194			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
   2195
   2196	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
   2197}
   2198
   2199static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
   2200{
   2201	struct gaudi_device *gaudi = hdev->asic_specific;
   2202
   2203	if (hdev->asic_prop.fw_security_enabled)
   2204		return;
   2205
   2206	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
   2207					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
   2208		return;
   2209
   2210	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
   2211		return;
   2212
   2213	if (!hdev->dram_scrambler_enable)
   2214		return;
   2215
   2216	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
   2217			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2218	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
   2219			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2220	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
   2221			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2222	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
   2223			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2224	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
   2225			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2226	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
   2227			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2228	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
   2229			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2230	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
   2231			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2232
   2233	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
   2234			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2235	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
   2236			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2237	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
   2238			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2239	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
   2240			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2241	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
   2242			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2243	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
   2244			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2245	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
   2246			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2247	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
   2248			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
   2249
   2250	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
   2251			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2252	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
   2253			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2254	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
   2255			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2256	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
   2257			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2258	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
   2259			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2260	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
   2261			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2262	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
   2263			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2264	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
   2265			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
   2266
   2267	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
   2268}
   2269
   2270static void gaudi_init_e2e(struct hl_device *hdev)
   2271{
   2272	if (hdev->asic_prop.fw_security_enabled)
   2273		return;
   2274
   2275	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
   2276					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
   2277		return;
   2278
   2279	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
   2280	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
   2281	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
   2282	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
   2283
   2284	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
   2285	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
   2286	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
   2287	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
   2288
   2289	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
   2290	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
   2291	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
   2292	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
   2293
   2294	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
   2295	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
   2296	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
   2297	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
   2298
   2299	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
   2300	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
   2301	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
   2302	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
   2303
   2304	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
   2305	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
   2306	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
   2307	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
   2308
   2309	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
   2310	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
   2311	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
   2312	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
   2313
   2314	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
   2315	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
   2316	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
   2317	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
   2318
   2319	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
   2320	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
   2321	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
   2322	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
   2323
   2324	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
   2325	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
   2326	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
   2327	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
   2328
   2329	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
   2330	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
   2331	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
   2332	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
   2333
   2334	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
   2335	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
   2336	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
   2337	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
   2338
   2339	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
   2340	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
   2341	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
   2342	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
   2343
   2344	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
   2345	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
   2346	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
   2347	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
   2348
   2349	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
   2350	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
   2351	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
   2352	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
   2353
   2354	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
   2355	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
   2356	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
   2357	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
   2358
   2359	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
   2360	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
   2361	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
   2362	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
   2363
   2364	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
   2365	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
   2366	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
   2367	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
   2368
   2369	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
   2370	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
   2371	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
   2372	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
   2373
   2374	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
   2375	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
   2376	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
   2377	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
   2378
   2379	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
   2380	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
   2381	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
   2382	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
   2383
   2384	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
   2385	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
   2386	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
   2387	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
   2388
   2389	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
   2390	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
   2391	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
   2392	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
   2393
   2394	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
   2395	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
   2396	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
   2397	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
   2398
   2399	if (!hdev->dram_scrambler_enable) {
   2400		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
   2401		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
   2402		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
   2403		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
   2404
   2405		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
   2406		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
   2407		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
   2408		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
   2409
   2410		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
   2411		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
   2412		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
   2413		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
   2414
   2415		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
   2416		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
   2417		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
   2418		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
   2419
   2420		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
   2421		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
   2422		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
   2423		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
   2424
   2425		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
   2426		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
   2427		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
   2428		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
   2429
   2430		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
   2431		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
   2432		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
   2433		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
   2434
   2435		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
   2436		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
   2437		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
   2438		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
   2439
   2440		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
   2441		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
   2442		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
   2443		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
   2444
   2445		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
   2446		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
   2447		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
   2448		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
   2449
   2450		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
   2451		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
   2452		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
   2453		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
   2454
   2455		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
   2456		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
   2457		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
   2458		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
   2459
   2460		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
   2461		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
   2462		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
   2463		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
   2464
   2465		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
   2466		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
   2467		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
   2468		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
   2469
   2470		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
   2471		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
   2472		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
   2473		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
   2474
   2475		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
   2476		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
   2477		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
   2478		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
   2479
   2480		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
   2481		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
   2482		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
   2483		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
   2484
   2485		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
   2486		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
   2487		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
   2488		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
   2489
   2490		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
   2491		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
   2492		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
   2493		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
   2494
   2495		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
   2496		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
   2497		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
   2498		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
   2499
   2500		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
   2501		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
   2502		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
   2503		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
   2504
   2505		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
   2506		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
   2507		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
   2508		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
   2509
   2510		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
   2511		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
   2512		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
   2513		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
   2514
   2515		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
   2516		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
   2517		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
   2518		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
   2519	}
   2520
   2521	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
   2522			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2523	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
   2524			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2525
   2526	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
   2527			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2528	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
   2529			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2530
   2531	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
   2532			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2533	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
   2534			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2535
   2536	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
   2537			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2538	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
   2539			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2540
   2541	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
   2542			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2543	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
   2544			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2545
   2546	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
   2547			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2548	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
   2549			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2550
   2551	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
   2552			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2553	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
   2554			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2555
   2556	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
   2557			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2558	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
   2559			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2560
   2561	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
   2562			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2563	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
   2564			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2565
   2566	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
   2567			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2568	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
   2569			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2570
   2571	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
   2572			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2573	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
   2574			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2575
   2576	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
   2577			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2578	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
   2579			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2580
   2581	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
   2582			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2583	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
   2584			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2585
   2586	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
   2587			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2588	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
   2589			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2590
   2591	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
   2592			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2593	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
   2594			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2595
   2596	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
   2597			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
   2598	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
   2599			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
   2600
   2601	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
   2602			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2603	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
   2604			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2605
   2606	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
   2607			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2608	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
   2609			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2610
   2611	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
   2612			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2613	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
   2614			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2615
   2616	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
   2617			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2618	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
   2619			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2620
   2621	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
   2622			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2623	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
   2624			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2625
   2626	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
   2627			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2628	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
   2629			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2630
   2631	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
   2632			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2633	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
   2634			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2635
   2636	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
   2637			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
   2638	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
   2639			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
   2640}
   2641
   2642static void gaudi_init_hbm_cred(struct hl_device *hdev)
   2643{
   2644	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
   2645
   2646	if (hdev->asic_prop.fw_security_enabled)
   2647		return;
   2648
   2649	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
   2650						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
   2651		return;
   2652
   2653	hbm0_wr = 0x33333333;
   2654	hbm0_rd = 0x77777777;
   2655	hbm1_wr = 0x55555555;
   2656	hbm1_rd = 0xDDDDDDDD;
   2657
   2658	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
   2659	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
   2660	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
   2661	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
   2662
   2663	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
   2664	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
   2665	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
   2666	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
   2667
   2668	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
   2669	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
   2670	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
   2671	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
   2672
   2673	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
   2674	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
   2675	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
   2676	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
   2677
   2678	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
   2679			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2680			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2681	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
   2682			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2683			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2684	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
   2685			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2686			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2687	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
   2688			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2689			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2690
   2691	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
   2692			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2693			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2694	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
   2695			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2696			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2697	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
   2698			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2699			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2700	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
   2701			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
   2702			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
   2703}
   2704
   2705static void gaudi_init_golden_registers(struct hl_device *hdev)
   2706{
   2707	u32 tpc_offset;
   2708	int tpc_id, i;
   2709
   2710	gaudi_init_e2e(hdev);
   2711	gaudi_init_hbm_cred(hdev);
   2712
   2713	for (tpc_id = 0, tpc_offset = 0;
   2714				tpc_id < TPC_NUMBER_OF_ENGINES;
   2715				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
   2716		/* Mask all arithmetic interrupts from TPC */
   2717		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
   2718		/* Set 16 cache lines */
   2719		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
   2720				ICACHE_FETCH_LINE_NUM, 2);
   2721	}
   2722
    2723	/* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
   2724	for (i = 0 ; i < 128 ; i += 8)
   2725		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
   2726
   2727	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
   2728	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
   2729	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
   2730	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
   2731}
   2732
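        	/*
        	 * Configure a single stream of a PCI DMA QMAN: PQ base/size on the host,
        	 * the CP_MSG_BASE addresses used by the sync manager monitors/SOBs and,
        	 * once per QMAN, the error/RAZWI interrupt routing and arbiter timeout.
        	 */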
   2733static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
   2734					int qman_id, dma_addr_t qman_pq_addr)
   2735{
   2736	struct cpu_dyn_regs *dyn_regs =
   2737			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   2738	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
   2739	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
   2740	u32 q_off, dma_qm_offset;
   2741	u32 dma_qm_err_cfg, irq_handler_offset;
   2742
   2743	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   2744
   2745	mtr_base_en_lo = lower_32_bits(CFG_BASE +
   2746				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2747	mtr_base_en_hi = upper_32_bits(CFG_BASE +
   2748				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2749	so_base_en_lo = lower_32_bits(CFG_BASE +
   2750				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2751	so_base_en_hi = upper_32_bits(CFG_BASE +
   2752				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2753	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
   2754				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2755	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
   2756				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2757	so_base_ws_lo = lower_32_bits(CFG_BASE +
   2758				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2759	so_base_ws_hi = upper_32_bits(CFG_BASE +
   2760				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2761
   2762	q_off = dma_qm_offset + qman_id * 4;
   2763
   2764	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
   2765	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
   2766
   2767	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
   2768	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
   2769	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
   2770
   2771	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
   2772	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   2773							QMAN_LDMA_SRC_OFFSET);
   2774	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   2775							QMAN_LDMA_DST_OFFSET);
   2776
   2777	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
   2778	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
   2779	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
   2780	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
   2781	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
   2782	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
   2783	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
   2784	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
   2785
   2786	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
   2787
   2788	/* The following configuration is needed only once per QMAN */
   2789	if (qman_id == 0) {
   2790		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   2791				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   2792				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
   2793
   2794		/* Configure RAZWI IRQ */
   2795		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
   2796		if (hdev->stop_on_err)
   2797			dma_qm_err_cfg |=
   2798				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
   2799
   2800		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
   2801
   2802		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
   2803			lower_32_bits(CFG_BASE + irq_handler_offset));
   2804		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
   2805			upper_32_bits(CFG_BASE + irq_handler_offset));
   2806
   2807		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
   2808			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
   2809									dma_id);
   2810
   2811		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
   2812				QM_ARB_ERR_MSG_EN_MASK);
   2813
   2814		/* Set timeout to maximum */
   2815		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
   2816
   2817		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
   2818				QMAN_EXTERNAL_MAKE_TRUSTED);
   2819
   2820		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
   2821	}
   2822}
   2823
   2824static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
   2825{
   2826	struct cpu_dyn_regs *dyn_regs =
   2827			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   2828	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
   2829	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
   2830	u32 irq_handler_offset;
   2831
   2832	/* Set to maximum possible according to physical size */
   2833	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
   2834	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
   2835
   2836	/* WA for H/W bug H3-2116 */
   2837	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
   2838
    2839	/* The STOP_ON bit implies no completion of the operation in case of RAZWI */
   2840	if (hdev->stop_on_err)
   2841		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
   2842
   2843	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
   2844
   2845	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   2846			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   2847			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
   2848
   2849	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
   2850		lower_32_bits(CFG_BASE + irq_handler_offset));
   2851	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
   2852		upper_32_bits(CFG_BASE + irq_handler_offset));
   2853
   2854	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
   2855		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
   2856	WREG32(mmDMA0_CORE_PROT + dma_offset,
   2857			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
   2858	/* If the channel is secured, it should be in MMU bypass mode */
   2859	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
   2860			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
   2861	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
   2862}
   2863
   2864static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
   2865				u32 enable_mask)
   2866{
   2867	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   2868
   2869	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
   2870}
   2871
   2872static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
   2873{
   2874	struct gaudi_device *gaudi = hdev->asic_specific;
   2875	struct hl_hw_queue *q;
   2876	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
   2877
   2878	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
   2879		return;
   2880
   2881	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
   2882		dma_id = gaudi_dma_assignment[i];
   2883		/*
    2884		 * For queues after the CPU Q, we need to add 1 to get the correct
    2885		 * queue index. In addition, we need to add the CPU EQ and NIC IRQ
    2886		 * counts in order to get the correct MSI register.
   2887		 */
   2888		if (dma_id > 1) {
   2889			cpu_skip = 1;
   2890			nic_skip = NIC_NUMBER_OF_ENGINES;
   2891		} else {
   2892			cpu_skip = 0;
   2893			nic_skip = 0;
   2894		}
   2895
   2896		for (j = 0 ; j < QMAN_STREAMS ; j++) {
   2897			q_idx = 4 * dma_id + j + cpu_skip;
   2898			q = &hdev->kernel_queues[q_idx];
   2899			q->cq_id = cq_id++;
   2900			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
   2901			gaudi_init_pci_dma_qman(hdev, dma_id, j,
   2902						q->bus_address);
   2903		}
   2904
   2905		gaudi_init_dma_core(hdev, dma_id);
   2906
   2907		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
   2908	}
   2909
   2910	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
   2911}
   2912
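        	/*
        	 * Configure one stream of an HBM DMA QMAN. The four lower CPs (qman_id
        	 * 0-3) get a PQ and CPDMA offsets, while the upper CP (qman_id 4) uses
        	 * LDMA offsets and carries the error/RAZWI interrupt configuration.
        	 */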
   2913static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
   2914					int qman_id, u64 qman_base_addr)
   2915{
   2916	struct cpu_dyn_regs *dyn_regs =
   2917			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   2918	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
   2919	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
   2920	u32 dma_qm_err_cfg, irq_handler_offset;
   2921	u32 q_off, dma_qm_offset;
   2922
   2923	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   2924
   2925	mtr_base_en_lo = lower_32_bits(CFG_BASE +
   2926			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2927	mtr_base_en_hi = upper_32_bits(CFG_BASE +
   2928				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2929	so_base_en_lo = lower_32_bits(CFG_BASE +
   2930				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2931	so_base_en_hi = upper_32_bits(CFG_BASE +
   2932				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2933	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
   2934				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2935	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
   2936				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   2937	so_base_ws_lo = lower_32_bits(CFG_BASE +
   2938				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2939	so_base_ws_hi = upper_32_bits(CFG_BASE +
   2940				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   2941
   2942	q_off = dma_qm_offset + qman_id * 4;
   2943
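	/*
	 * qman_id 0-3 are the upper (per-stream) CPs, each backed by its own
	 * PQ, so they get the PQ base/size/pointer programming. qman_id 4 is
	 * the lower CP, which has no PQ; it only gets the LDMA offsets plus
	 * the per-QMAN error reporting, arbitration and protection setup
	 * below.
	 */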
   2944	if (qman_id < 4) {
   2945		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
   2946					lower_32_bits(qman_base_addr));
   2947		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
   2948					upper_32_bits(qman_base_addr));
   2949
   2950		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
   2951		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
   2952		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
   2953
   2954		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
   2955							QMAN_CPDMA_SIZE_OFFSET);
   2956		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   2957							QMAN_CPDMA_SRC_OFFSET);
   2958		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   2959							QMAN_CPDMA_DST_OFFSET);
   2960	} else {
   2961		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   2962				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   2963				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
   2964
   2965		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
   2966							QMAN_LDMA_SIZE_OFFSET);
   2967		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   2968							QMAN_LDMA_SRC_OFFSET);
   2969		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   2970							QMAN_LDMA_DST_OFFSET);
   2971
   2972		/* Configure RAZWI IRQ */
   2973		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
   2974		if (hdev->stop_on_err)
   2975			dma_qm_err_cfg |=
   2976				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
   2977
   2978		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
   2979
   2980		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
   2981			lower_32_bits(CFG_BASE + irq_handler_offset));
   2982		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
   2983			upper_32_bits(CFG_BASE + irq_handler_offset));
   2984
   2985		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
   2986			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
   2987									dma_id);
   2988
   2989		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
   2990				QM_ARB_ERR_MSG_EN_MASK);
   2991
   2992		/* Set timeout to maximum */
   2993		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
   2994
   2995		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
   2996		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
   2997				QMAN_INTERNAL_MAKE_TRUSTED);
   2998	}
   2999
   3000	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
   3001	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
   3002	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
   3003	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
   3004
   3005	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
   3006	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
   3007		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
   3008				mtr_base_ws_lo);
   3009		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
   3010				mtr_base_ws_hi);
   3011		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
   3012				so_base_ws_lo);
   3013		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
   3014				so_base_ws_hi);
   3015	}
   3016}
   3017
   3018static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
   3019{
   3020	struct gaudi_device *gaudi = hdev->asic_specific;
   3021	struct gaudi_internal_qman_info *q;
   3022	u64 qman_base_addr;
   3023	int i, j, dma_id, internal_q_index;
   3024
   3025	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
   3026		return;
   3027
   3028	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
   3029		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
   3030
   3031		for (j = 0 ; j < QMAN_STREAMS ; j++) {
    3032			 /*
    3033			  * Add the CPU queue in order to get the correct queue
    3034			  * number, as all internal queues are placed after it
    3035			  */
   3036			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
   3037
   3038			q = &gaudi->internal_qmans[internal_q_index];
   3039			qman_base_addr = (u64) q->pq_dma_addr;
   3040			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
   3041						qman_base_addr);
   3042		}
   3043
   3044		/* Initializing lower CP for HBM DMA QMAN */
   3045		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
   3046
   3047		gaudi_init_dma_core(hdev, dma_id);
   3048
   3049		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
   3050	}
   3051
   3052	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
   3053}
   3054
   3055static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
   3056					int qman_id, u64 qman_base_addr)
   3057{
   3058	struct cpu_dyn_regs *dyn_regs =
   3059			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   3060	u32 mtr_base_lo, mtr_base_hi;
   3061	u32 so_base_lo, so_base_hi;
   3062	u32 irq_handler_offset;
   3063	u32 q_off, mme_id;
   3064	u32 mme_qm_err_cfg;
   3065
   3066	mtr_base_lo = lower_32_bits(CFG_BASE +
   3067				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3068	mtr_base_hi = upper_32_bits(CFG_BASE +
   3069				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3070	so_base_lo = lower_32_bits(CFG_BASE +
   3071				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3072	so_base_hi = upper_32_bits(CFG_BASE +
   3073				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3074
   3075	q_off = mme_offset + qman_id * 4;
   3076
   3077	if (qman_id < 4) {
   3078		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
   3079					lower_32_bits(qman_base_addr));
   3080		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
   3081					upper_32_bits(qman_base_addr));
   3082
   3083		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
   3084		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
   3085		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
   3086
   3087		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
   3088							QMAN_CPDMA_SIZE_OFFSET);
   3089		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   3090							QMAN_CPDMA_SRC_OFFSET);
   3091		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   3092							QMAN_CPDMA_DST_OFFSET);
   3093	} else {
   3094		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   3095				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   3096				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
   3097
   3098		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
   3099							QMAN_LDMA_SIZE_OFFSET);
   3100		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   3101							QMAN_LDMA_SRC_OFFSET);
   3102		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   3103							QMAN_LDMA_DST_OFFSET);
   3104
   3105		/* Configure RAZWI IRQ */
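		/*
		 * mme_offset is either 0 or twice the per-MME QM stride (only
		 * two of the MMEs expose a QMAN), so dividing by the stride
		 * and then by 2 maps it to the 0-based index (0 or 1) used in
		 * the error message data.
		 */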
   3106		mme_id = mme_offset /
   3107				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
   3108
   3109		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
   3110		if (hdev->stop_on_err)
   3111			mme_qm_err_cfg |=
   3112				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
   3113
   3114		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
   3115
   3116		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
   3117			lower_32_bits(CFG_BASE + irq_handler_offset));
   3118		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
   3119			upper_32_bits(CFG_BASE + irq_handler_offset));
   3120
   3121		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
   3122			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
   3123									mme_id);
   3124
   3125		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
   3126				QM_ARB_ERR_MSG_EN_MASK);
   3127
   3128		/* Set timeout to maximum */
   3129		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
   3130
   3131		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
   3132		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
   3133				QMAN_INTERNAL_MAKE_TRUSTED);
   3134	}
   3135
   3136	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
   3137	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
   3138	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
   3139	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
   3140}
   3141
   3142static void gaudi_init_mme_qmans(struct hl_device *hdev)
   3143{
   3144	struct gaudi_device *gaudi = hdev->asic_specific;
   3145	struct gaudi_internal_qman_info *q;
   3146	u64 qman_base_addr;
   3147	u32 mme_offset;
   3148	int i, internal_q_index;
   3149
   3150	if (gaudi->hw_cap_initialized & HW_CAP_MME)
   3151		return;
   3152
   3153	/*
   3154	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
   3155	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
   3156	 */
   3157
   3158	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
   3159
   3160	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
   3161		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
   3162		q = &gaudi->internal_qmans[internal_q_index];
   3163		qman_base_addr = (u64) q->pq_dma_addr;
   3164		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
   3165					qman_base_addr);
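		/*
		 * After the four MME_0 streams have been programmed on the
		 * N_W_MME (mmMME2), switch the offset to 0 so the MME_1
		 * streams are programmed on the S_W_MME (mmMME0).
		 */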
   3166		if (i == 3)
   3167			mme_offset = 0;
   3168	}
   3169
   3170	/* Initializing lower CP for MME QMANs */
   3171	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
   3172	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
   3173	gaudi_init_mme_qman(hdev, 0, 4, 0);
   3174
   3175	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
   3176	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
   3177
   3178	gaudi->hw_cap_initialized |= HW_CAP_MME;
   3179}
   3180
   3181static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
   3182				int qman_id, u64 qman_base_addr)
   3183{
   3184	struct cpu_dyn_regs *dyn_regs =
   3185			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   3186	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
   3187	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
   3188	u32 tpc_qm_err_cfg, irq_handler_offset;
   3189	u32 q_off, tpc_id;
   3190
   3191	mtr_base_en_lo = lower_32_bits(CFG_BASE +
   3192			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3193	mtr_base_en_hi = upper_32_bits(CFG_BASE +
   3194				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3195	so_base_en_lo = lower_32_bits(CFG_BASE +
   3196				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3197	so_base_en_hi = upper_32_bits(CFG_BASE +
   3198				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3199	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
   3200				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3201	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
   3202				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3203	so_base_ws_lo = lower_32_bits(CFG_BASE +
   3204				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3205	so_base_ws_hi = upper_32_bits(CFG_BASE +
   3206				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3207
   3208	q_off = tpc_offset + qman_id * 4;
   3209
   3210	tpc_id = tpc_offset /
   3211			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
   3212
   3213	if (qman_id < 4) {
   3214		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
   3215					lower_32_bits(qman_base_addr));
   3216		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
   3217					upper_32_bits(qman_base_addr));
   3218
   3219		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
   3220		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
   3221		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
   3222
   3223		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
   3224							QMAN_CPDMA_SIZE_OFFSET);
   3225		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   3226							QMAN_CPDMA_SRC_OFFSET);
   3227		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   3228							QMAN_CPDMA_DST_OFFSET);
   3229	} else {
   3230		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   3231				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   3232				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
   3233
   3234		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
   3235							QMAN_LDMA_SIZE_OFFSET);
   3236		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   3237							QMAN_LDMA_SRC_OFFSET);
   3238		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   3239							QMAN_LDMA_DST_OFFSET);
   3240
   3241		/* Configure RAZWI IRQ */
   3242		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
   3243		if (hdev->stop_on_err)
   3244			tpc_qm_err_cfg |=
   3245				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
   3246
   3247		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
   3248
   3249		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
   3250			lower_32_bits(CFG_BASE + irq_handler_offset));
   3251		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
   3252			upper_32_bits(CFG_BASE + irq_handler_offset));
   3253
   3254		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
   3255			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
   3256									tpc_id);
   3257
   3258		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
   3259				QM_ARB_ERR_MSG_EN_MASK);
   3260
   3261		/* Set timeout to maximum */
   3262		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
   3263
   3264		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
   3265		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
   3266				QMAN_INTERNAL_MAKE_TRUSTED);
   3267	}
   3268
   3269	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
   3270	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
   3271	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
   3272	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
   3273
   3274	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
   3275	if (tpc_id == 6) {
   3276		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
   3277				mtr_base_ws_lo);
   3278		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
   3279				mtr_base_ws_hi);
   3280		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
   3281				so_base_ws_lo);
   3282		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
   3283				so_base_ws_hi);
   3284	}
   3285}
   3286
   3287static void gaudi_init_tpc_qmans(struct hl_device *hdev)
   3288{
   3289	struct gaudi_device *gaudi = hdev->asic_specific;
   3290	struct gaudi_internal_qman_info *q;
   3291	u64 qman_base_addr;
   3292	u32 so_base_hi, tpc_offset = 0;
   3293	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
   3294			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
   3295	int i, tpc_id, internal_q_index;
   3296
   3297	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
   3298		return;
   3299
   3300	so_base_hi = upper_32_bits(CFG_BASE +
   3301				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3302
   3303	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
   3304		for (i = 0 ; i < QMAN_STREAMS ; i++) {
   3305			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
   3306						tpc_id * QMAN_STREAMS + i;
   3307			q = &gaudi->internal_qmans[internal_q_index];
   3308			qman_base_addr = (u64) q->pq_dma_addr;
   3309			gaudi_init_tpc_qman(hdev, tpc_offset, i,
   3310						qman_base_addr);
   3311
   3312			if (i == 3) {
   3313				/* Initializing lower CP for TPC QMAN */
   3314				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
   3315
   3316				/* Enable the QMAN and TPC channel */
   3317				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
   3318						QMAN_TPC_ENABLE);
   3319			}
   3320		}
   3321
   3322		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
   3323				so_base_hi);
   3324
   3325		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
   3326
   3327		gaudi->hw_cap_initialized |=
   3328				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
   3329	}
   3330}
   3331
   3332static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
   3333				int qman_id, u64 qman_base_addr, int nic_id)
   3334{
   3335	struct cpu_dyn_regs *dyn_regs =
   3336			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   3337	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
   3338	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
   3339	u32 nic_qm_err_cfg, irq_handler_offset;
   3340	u32 q_off;
   3341
   3342	mtr_base_en_lo = lower_32_bits(CFG_BASE +
   3343			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3344	mtr_base_en_hi = upper_32_bits(CFG_BASE +
   3345				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3346	so_base_en_lo = lower_32_bits(CFG_BASE +
   3347				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3348	so_base_en_hi = upper_32_bits(CFG_BASE +
   3349				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3350	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
   3351				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3352	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
   3353				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
   3354	so_base_ws_lo = lower_32_bits(CFG_BASE +
   3355				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3356	so_base_ws_hi = upper_32_bits(CFG_BASE +
   3357				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
   3358
   3359	q_off = nic_offset + qman_id * 4;
   3360
   3361	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
   3362	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
   3363
   3364	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
   3365	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
   3366	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
   3367
   3368	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
   3369							QMAN_LDMA_SIZE_OFFSET);
   3370	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
   3371							QMAN_LDMA_SRC_OFFSET);
   3372	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
   3373							QMAN_LDMA_DST_OFFSET);
   3374
   3375	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
   3376	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
   3377	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
   3378	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
   3379
   3380	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
   3381	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
   3382	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
   3383	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
   3384	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
   3385
   3386	if (qman_id == 0) {
   3387		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   3388				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   3389				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
   3390
   3391		/* Configure RAZWI IRQ */
   3392		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
   3393		if (hdev->stop_on_err)
   3394			nic_qm_err_cfg |=
   3395				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
   3396
   3397		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
   3398
   3399		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
   3400			lower_32_bits(CFG_BASE + irq_handler_offset));
   3401		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
   3402			upper_32_bits(CFG_BASE + irq_handler_offset));
   3403
   3404		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
   3405			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
   3406									nic_id);
   3407
   3408		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
   3409				QM_ARB_ERR_MSG_EN_MASK);
   3410
   3411		/* Set timeout to maximum */
   3412		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
   3413
   3414		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
   3415		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
   3416				QMAN_INTERNAL_MAKE_TRUSTED);
   3417	}
   3418}
   3419
   3420static void gaudi_init_nic_qmans(struct hl_device *hdev)
   3421{
   3422	struct gaudi_device *gaudi = hdev->asic_specific;
   3423	struct gaudi_internal_qman_info *q;
   3424	u64 qman_base_addr;
   3425	u32 nic_offset = 0;
   3426	u32 nic_delta_between_qmans =
   3427			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
   3428	u32 nic_delta_between_nics =
   3429			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
   3430	int i, nic_id, internal_q_index;
   3431
   3432	if (!hdev->nic_ports_mask)
   3433		return;
   3434
   3435	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
   3436		return;
   3437
   3438	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
   3439
   3440	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
   3441		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
   3442			nic_offset += nic_delta_between_qmans;
   3443			if (nic_id & 1) {
   3444				nic_offset -= (nic_delta_between_qmans * 2);
   3445				nic_offset += nic_delta_between_nics;
   3446			}
   3447			continue;
   3448		}
   3449
   3450		for (i = 0 ; i < QMAN_STREAMS ; i++) {
   3451			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
   3452						nic_id * QMAN_STREAMS + i;
   3453			q = &gaudi->internal_qmans[internal_q_index];
   3454			qman_base_addr = (u64) q->pq_dma_addr;
   3455			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
   3456						qman_base_addr, nic_id);
   3457		}
   3458
   3459		/* Enable the QMAN */
   3460		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
   3461
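		/*
		 * Each NIC macro hosts two QMANs. Advance to the next QMAN;
		 * after an odd port (the second QMAN of the macro), rewind the
		 * two QMAN strides and jump to the base of the next NIC macro.
		 */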
   3462		nic_offset += nic_delta_between_qmans;
   3463		if (nic_id & 1) {
   3464			nic_offset -= (nic_delta_between_qmans * 2);
   3465			nic_offset += nic_delta_between_nics;
   3466		}
   3467
   3468		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
   3469	}
   3470}
   3471
   3472static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
   3473{
   3474	struct gaudi_device *gaudi = hdev->asic_specific;
   3475
   3476	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
   3477		return;
   3478
   3479	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
   3480	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
   3481	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
   3482}
   3483
   3484static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
   3485{
   3486	struct gaudi_device *gaudi = hdev->asic_specific;
   3487
   3488	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
   3489		return;
   3490
   3491	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
   3492	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
   3493	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
   3494	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
   3495	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
   3496}
   3497
   3498static void gaudi_disable_mme_qmans(struct hl_device *hdev)
   3499{
   3500	struct gaudi_device *gaudi = hdev->asic_specific;
   3501
   3502	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
   3503		return;
   3504
   3505	WREG32(mmMME2_QM_GLBL_CFG0, 0);
   3506	WREG32(mmMME0_QM_GLBL_CFG0, 0);
   3507}
   3508
   3509static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
   3510{
   3511	struct gaudi_device *gaudi = hdev->asic_specific;
   3512	u32 tpc_offset = 0;
   3513	int tpc_id;
   3514
   3515	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
   3516		return;
   3517
   3518	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
   3519		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
   3520		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
   3521	}
   3522}
   3523
   3524static void gaudi_disable_nic_qmans(struct hl_device *hdev)
   3525{
   3526	struct gaudi_device *gaudi = hdev->asic_specific;
   3527	u32 nic_mask, nic_offset = 0;
   3528	u32 nic_delta_between_qmans =
   3529			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
   3530	u32 nic_delta_between_nics =
   3531			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
   3532	int nic_id;
   3533
   3534	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
   3535		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
   3536
   3537		if (gaudi->hw_cap_initialized & nic_mask)
   3538			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
   3539
   3540		nic_offset += nic_delta_between_qmans;
   3541		if (nic_id & 1) {
   3542			nic_offset -= (nic_delta_between_qmans * 2);
   3543			nic_offset += nic_delta_between_nics;
   3544		}
   3545	}
   3546}
   3547
   3548static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
   3549{
   3550	struct gaudi_device *gaudi = hdev->asic_specific;
   3551
   3552	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
   3553		return;
   3554
   3555	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
   3556	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3557	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3558	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3559}
   3560
   3561static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
   3562{
   3563	struct gaudi_device *gaudi = hdev->asic_specific;
   3564
   3565	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
   3566		return;
   3567
   3568	/* Stop CPs of HBM DMA QMANs */
   3569
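	/*
	 * 0x1F covers all five CPs of each QMAN: the four upper (stream) CPs
	 * and the lower CP.
	 */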
   3570	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3571	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3572	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3573	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3574	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3575}
   3576
   3577static void gaudi_stop_mme_qmans(struct hl_device *hdev)
   3578{
   3579	struct gaudi_device *gaudi = hdev->asic_specific;
   3580
   3581	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
   3582		return;
   3583
   3584	/* Stop CPs of MME QMANs */
   3585	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3586	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3587}
   3588
   3589static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
   3590{
   3591	struct gaudi_device *gaudi = hdev->asic_specific;
   3592
   3593	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
   3594		return;
   3595
   3596	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3597	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3598	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3599	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3600	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3601	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3602	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3603	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   3604}
   3605
   3606static void gaudi_stop_nic_qmans(struct hl_device *hdev)
   3607{
   3608	struct gaudi_device *gaudi = hdev->asic_specific;
   3609
   3610	/* Stop upper CPs of QMANs */
   3611
   3612	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
   3613		WREG32(mmNIC0_QM0_GLBL_CFG1,
   3614				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3615				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3616				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3617
   3618	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
   3619		WREG32(mmNIC0_QM1_GLBL_CFG1,
   3620				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3621				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3622				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3623
   3624	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
   3625		WREG32(mmNIC1_QM0_GLBL_CFG1,
   3626				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3627				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3628				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3629
   3630	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
   3631		WREG32(mmNIC1_QM1_GLBL_CFG1,
   3632				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3633				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3634				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3635
   3636	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
   3637		WREG32(mmNIC2_QM0_GLBL_CFG1,
   3638				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3639				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3640				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3641
   3642	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
   3643		WREG32(mmNIC2_QM1_GLBL_CFG1,
   3644				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3645				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3646				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3647
   3648	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
   3649		WREG32(mmNIC3_QM0_GLBL_CFG1,
   3650				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3651				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3652				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3653
   3654	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
   3655		WREG32(mmNIC3_QM1_GLBL_CFG1,
   3656				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3657				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3658				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3659
   3660	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
   3661		WREG32(mmNIC4_QM0_GLBL_CFG1,
   3662				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3663				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3664				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3665
   3666	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
   3667		WREG32(mmNIC4_QM1_GLBL_CFG1,
   3668				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
   3669				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
   3670				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
   3671}
   3672
   3673static void gaudi_pci_dma_stall(struct hl_device *hdev)
   3674{
   3675	struct gaudi_device *gaudi = hdev->asic_specific;
   3676
   3677	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
   3678		return;
   3679
   3680	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3681	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3682	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3683}
   3684
   3685static void gaudi_hbm_dma_stall(struct hl_device *hdev)
   3686{
   3687	struct gaudi_device *gaudi = hdev->asic_specific;
   3688
   3689	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
   3690		return;
   3691
   3692	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3693	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3694	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3695	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3696	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
   3697}
   3698
   3699static void gaudi_mme_stall(struct hl_device *hdev)
   3700{
   3701	struct gaudi_device *gaudi = hdev->asic_specific;
   3702
   3703	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
   3704		return;
   3705
   3706	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
   3707	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3708	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3709	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3710	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3711	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3712	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3713	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3714	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3715	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3716	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3717	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3718	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3719	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3720	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
   3721	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3722	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
   3723}
   3724
   3725static void gaudi_tpc_stall(struct hl_device *hdev)
   3726{
   3727	struct gaudi_device *gaudi = hdev->asic_specific;
   3728
   3729	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
   3730		return;
   3731
   3732	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3733	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3734	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3735	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3736	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3737	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3738	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3739	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   3740}
   3741
   3742static void gaudi_disable_clock_gating(struct hl_device *hdev)
   3743{
   3744	u32 qman_offset;
   3745	int i;
   3746
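	/*
	 * When FW security is enabled these CGM registers are expected to be
	 * managed by the FW, so the driver skips them.
	 */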
   3747	if (hdev->asic_prop.fw_security_enabled)
   3748		return;
   3749
   3750	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
   3751		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
   3752		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
   3753
   3754		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
   3755	}
   3756
   3757	WREG32(mmMME0_QM_CGM_CFG, 0);
   3758	WREG32(mmMME0_QM_CGM_CFG1, 0);
   3759	WREG32(mmMME2_QM_CGM_CFG, 0);
   3760	WREG32(mmMME2_QM_CGM_CFG1, 0);
   3761
   3762	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
   3763		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
   3764		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
   3765
   3766		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
   3767	}
   3768}
   3769
   3770static void gaudi_enable_timestamp(struct hl_device *hdev)
   3771{
   3772	/* Disable the timestamp counter */
   3773	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
   3774
   3775	/* Zero the lower/upper parts of the 64-bit counter */
   3776	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
   3777	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
   3778
   3779	/* Enable the counter */
   3780	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
   3781}
   3782
   3783static void gaudi_disable_timestamp(struct hl_device *hdev)
   3784{
   3785	/* Disable the timestamp counter */
   3786	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
   3787}
   3788
   3789static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
   3790{
   3791	u32 wait_timeout_ms;
   3792
   3793	if (hdev->pldm)
   3794		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
   3795	else
   3796		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
   3797
   3798	if (fw_reset)
   3799		goto skip_engines;
   3800
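	/*
	 * Halt order: first stop the QMAN command parsers, then stall the
	 * engines themselves, and finally disable the QMANs, sleeping in
	 * between to let in-flight work drain.
	 */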
   3801	gaudi_stop_nic_qmans(hdev);
   3802	gaudi_stop_mme_qmans(hdev);
   3803	gaudi_stop_tpc_qmans(hdev);
   3804	gaudi_stop_hbm_dma_qmans(hdev);
   3805	gaudi_stop_pci_dma_qmans(hdev);
   3806
   3807	msleep(wait_timeout_ms);
   3808
   3809	gaudi_pci_dma_stall(hdev);
   3810	gaudi_hbm_dma_stall(hdev);
   3811	gaudi_tpc_stall(hdev);
   3812	gaudi_mme_stall(hdev);
   3813
   3814	msleep(wait_timeout_ms);
   3815
   3816	gaudi_disable_nic_qmans(hdev);
   3817	gaudi_disable_mme_qmans(hdev);
   3818	gaudi_disable_tpc_qmans(hdev);
   3819	gaudi_disable_hbm_dma_qmans(hdev);
   3820	gaudi_disable_pci_dma_qmans(hdev);
   3821
   3822	gaudi_disable_timestamp(hdev);
   3823
   3824skip_engines:
   3825	gaudi_disable_msi(hdev);
   3826}
   3827
   3828static int gaudi_mmu_init(struct hl_device *hdev)
   3829{
   3830	struct asic_fixed_properties *prop = &hdev->asic_prop;
   3831	struct gaudi_device *gaudi = hdev->asic_specific;
   3832	u64 hop0_addr;
   3833	int rc, i;
   3834
   3835	if (!hdev->mmu_enable)
   3836		return 0;
   3837
   3838	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
   3839		return 0;
   3840
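	/*
	 * Every ASID gets its own hop0 page table, laid out consecutively
	 * from the MMU page-tables base address.
	 */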
   3841	for (i = 0 ; i < prop->max_asid ; i++) {
   3842		hop0_addr = prop->mmu_pgt_addr +
   3843				(i * prop->mmu_hop_table_size);
   3844
   3845		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
   3846		if (rc) {
   3847			dev_err(hdev->dev,
   3848				"failed to set hop0 addr for asid %d\n", i);
   3849			goto err;
   3850		}
   3851	}
   3852
    3853	/* init MMU cache management page */
   3854	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
   3855	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
   3856
   3857	/* mem cache invalidation */
   3858	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
   3859
   3860	hl_mmu_invalidate_cache(hdev, true, 0);
   3861
   3862	WREG32(mmMMU_UP_MMU_ENABLE, 1);
   3863	WREG32(mmMMU_UP_SPI_MASK, 0xF);
   3864
   3865	WREG32(mmSTLB_HOP_CONFIGURATION,
   3866			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
   3867
   3868	/*
   3869	 * The H/W expects the first PI after init to be 1. After wraparound
   3870	 * we'll write 0.
   3871	 */
   3872	gaudi->mmu_cache_inv_pi = 1;
   3873
   3874	gaudi->hw_cap_initialized |= HW_CAP_MMU;
   3875
   3876	return 0;
   3877
   3878err:
   3879	return rc;
   3880}
   3881
   3882static int gaudi_load_firmware_to_device(struct hl_device *hdev)
   3883{
   3884	void __iomem *dst;
   3885
   3886	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
   3887
   3888	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
   3889}
   3890
   3891static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
   3892{
   3893	void __iomem *dst;
   3894
   3895	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
   3896
   3897	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
   3898}
   3899
   3900static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
   3901{
   3902	struct dynamic_fw_load_mgr *dynamic_loader;
   3903	struct cpu_dyn_regs *dyn_regs;
   3904
   3905	dynamic_loader = &hdev->fw_loader.dynamic_loader;
   3906
    3907	/*
    3908	 * Here we update the initial values of a few specific dynamic regs,
    3909	 * as those values have to be hard-coded before the first descriptor
    3910	 * is read from the FW. In later stages of the protocol these values
    3911	 * are updated automatically by reading the FW descriptor, so the
    3912	 * data there is always up-to-date.
    3913	 */
   3914	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
   3915	dyn_regs->kmd_msg_to_cpu =
   3916				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
   3917	dyn_regs->cpu_cmd_status_to_host =
   3918				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
   3919
   3920	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
   3921}
   3922
   3923static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
   3924{
   3925	struct static_fw_load_mgr *static_loader;
   3926
   3927	static_loader = &hdev->fw_loader.static_loader;
   3928
   3929	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
   3930	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
   3931	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
   3932	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
   3933	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
   3934	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
   3935	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
   3936	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
   3937	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
   3938	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
   3939	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
   3940	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
   3941	static_loader->cpu_reset_wait_msec = hdev->pldm ?
   3942			GAUDI_PLDM_RESET_WAIT_MSEC :
   3943			GAUDI_CPU_RESET_WAIT_MSEC;
   3944}
   3945
   3946static void gaudi_init_firmware_loader(struct hl_device *hdev)
   3947{
   3948	struct asic_fixed_properties *prop = &hdev->asic_prop;
   3949	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
   3950
   3951	/* fill common fields */
   3952	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
   3953	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
   3954	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
   3955	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
   3956	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
   3957	fw_loader->skip_bmc = !hdev->bmc_enable;
   3958	fw_loader->sram_bar_id = SRAM_BAR_ID;
   3959	fw_loader->dram_bar_id = HBM_BAR_ID;
   3960
   3961	if (prop->dynamic_fw_load)
   3962		gaudi_init_dynamic_firmware_loader(hdev);
   3963	else
   3964		gaudi_init_static_firmware_loader(hdev);
   3965}
   3966
   3967static int gaudi_init_cpu(struct hl_device *hdev)
   3968{
   3969	struct gaudi_device *gaudi = hdev->asic_specific;
   3970	int rc;
   3971
   3972	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
   3973		return 0;
   3974
   3975	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
   3976		return 0;
   3977
    3978	/*
    3979	 * The device CPU works with 40-bit addresses.
    3980	 * This register sets the extension to 50 bits.
    3981	 */
   3982	if (!hdev->asic_prop.fw_security_enabled)
   3983		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
   3984
   3985	rc = hl_fw_init_cpu(hdev);
   3986
   3987	if (rc)
   3988		return rc;
   3989
   3990	gaudi->hw_cap_initialized |= HW_CAP_CPU;
   3991
   3992	return 0;
   3993}
   3994
   3995static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
   3996{
   3997	struct cpu_dyn_regs *dyn_regs =
   3998			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   3999	struct asic_fixed_properties *prop = &hdev->asic_prop;
   4000	struct gaudi_device *gaudi = hdev->asic_specific;
   4001	u32 status, irq_handler_offset;
   4002	struct hl_eq *eq;
   4003	struct hl_hw_queue *cpu_pq =
   4004			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
   4005	int err;
   4006
   4007	if (!hdev->cpu_queues_enable)
   4008		return 0;
   4009
   4010	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
   4011		return 0;
   4012
   4013	eq = &hdev->event_queue;
   4014
   4015	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
   4016	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
   4017
   4018	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
   4019	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
   4020
   4021	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
   4022			lower_32_bits(hdev->cpu_accessible_dma_address));
   4023	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
   4024			upper_32_bits(hdev->cpu_accessible_dma_address));
   4025
   4026	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
   4027	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
   4028	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
   4029
   4030	/* Used for EQ CI */
   4031	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
   4032
   4033	WREG32(mmCPU_IF_PF_PQ_PI, 0);
   4034
   4035	if (gaudi->multi_msi_mode)
   4036		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
   4037	else
   4038		WREG32(mmCPU_IF_QUEUE_INIT,
   4039			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
   4040
   4041	irq_handler_offset = prop->gic_interrupts_enable ?
   4042			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   4043			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
   4044
   4045	WREG32(irq_handler_offset,
   4046		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
   4047
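	/*
	 * The device CPU signals that the queues were taken by flipping
	 * CPU_IF_QUEUE_INIT to PQ_INIT_STATUS_READY_FOR_HOST; poll for it at
	 * a 1ms interval until cpu_timeout expires.
	 */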
   4048	err = hl_poll_timeout(
   4049		hdev,
   4050		mmCPU_IF_QUEUE_INIT,
   4051		status,
   4052		(status == PQ_INIT_STATUS_READY_FOR_HOST),
   4053		1000,
   4054		cpu_timeout);
   4055
   4056	if (err) {
   4057		dev_err(hdev->dev,
   4058			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
   4059		return -EIO;
   4060	}
   4061
   4062	/* update FW application security bits */
   4063	if (prop->fw_cpu_boot_dev_sts0_valid)
   4064		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
   4065	if (prop->fw_cpu_boot_dev_sts1_valid)
   4066		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
   4067
   4068	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
   4069	return 0;
   4070}
   4071
   4072static void gaudi_pre_hw_init(struct hl_device *hdev)
   4073{
   4074	/* Perform read from the device to make sure device is up */
   4075	RREG32(mmHW_STATE);
   4076
   4077	if (!hdev->asic_prop.fw_security_enabled) {
   4078		/* Set the access through PCI bars (Linux driver only) as
   4079		 * secured
   4080		 */
   4081		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
   4082				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
   4083				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
   4084
   4085		/* Perform read to flush the waiting writes to ensure
   4086		 * configuration was set in the device
   4087		 */
   4088		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
   4089	}
   4090
   4091	/*
   4092	 * Let's mark in the H/W that we have reached this point. We check
   4093	 * this value in the reset_before_init function to understand whether
   4094	 * we need to reset the chip before doing H/W init. This register is
   4095	 * cleared by the H/W upon H/W reset
   4096	 */
   4097	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
   4098}
   4099
   4100static int gaudi_hw_init(struct hl_device *hdev)
   4101{
   4102	struct gaudi_device *gaudi = hdev->asic_specific;
   4103	int rc;
   4104
   4105	gaudi_pre_hw_init(hdev);
   4106
   4107	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
   4108	 * So we set it here and if anyone tries to move it later to
   4109	 * a different address, there will be an error
   4110	 */
   4111	if (hdev->asic_prop.iatu_done_by_fw)
   4112		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
   4113
    4114	/*
    4115	 * Before pushing u-boot/linux to the device, we need to set the HBM
    4116	 * BAR to the base address of the DRAM
    4117	 */
   4118	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
   4119		dev_err(hdev->dev,
   4120			"failed to map HBM bar to DRAM base address\n");
   4121		return -EIO;
   4122	}
   4123
   4124	rc = gaudi_init_cpu(hdev);
   4125	if (rc) {
   4126		dev_err(hdev->dev, "failed to initialize CPU\n");
   4127		return rc;
   4128	}
   4129
   4130	/* In case the clock gating was enabled in preboot we need to disable
   4131	 * it here before touching the MME/TPC registers.
   4132	 */
   4133	gaudi_disable_clock_gating(hdev);
   4134
   4135	/* SRAM scrambler must be initialized after CPU is running from HBM */
   4136	gaudi_init_scrambler_sram(hdev);
   4137
   4138	/* This is here just in case we are working without CPU */
   4139	gaudi_init_scrambler_hbm(hdev);
   4140
   4141	gaudi_init_golden_registers(hdev);
   4142
   4143	rc = gaudi_mmu_init(hdev);
   4144	if (rc)
   4145		return rc;
   4146
   4147	gaudi_init_security(hdev);
   4148
   4149	gaudi_init_pci_dma_qmans(hdev);
   4150
   4151	gaudi_init_hbm_dma_qmans(hdev);
   4152
   4153	gaudi_init_mme_qmans(hdev);
   4154
   4155	gaudi_init_tpc_qmans(hdev);
   4156
   4157	gaudi_init_nic_qmans(hdev);
   4158
   4159	gaudi_enable_timestamp(hdev);
   4160
   4161	/* MSI must be enabled before CPU queues and NIC are initialized */
   4162	rc = gaudi_enable_msi(hdev);
   4163	if (rc)
   4164		goto disable_queues;
   4165
   4166	/* must be called after MSI was enabled */
   4167	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
   4168	if (rc) {
   4169		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
   4170			rc);
   4171		goto disable_msi;
   4172	}
   4173
   4174	/* Perform read from the device to flush all configuration */
   4175	RREG32(mmHW_STATE);
   4176
   4177	return 0;
   4178
   4179disable_msi:
   4180	gaudi_disable_msi(hdev);
   4181disable_queues:
   4182	gaudi_disable_mme_qmans(hdev);
   4183	gaudi_disable_pci_dma_qmans(hdev);
   4184
   4185	return rc;
   4186}
   4187
   4188static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
   4189{
   4190	struct cpu_dyn_regs *dyn_regs =
   4191			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   4192	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
   4193	struct gaudi_device *gaudi = hdev->asic_specific;
   4194	bool driver_performs_reset;
   4195
   4196	if (!hard_reset) {
   4197		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
   4198		return;
   4199	}
   4200
   4201	if (hdev->pldm) {
   4202		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
   4203		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
   4204	} else {
   4205		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
   4206		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
   4207	}
   4208
   4209	if (fw_reset) {
   4210		dev_dbg(hdev->dev,
   4211			"Firmware performs HARD reset, going to wait %dms\n",
   4212			reset_timeout_ms);
   4213
   4214		goto skip_reset;
   4215	}
   4216
   4217	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
   4218					!hdev->asic_prop.hard_reset_done_by_fw);
   4219
   4220	/* Set device to handle FLR by H/W as we will put the device CPU to
   4221	 * halt mode
   4222	 */
   4223	if (driver_performs_reset)
   4224		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
   4225					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
   4226
    4227	/* If Linux is loaded on the device CPU we need to communicate with it
    4228	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
    4229	 * registers in case of old F/Ws
    4230	 */
   4231	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
   4232		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   4233				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   4234				le32_to_cpu(dyn_regs->gic_host_halt_irq);
   4235
   4236		WREG32(irq_handler_offset,
   4237			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
   4238
   4239		/* This is a hail-mary attempt to revive the card in the small chance that the
   4240		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
   4241		 * In that case, triggering reset through GIC won't help. We need to trigger the
   4242		 * reset as if Linux wasn't loaded.
   4243		 *
   4244		 * We do it only if the reset cause was HB, because that would be the indication
   4245		 * of such an event.
   4246		 *
   4247		 * In case watchdog hasn't expired but we still got HB, then this won't do any
   4248		 * damage.
   4249		 */
   4250		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
   4251			if (hdev->asic_prop.hard_reset_done_by_fw)
   4252				hl_fw_ask_hard_reset_without_linux(hdev);
   4253			else
   4254				hl_fw_ask_halt_machine_without_linux(hdev);
   4255		}
   4256	} else {
   4257		if (hdev->asic_prop.hard_reset_done_by_fw)
   4258			hl_fw_ask_hard_reset_without_linux(hdev);
   4259		else
   4260			hl_fw_ask_halt_machine_without_linux(hdev);
   4261	}
   4262
   4263	if (driver_performs_reset) {
   4264
   4265		/* Configure the reset registers. Must be done as early as
   4266		 * possible in case we fail during H/W initialization
   4267		 */
   4268		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
   4269						(CFG_RST_H_DMA_MASK |
   4270						CFG_RST_H_MME_MASK |
   4271						CFG_RST_H_SM_MASK |
   4272						CFG_RST_H_TPC_7_MASK));
   4273
   4274		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
   4275
   4276		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
   4277						(CFG_RST_H_HBM_MASK |
   4278						CFG_RST_H_TPC_7_MASK |
   4279						CFG_RST_H_NIC_MASK |
   4280						CFG_RST_H_SM_MASK |
   4281						CFG_RST_H_DMA_MASK |
   4282						CFG_RST_H_MME_MASK |
   4283						CFG_RST_H_CPU_MASK |
   4284						CFG_RST_H_MMU_MASK));
   4285
   4286		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
   4287						(CFG_RST_L_IF_MASK |
   4288						CFG_RST_L_PSOC_MASK |
   4289						CFG_RST_L_TPC_MASK));
   4290
   4291		msleep(cpu_timeout_ms);
   4292
   4293		/* Tell ASIC not to re-initialize PCIe */
   4294		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
   4295
   4296		/* Restart BTL/BLR upon hard-reset */
   4297		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
   4298
   4299		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
   4300			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
   4301
   4302		dev_dbg(hdev->dev,
   4303			"Issued HARD reset command, going to wait %dms\n",
   4304			reset_timeout_ms);
   4305	} else {
   4306		dev_dbg(hdev->dev,
   4307			"Firmware performs HARD reset, going to wait %dms\n",
   4308			reset_timeout_ms);
   4309	}
   4310
   4311skip_reset:
   4312	/*
   4313	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
   4314	 * itself is in reset. Need to wait until the reset is deasserted
   4315	 */
   4316	msleep(reset_timeout_ms);
   4317
   4318	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
   4319	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
   4320		dev_err(hdev->dev,
   4321			"Timeout while waiting for device to reset 0x%x\n",
   4322			status);
   4323
   4324	if (gaudi) {
   4325		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
   4326						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
   4327						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
   4328						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
   4329						HW_CAP_HBM_SCRAMBLER);
   4330
   4331		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
   4332
   4333		hdev->device_cpu_is_halted = false;
   4334	}
   4335}
   4336
   4337static int gaudi_suspend(struct hl_device *hdev)
   4338{
   4339	int rc;
   4340
   4341	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
   4342	if (rc)
   4343		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
   4344
   4345	return rc;
   4346}
   4347
   4348static int gaudi_resume(struct hl_device *hdev)
   4349{
   4350	return gaudi_init_iatu(hdev);
   4351}
   4352
   4353static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
   4354			void *cpu_addr, dma_addr_t dma_addr, size_t size)
   4355{
   4356	int rc;
   4357
   4358	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
   4359			VM_DONTCOPY | VM_NORESERVE;
   4360
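	/*
	 * dma_addr is tagged with HOST_PHYS_BASE elsewhere in the driver, so
	 * strip it off before handing the address to dma_mmap_coherent().
	 */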
   4361	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
   4362				(dma_addr - HOST_PHYS_BASE), size);
   4363	if (rc)
   4364		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
   4365
   4366	return rc;
   4367}
   4368
   4369static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
   4370{
   4371	struct cpu_dyn_regs *dyn_regs =
   4372			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   4373	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
   4374	struct gaudi_device *gaudi = hdev->asic_specific;
   4375	bool invalid_queue = false;
   4376	int dma_id;
   4377
   4378	switch (hw_queue_id) {
   4379	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
   4380		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
   4381		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4382		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
   4383		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4384		break;
   4385
   4386	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
   4387		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
   4388		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4389		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
   4390		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4391		break;
   4392
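	/*
	 * For DMA channels 2-7 the CPU queue sits between DMA_1_3 and
	 * DMA_2_0 in the queue ID enumeration, hence the (hw_queue_id - 1)
	 * below when extracting the stream index.
	 */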
   4393	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
   4394		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
   4395		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4396		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
   4397		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4398		break;
   4399
   4400	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
   4401		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
   4402		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4403		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
   4404		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4405		break;
   4406
   4407	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
   4408		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
   4409		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4410		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
   4411		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4412		break;
   4413
   4414	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
   4415		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
   4416		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4417		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
   4418		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4419		break;
   4420
   4421	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
   4422		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
   4423		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4424		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
   4425		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4426		break;
   4427
   4428	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
   4429		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
   4430		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
   4431		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
   4432		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
   4433		break;
   4434
   4435	case GAUDI_QUEUE_ID_CPU_PQ:
   4436		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
   4437			db_reg_offset = mmCPU_IF_PF_PQ_PI;
   4438		else
   4439			invalid_queue = true;
   4440		break;
   4441
   4442	case GAUDI_QUEUE_ID_MME_0_0:
   4443		db_reg_offset = mmMME2_QM_PQ_PI_0;
   4444		break;
   4445
   4446	case GAUDI_QUEUE_ID_MME_0_1:
   4447		db_reg_offset = mmMME2_QM_PQ_PI_1;
   4448		break;
   4449
   4450	case GAUDI_QUEUE_ID_MME_0_2:
   4451		db_reg_offset = mmMME2_QM_PQ_PI_2;
   4452		break;
   4453
   4454	case GAUDI_QUEUE_ID_MME_0_3:
   4455		db_reg_offset = mmMME2_QM_PQ_PI_3;
   4456		break;
   4457
   4458	case GAUDI_QUEUE_ID_MME_1_0:
   4459		db_reg_offset = mmMME0_QM_PQ_PI_0;
   4460		break;
   4461
   4462	case GAUDI_QUEUE_ID_MME_1_1:
   4463		db_reg_offset = mmMME0_QM_PQ_PI_1;
   4464		break;
   4465
   4466	case GAUDI_QUEUE_ID_MME_1_2:
   4467		db_reg_offset = mmMME0_QM_PQ_PI_2;
   4468		break;
   4469
   4470	case GAUDI_QUEUE_ID_MME_1_3:
   4471		db_reg_offset = mmMME0_QM_PQ_PI_3;
   4472		break;
   4473
   4474	case GAUDI_QUEUE_ID_TPC_0_0:
   4475		db_reg_offset = mmTPC0_QM_PQ_PI_0;
   4476		break;
   4477
   4478	case GAUDI_QUEUE_ID_TPC_0_1:
   4479		db_reg_offset = mmTPC0_QM_PQ_PI_1;
   4480		break;
   4481
   4482	case GAUDI_QUEUE_ID_TPC_0_2:
   4483		db_reg_offset = mmTPC0_QM_PQ_PI_2;
   4484		break;
   4485
   4486	case GAUDI_QUEUE_ID_TPC_0_3:
   4487		db_reg_offset = mmTPC0_QM_PQ_PI_3;
   4488		break;
   4489
   4490	case GAUDI_QUEUE_ID_TPC_1_0:
   4491		db_reg_offset = mmTPC1_QM_PQ_PI_0;
   4492		break;
   4493
   4494	case GAUDI_QUEUE_ID_TPC_1_1:
   4495		db_reg_offset = mmTPC1_QM_PQ_PI_1;
   4496		break;
   4497
   4498	case GAUDI_QUEUE_ID_TPC_1_2:
   4499		db_reg_offset = mmTPC1_QM_PQ_PI_2;
   4500		break;
   4501
   4502	case GAUDI_QUEUE_ID_TPC_1_3:
   4503		db_reg_offset = mmTPC1_QM_PQ_PI_3;
   4504		break;
   4505
   4506	case GAUDI_QUEUE_ID_TPC_2_0:
   4507		db_reg_offset = mmTPC2_QM_PQ_PI_0;
   4508		break;
   4509
   4510	case GAUDI_QUEUE_ID_TPC_2_1:
   4511		db_reg_offset = mmTPC2_QM_PQ_PI_1;
   4512		break;
   4513
   4514	case GAUDI_QUEUE_ID_TPC_2_2:
   4515		db_reg_offset = mmTPC2_QM_PQ_PI_2;
   4516		break;
   4517
   4518	case GAUDI_QUEUE_ID_TPC_2_3:
   4519		db_reg_offset = mmTPC2_QM_PQ_PI_3;
   4520		break;
   4521
   4522	case GAUDI_QUEUE_ID_TPC_3_0:
   4523		db_reg_offset = mmTPC3_QM_PQ_PI_0;
   4524		break;
   4525
   4526	case GAUDI_QUEUE_ID_TPC_3_1:
   4527		db_reg_offset = mmTPC3_QM_PQ_PI_1;
   4528		break;
   4529
   4530	case GAUDI_QUEUE_ID_TPC_3_2:
   4531		db_reg_offset = mmTPC3_QM_PQ_PI_2;
   4532		break;
   4533
   4534	case GAUDI_QUEUE_ID_TPC_3_3:
   4535		db_reg_offset = mmTPC3_QM_PQ_PI_3;
   4536		break;
   4537
   4538	case GAUDI_QUEUE_ID_TPC_4_0:
   4539		db_reg_offset = mmTPC4_QM_PQ_PI_0;
   4540		break;
   4541
   4542	case GAUDI_QUEUE_ID_TPC_4_1:
   4543		db_reg_offset = mmTPC4_QM_PQ_PI_1;
   4544		break;
   4545
   4546	case GAUDI_QUEUE_ID_TPC_4_2:
   4547		db_reg_offset = mmTPC4_QM_PQ_PI_2;
   4548		break;
   4549
   4550	case GAUDI_QUEUE_ID_TPC_4_3:
   4551		db_reg_offset = mmTPC4_QM_PQ_PI_3;
   4552		break;
   4553
   4554	case GAUDI_QUEUE_ID_TPC_5_0:
   4555		db_reg_offset = mmTPC5_QM_PQ_PI_0;
   4556		break;
   4557
   4558	case GAUDI_QUEUE_ID_TPC_5_1:
   4559		db_reg_offset = mmTPC5_QM_PQ_PI_1;
   4560		break;
   4561
   4562	case GAUDI_QUEUE_ID_TPC_5_2:
   4563		db_reg_offset = mmTPC5_QM_PQ_PI_2;
   4564		break;
   4565
   4566	case GAUDI_QUEUE_ID_TPC_5_3:
   4567		db_reg_offset = mmTPC5_QM_PQ_PI_3;
   4568		break;
   4569
   4570	case GAUDI_QUEUE_ID_TPC_6_0:
   4571		db_reg_offset = mmTPC6_QM_PQ_PI_0;
   4572		break;
   4573
   4574	case GAUDI_QUEUE_ID_TPC_6_1:
   4575		db_reg_offset = mmTPC6_QM_PQ_PI_1;
   4576		break;
   4577
   4578	case GAUDI_QUEUE_ID_TPC_6_2:
   4579		db_reg_offset = mmTPC6_QM_PQ_PI_2;
   4580		break;
   4581
   4582	case GAUDI_QUEUE_ID_TPC_6_3:
   4583		db_reg_offset = mmTPC6_QM_PQ_PI_3;
   4584		break;
   4585
   4586	case GAUDI_QUEUE_ID_TPC_7_0:
   4587		db_reg_offset = mmTPC7_QM_PQ_PI_0;
   4588		break;
   4589
   4590	case GAUDI_QUEUE_ID_TPC_7_1:
   4591		db_reg_offset = mmTPC7_QM_PQ_PI_1;
   4592		break;
   4593
   4594	case GAUDI_QUEUE_ID_TPC_7_2:
   4595		db_reg_offset = mmTPC7_QM_PQ_PI_2;
   4596		break;
   4597
   4598	case GAUDI_QUEUE_ID_TPC_7_3:
   4599		db_reg_offset = mmTPC7_QM_PQ_PI_3;
   4600		break;
   4601
   4602	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
   4603		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
   4604			invalid_queue = true;
   4605
   4606		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4607		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
   4608		break;
   4609
   4610	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
   4611		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
   4612			invalid_queue = true;
   4613
   4614		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4615		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
   4616		break;
   4617
   4618	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
   4619		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
   4620			invalid_queue = true;
   4621
   4622		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4623		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
   4624		break;
   4625
   4626	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
   4627		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
   4628			invalid_queue = true;
   4629
   4630		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4631		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
   4632		break;
   4633
   4634	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
   4635		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
   4636			invalid_queue = true;
   4637
   4638		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4639		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
   4640		break;
   4641
   4642	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
   4643		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
   4644			invalid_queue = true;
   4645
   4646		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4647		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
   4648		break;
   4649
   4650	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
   4651		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
   4652			invalid_queue = true;
   4653
   4654		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4655		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
   4656		break;
   4657
   4658	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
   4659		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
   4660			invalid_queue = true;
   4661
   4662		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4663		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
   4664		break;
   4665
   4666	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
   4667		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
   4668			invalid_queue = true;
   4669
   4670		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4671		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
   4672		break;
   4673
   4674	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
   4675		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
   4676			invalid_queue = true;
   4677
   4678		q_off = ((hw_queue_id - 1) & 0x3) * 4;
   4679		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
   4680		break;
   4681
   4682	default:
   4683		invalid_queue = true;
   4684	}
   4685
   4686	if (invalid_queue) {
   4687		/* Should never get here */
   4688		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
   4689			hw_queue_id);
   4690		return;
   4691	}
   4692
   4693	db_value = pi;
   4694
   4695	/* ring the doorbell */
   4696	WREG32(db_reg_offset, db_value);
   4697
   4698	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
   4699		/* make sure device CPU will read latest data from host */
   4700		mb();
   4701
   4702		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   4703				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   4704				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
   4705
   4706		WREG32(irq_handler_offset,
   4707			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
   4708	}
   4709}
   4710
   4711static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
   4712				struct hl_bd *bd)
   4713{
   4714	__le64 *pbd = (__le64 *) bd;
   4715
   4716	/* The QMANs are on the host memory so a simple copy suffices */
   4717	pqe[0] = pbd[0];
   4718	pqe[1] = pbd[1];
   4719}
   4720
   4721static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
   4722					dma_addr_t *dma_handle, gfp_t flags)
   4723{
   4724	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
   4725						dma_handle, flags);
   4726
   4727	/* Shift to the device's base physical address of host memory */
   4728	if (kernel_addr)
   4729		*dma_handle += HOST_PHYS_BASE;
   4730
   4731	return kernel_addr;
   4732}
   4733
   4734static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
   4735		void *cpu_addr, dma_addr_t dma_handle)
   4736{
   4737	/* Cancel the device's base physical address of host memory */
   4738	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
   4739
   4740	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
   4741}
   4742
   4743static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
   4744{
   4745	struct asic_fixed_properties *prop = &hdev->asic_prop;
   4746	u64  cur_addr = DRAM_BASE_ADDR_USER;
   4747	u32 chunk_size, busy;
   4748	int rc, dma_id;
   4749
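	/*
	 * Scrub HBM in up-to-2GB chunks: each pass hands one chunk to every
	 * DMA channel (memset mode, with the fill value in the SRC registers),
	 * then waits for all channels to go idle before programming the next
	 * batch of chunks.
	 */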
   4750	while (cur_addr < prop->dram_end_address) {
   4751		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
   4752			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
   4753
   4754			chunk_size =
   4755			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
   4756
   4757			dev_dbg(hdev->dev,
   4758				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
   4759				cur_addr, cur_addr + chunk_size);
   4760
   4761			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
   4762					lower_32_bits(val));
   4763			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
   4764					upper_32_bits(val));
   4765			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
   4766						lower_32_bits(cur_addr));
   4767			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
   4768						upper_32_bits(cur_addr));
   4769			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
   4770					chunk_size);
   4771			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
   4772					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
   4773					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
   4774
   4775			cur_addr += chunk_size;
   4776
   4777			if (cur_addr == prop->dram_end_address)
   4778				break;
   4779		}
   4780
   4781		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
   4782			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
   4783
   4784			rc = hl_poll_timeout(
   4785				hdev,
   4786				mmDMA0_CORE_STS0 + dma_offset,
   4787				busy,
   4788				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
   4789				1000,
   4790				HBM_SCRUBBING_TIMEOUT_US);
   4791
   4792			if (rc) {
   4793				dev_err(hdev->dev,
   4794					"DMA Timeout during HBM scrubbing of DMA #%d\n",
   4795					dma_id);
   4796				return -EIO;
   4797			}
   4798		}
   4799	}
   4800
   4801	return 0;
   4802}
   4803
   4804static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
   4805{
   4806	struct asic_fixed_properties *prop = &hdev->asic_prop;
   4807	int rc = 0;
   4808	u64 val = 0;
   4809
   4810	if (!hdev->memory_scrub)
   4811		return 0;
   4812
   4813	if (!addr && !size) {
   4814		/* Wait till device is idle */
   4815		rc = hl_poll_timeout(
   4816				hdev,
   4817				mmDMA0_CORE_STS0/* dummy */,
   4818				val/* dummy */,
   4819				(hdev->asic_funcs->is_device_idle(hdev, NULL,
   4820						0, NULL)),
   4821						1000,
   4822						HBM_SCRUBBING_TIMEOUT_US);
   4823		if (rc) {
   4824			dev_err(hdev->dev, "timed out while waiting for the device to become idle\n");
   4825			return -EIO;
   4826		}
   4827
   4828		/* Scrub SRAM */
   4829		addr = prop->sram_user_base_address;
   4830		size = hdev->pldm ? 0x10000 :
   4831				(prop->sram_size - SRAM_USER_BASE_OFFSET);
   4832		val = 0x7777777777777777ull;
   4833
   4834		rc = gaudi_memset_device_memory(hdev, addr, size, val);
   4835		if (rc) {
   4836			dev_err(hdev->dev,
   4837				"Failed to clear SRAM in mem scrub all\n");
   4838			return rc;
   4839		}
   4840
   4841		/* Scrub HBM using all DMA channels in parallel */
   4842		rc = gaudi_scrub_device_dram(hdev, 0xdeadbeaf);
   4843		if (rc)
   4844			dev_err(hdev->dev,
   4845				"Failed to clear HBM in mem scrub all\n");
   4846	}
   4847
   4848	return rc;
   4849}
   4850
   4851static void *gaudi_get_int_queue_base(struct hl_device *hdev,
   4852				u32 queue_id, dma_addr_t *dma_handle,
   4853				u16 *queue_len)
   4854{
   4855	struct gaudi_device *gaudi = hdev->asic_specific;
   4856	struct gaudi_internal_qman_info *q;
   4857
   4858	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
   4859			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
   4860		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
   4861		return NULL;
   4862	}
   4863
   4864	q = &gaudi->internal_qmans[queue_id];
   4865	*dma_handle = q->pq_dma_addr;
   4866	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
   4867
   4868	return q->pq_kernel_addr;
   4869}
   4870
   4871static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
   4872				u16 len, u32 timeout, u64 *result)
   4873{
   4874	struct gaudi_device *gaudi = hdev->asic_specific;
   4875
   4876	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
   4877		if (result)
   4878			*result = 0;
   4879		return 0;
   4880	}
   4881
   4882	if (!timeout)
   4883		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
   4884
   4885	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
   4886						timeout, result);
   4887}
   4888
   4889static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
   4890{
   4891	struct packet_msg_prot *fence_pkt;
   4892	dma_addr_t pkt_dma_addr;
   4893	u32 fence_val, tmp, timeout_usec;
   4894	dma_addr_t fence_dma_addr;
   4895	u32 *fence_ptr;
   4896	int rc;
   4897
   4898	if (hdev->pldm)
   4899		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
   4900	else
   4901		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
   4902
   4903	fence_val = GAUDI_QMAN0_FENCE_VAL;
   4904
   4905	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
   4906							&fence_dma_addr);
   4907	if (!fence_ptr) {
   4908		dev_err(hdev->dev,
   4909			"Failed to allocate memory for H/W queue %d testing\n",
   4910			hw_queue_id);
   4911		return -ENOMEM;
   4912	}
   4913
   4914	*fence_ptr = 0;
   4915
   4916	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
   4917					sizeof(struct packet_msg_prot),
   4918					GFP_KERNEL, &pkt_dma_addr);
   4919	if (!fence_pkt) {
   4920		dev_err(hdev->dev,
   4921			"Failed to allocate packet for H/W queue %d testing\n",
   4922			hw_queue_id);
   4923		rc = -ENOMEM;
   4924		goto free_fence_ptr;
   4925	}
   4926
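	/*
	 * Build a single MSG_PROT packet that writes the fence value into the
	 * host buffer allocated above; seeing that value appear proves the
	 * queue fetched and executed the packet.
	 */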
   4927	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
   4928	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
   4929	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   4930
   4931	fence_pkt->ctl = cpu_to_le32(tmp);
   4932	fence_pkt->value = cpu_to_le32(fence_val);
   4933	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
   4934
   4935	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
   4936					sizeof(struct packet_msg_prot),
   4937					pkt_dma_addr);
   4938	if (rc) {
   4939		dev_err(hdev->dev,
   4940			"Failed to send fence packet to H/W queue %d\n",
   4941			hw_queue_id);
   4942		goto free_pkt;
   4943	}
   4944
   4945	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
   4946					1000, timeout_usec, true);
   4947
   4948	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
   4949
   4950	if (rc == -ETIMEDOUT) {
   4951		dev_err(hdev->dev,
   4952			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
   4953			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
   4954		rc = -EIO;
   4955	}
   4956
   4957free_pkt:
   4958	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
   4959					pkt_dma_addr);
   4960free_fence_ptr:
   4961	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
   4962					fence_dma_addr);
   4963	return rc;
   4964}
   4965
   4966static int gaudi_test_cpu_queue(struct hl_device *hdev)
   4967{
   4968	struct gaudi_device *gaudi = hdev->asic_specific;
   4969
   4970	/*
   4971	 * check capability here as send_cpu_message() won't update the result
   4972	 * value if no capability
   4973	 */
   4974	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
   4975		return 0;
   4976
   4977	return hl_fw_test_cpu_queue(hdev);
   4978}
   4979
   4980static int gaudi_test_queues(struct hl_device *hdev)
   4981{
   4982	int i, rc, ret_val = 0;
   4983
   4984	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
   4985		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
   4986			rc = gaudi_test_queue(hdev, i);
   4987			if (rc)
   4988				ret_val = -EINVAL;
   4989		}
   4990	}
   4991
   4992	rc = gaudi_test_cpu_queue(hdev);
   4993	if (rc)
   4994		ret_val = -EINVAL;
   4995
   4996	return ret_val;
   4997}
   4998
   4999static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
   5000		gfp_t mem_flags, dma_addr_t *dma_handle)
   5001{
   5002	void *kernel_addr;
   5003
   5004	if (size > GAUDI_DMA_POOL_BLK_SIZE)
   5005		return NULL;
   5006
   5007	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
   5008
   5009	/* Shift to the device's base physical address of host memory */
   5010	if (kernel_addr)
   5011		*dma_handle += HOST_PHYS_BASE;
   5012
   5013	return kernel_addr;
   5014}
   5015
   5016static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
   5017			dma_addr_t dma_addr)
   5018{
   5019	/* Cancel the device's base physical address of host memory */
   5020	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
   5021
   5022	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
   5023}
   5024
   5025static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
   5026					size_t size, dma_addr_t *dma_handle)
   5027{
   5028	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
   5029}
   5030
   5031static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
   5032						size_t size, void *vaddr)
   5033{
   5034	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
   5035}
   5036
   5037static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
   5038{
   5039	struct scatterlist *sg, *sg_next_iter;
   5040	u32 count, dma_desc_cnt;
   5041	u64 len, len_next;
   5042	dma_addr_t addr, addr_next;
   5043
   5044	dma_desc_cnt = 0;
   5045
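	/*
	 * Walk the DMA-mapped scatterlist and merge runs of entries that are
	 * physically contiguous (up to DMA_MAX_TRANSFER_SIZE per run); each
	 * merged run will need one LIN_DMA packet in the patched CB.
	 */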
   5046	for_each_sgtable_dma_sg(sgt, sg, count) {
   5047		len = sg_dma_len(sg);
   5048		addr = sg_dma_address(sg);
   5049
   5050		if (len == 0)
   5051			break;
   5052
   5053		while ((count + 1) < sgt->nents) {
   5054			sg_next_iter = sg_next(sg);
   5055			len_next = sg_dma_len(sg_next_iter);
   5056			addr_next = sg_dma_address(sg_next_iter);
   5057
   5058			if (len_next == 0)
   5059				break;
   5060
   5061			if ((addr + len == addr_next) &&
   5062				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
   5063				len += len_next;
   5064				count++;
   5065				sg = sg_next_iter;
   5066			} else {
   5067				break;
   5068			}
   5069		}
   5070
   5071		dma_desc_cnt++;
   5072	}
   5073
   5074	return dma_desc_cnt * sizeof(struct packet_lin_dma);
   5075}
   5076
   5077static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
   5078				struct hl_cs_parser *parser,
   5079				struct packet_lin_dma *user_dma_pkt,
   5080				u64 addr, enum dma_data_direction dir)
   5081{
   5082	struct hl_userptr *userptr;
   5083	int rc;
   5084
   5085	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
   5086			parser->job_userptr_list, &userptr))
   5087		goto already_pinned;
   5088
   5089	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
   5090	if (!userptr)
   5091		return -ENOMEM;
   5092
   5093	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
   5094				userptr);
   5095	if (rc)
   5096		goto free_userptr;
   5097
   5098	list_add_tail(&userptr->job_node, parser->job_userptr_list);
   5099
   5100	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
   5101	if (rc) {
   5102		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
   5103		goto unpin_memory;
   5104	}
   5105
   5106	userptr->dma_mapped = true;
   5107	userptr->dir = dir;
   5108
   5109already_pinned:
   5110	parser->patched_cb_size +=
   5111			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
   5112
   5113	return 0;
   5114
   5115unpin_memory:
   5116	list_del(&userptr->job_node);
   5117	hl_unpin_host_memory(hdev, userptr);
   5118free_userptr:
   5119	kfree(userptr);
   5120	return rc;
   5121}
   5122
   5123static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
   5124				struct hl_cs_parser *parser,
   5125				struct packet_lin_dma *user_dma_pkt,
   5126				bool src_in_host)
   5127{
   5128	enum dma_data_direction dir;
   5129	bool skip_host_mem_pin = false, user_memset;
   5130	u64 addr;
   5131	int rc = 0;
   5132
   5133	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
   5134			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
   5135			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
   5136
   5137	if (src_in_host) {
   5138		if (user_memset)
   5139			skip_host_mem_pin = true;
   5140
   5141		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
   5142		dir = DMA_TO_DEVICE;
   5143		addr = le64_to_cpu(user_dma_pkt->src_addr);
   5144	} else {
   5145		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
   5146		dir = DMA_FROM_DEVICE;
   5147		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
   5148				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
   5149				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
   5150	}
   5151
   5152	if (skip_host_mem_pin)
   5153		parser->patched_cb_size += sizeof(*user_dma_pkt);
   5154	else
   5155		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
   5156						addr, dir);
   5157
   5158	return rc;
   5159}
   5160
   5161static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
   5162				struct hl_cs_parser *parser,
   5163				struct packet_lin_dma *user_dma_pkt)
   5164{
   5165	bool src_in_host = false;
   5166	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
   5167			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
   5168			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
   5169
   5170	dev_dbg(hdev->dev, "DMA packet details:\n");
   5171	dev_dbg(hdev->dev, "source == 0x%llx\n",
   5172				le64_to_cpu(user_dma_pkt->src_addr));
   5173	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
   5174	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
   5175
   5176	/*
   5177	 * Special handling for DMA with size 0. Bypass all validations
   5178	 * because no transactions will be done except for WR_COMP, which
   5179	 * is not a security issue
   5180	 */
   5181	if (!le32_to_cpu(user_dma_pkt->tsize)) {
   5182		parser->patched_cb_size += sizeof(*user_dma_pkt);
   5183		return 0;
   5184	}
   5185
   5186	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
   5187		src_in_host = true;
   5188
   5189	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
   5190						src_in_host);
   5191}
   5192
   5193static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
   5194					struct hl_cs_parser *parser,
   5195					struct packet_load_and_exe *user_pkt)
   5196{
   5197	u32 cfg;
   5198
   5199	cfg = le32_to_cpu(user_pkt->cfg);
   5200
   5201	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
   5202		dev_err(hdev->dev,
   5203			"User not allowed to use Load and Execute\n");
   5204		return -EPERM;
   5205	}
   5206
   5207	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
   5208
   5209	return 0;
   5210}
   5211
   5212static int gaudi_validate_cb(struct hl_device *hdev,
   5213			struct hl_cs_parser *parser, bool is_mmu)
   5214{
   5215	u32 cb_parsed_length = 0;
   5216	int rc = 0;
   5217
   5218	parser->patched_cb_size = 0;
   5219
   5220	/* user_cb_size is greater than 0, so the loop always executes */
   5221	while (cb_parsed_length < parser->user_cb_size) {
   5222		enum packet_id pkt_id;
   5223		u16 pkt_size;
   5224		struct gaudi_packet *user_pkt;
   5225
   5226		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
   5227
   5228		pkt_id = (enum packet_id) (
   5229				(le64_to_cpu(user_pkt->header) &
   5230				PACKET_HEADER_PACKET_ID_MASK) >>
   5231					PACKET_HEADER_PACKET_ID_SHIFT);
   5232
   5233		if (!validate_packet_id(pkt_id)) {
   5234			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
   5235			rc = -EINVAL;
   5236			break;
   5237		}
   5238
   5239		pkt_size = gaudi_packet_sizes[pkt_id];
   5240		cb_parsed_length += pkt_size;
   5241		if (cb_parsed_length > parser->user_cb_size) {
   5242			dev_err(hdev->dev,
   5243				"packet 0x%x is out of CB boundary\n", pkt_id);
   5244			rc = -EINVAL;
   5245			break;
   5246		}
   5247
   5248		switch (pkt_id) {
   5249		case PACKET_MSG_PROT:
   5250			dev_err(hdev->dev,
   5251				"User not allowed to use MSG_PROT\n");
   5252			rc = -EPERM;
   5253			break;
   5254
   5255		case PACKET_CP_DMA:
   5256			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
   5257			rc = -EPERM;
   5258			break;
   5259
   5260		case PACKET_STOP:
   5261			dev_err(hdev->dev, "User not allowed to use STOP\n");
   5262			rc = -EPERM;
   5263			break;
   5264
   5265		case PACKET_WREG_BULK:
   5266			dev_err(hdev->dev,
   5267				"User not allowed to use WREG_BULK\n");
   5268			rc = -EPERM;
   5269			break;
   5270
   5271		case PACKET_LOAD_AND_EXE:
   5272			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
   5273				(struct packet_load_and_exe *) user_pkt);
   5274			break;
   5275
   5276		case PACKET_LIN_DMA:
   5277			parser->contains_dma_pkt = true;
   5278			if (is_mmu)
   5279				parser->patched_cb_size += pkt_size;
   5280			else
   5281				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
   5282					(struct packet_lin_dma *) user_pkt);
   5283			break;
   5284
   5285		case PACKET_WREG_32:
   5286		case PACKET_MSG_LONG:
   5287		case PACKET_MSG_SHORT:
   5288		case PACKET_REPEAT:
   5289		case PACKET_FENCE:
   5290		case PACKET_NOP:
   5291		case PACKET_ARB_POINT:
   5292			parser->patched_cb_size += pkt_size;
   5293			break;
   5294
   5295		default:
   5296			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
   5297				pkt_id);
   5298			rc = -EINVAL;
   5299			break;
   5300		}
   5301
   5302		if (rc)
   5303			break;
   5304	}
   5305
   5306	/*
   5307	 * The new CB should have space at the end for two MSG_PROT packets:
   5308	 * 1. A packet that will act as a completion packet
   5309	 * 2. A packet that will generate an MSI-X interrupt
   5310	 */
   5311	if (parser->completion)
   5312		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
   5313
   5314	return rc;
   5315}
   5316
   5317static int gaudi_patch_dma_packet(struct hl_device *hdev,
   5318				struct hl_cs_parser *parser,
   5319				struct packet_lin_dma *user_dma_pkt,
   5320				struct packet_lin_dma *new_dma_pkt,
   5321				u32 *new_dma_pkt_size)
   5322{
   5323	struct hl_userptr *userptr;
   5324	struct scatterlist *sg, *sg_next_iter;
   5325	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
   5326	u64 len, len_next;
   5327	dma_addr_t dma_addr, dma_addr_next;
   5328	u64 device_memory_addr, addr;
   5329	enum dma_data_direction dir;
   5330	struct sg_table *sgt;
   5331	bool src_in_host = false;
   5332	bool skip_host_mem_pin = false;
   5333	bool user_memset;
   5334
   5335	ctl = le32_to_cpu(user_dma_pkt->ctl);
   5336
   5337	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
   5338		src_in_host = true;
   5339
   5340	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
   5341			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
   5342
   5343	if (src_in_host) {
   5344		addr = le64_to_cpu(user_dma_pkt->src_addr);
   5345		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
   5346		dir = DMA_TO_DEVICE;
   5347		if (user_memset)
   5348			skip_host_mem_pin = true;
   5349	} else {
   5350		addr = le64_to_cpu(user_dma_pkt->dst_addr);
   5351		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
   5352		dir = DMA_FROM_DEVICE;
   5353	}
   5354
   5355	if ((!skip_host_mem_pin) &&
   5356		(!hl_userptr_is_pinned(hdev, addr,
   5357					le32_to_cpu(user_dma_pkt->tsize),
   5358					parser->job_userptr_list, &userptr))) {
   5359		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
   5360				addr, le32_to_cpu(user_dma_pkt->tsize));
   5361		return -EFAULT;
   5362	}
   5363
   5364	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
   5365		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
   5366		*new_dma_pkt_size = sizeof(*user_dma_pkt);
   5367		return 0;
   5368	}
   5369
   5370	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
   5371
   5372	sgt = userptr->sgt;
   5373	dma_desc_cnt = 0;
   5374
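	/*
	 * Re-emit one LIN_DMA packet per contiguous run of the pinned host
	 * memory: the user's engine-barrier bit is preserved only on the
	 * first packet, and write-completion is re-enabled only on the last
	 * one (below), so the split behaves like the single packet the user
	 * submitted.
	 */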
   5375	for_each_sgtable_dma_sg(sgt, sg, count) {
   5376		len = sg_dma_len(sg);
   5377		dma_addr = sg_dma_address(sg);
   5378
   5379		if (len == 0)
   5380			break;
   5381
   5382		while ((count + 1) < sgt->nents) {
   5383			sg_next_iter = sg_next(sg);
   5384			len_next = sg_dma_len(sg_next_iter);
   5385			dma_addr_next = sg_dma_address(sg_next_iter);
   5386
   5387			if (len_next == 0)
   5388				break;
   5389
   5390			if ((dma_addr + len == dma_addr_next) &&
   5391				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
   5392				len += len_next;
   5393				count++;
   5394				sg = sg_next_iter;
   5395			} else {
   5396				break;
   5397			}
   5398		}
   5399
   5400		ctl = le32_to_cpu(user_dma_pkt->ctl);
   5401		if (likely(dma_desc_cnt))
   5402			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
   5403		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
   5404		new_dma_pkt->ctl = cpu_to_le32(ctl);
   5405		new_dma_pkt->tsize = cpu_to_le32(len);
   5406
   5407		if (dir == DMA_TO_DEVICE) {
   5408			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
   5409			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
   5410		} else {
   5411			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
   5412			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
   5413		}
   5414
   5415		if (!user_memset)
   5416			device_memory_addr += len;
   5417		dma_desc_cnt++;
   5418		new_dma_pkt++;
   5419	}
   5420
   5421	if (!dma_desc_cnt) {
   5422		dev_err(hdev->dev,
   5423			"Error of 0 SG entries when patching DMA packet\n");
   5424		return -EFAULT;
   5425	}
   5426
   5427	/* Fix the last dma packet - wrcomp must be as user set it */
   5428	new_dma_pkt--;
   5429	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
   5430
   5431	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
   5432
   5433	return 0;
   5434}
   5435
   5436static int gaudi_patch_cb(struct hl_device *hdev,
   5437				struct hl_cs_parser *parser)
   5438{
   5439	u32 cb_parsed_length = 0;
   5440	u32 cb_patched_cur_length = 0;
   5441	int rc = 0;
   5442
   5443	/* user_cb_size is greater than 0, so the loop always executes */
   5444	while (cb_parsed_length < parser->user_cb_size) {
   5445		enum packet_id pkt_id;
   5446		u16 pkt_size;
   5447		u32 new_pkt_size = 0;
   5448		struct gaudi_packet *user_pkt, *kernel_pkt;
   5449
   5450		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
   5451		kernel_pkt = parser->patched_cb->kernel_address +
   5452					cb_patched_cur_length;
   5453
   5454		pkt_id = (enum packet_id) (
   5455				(le64_to_cpu(user_pkt->header) &
   5456				PACKET_HEADER_PACKET_ID_MASK) >>
   5457					PACKET_HEADER_PACKET_ID_SHIFT);
   5458
   5459		if (!validate_packet_id(pkt_id)) {
   5460			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
   5461			rc = -EINVAL;
   5462			break;
   5463		}
   5464
   5465		pkt_size = gaudi_packet_sizes[pkt_id];
   5466		cb_parsed_length += pkt_size;
   5467		if (cb_parsed_length > parser->user_cb_size) {
   5468			dev_err(hdev->dev,
   5469				"packet 0x%x is out of CB boundary\n", pkt_id);
   5470			rc = -EINVAL;
   5471			break;
   5472		}
   5473
   5474		switch (pkt_id) {
   5475		case PACKET_LIN_DMA:
   5476			rc = gaudi_patch_dma_packet(hdev, parser,
   5477					(struct packet_lin_dma *) user_pkt,
   5478					(struct packet_lin_dma *) kernel_pkt,
   5479					&new_pkt_size);
   5480			cb_patched_cur_length += new_pkt_size;
   5481			break;
   5482
   5483		case PACKET_MSG_PROT:
   5484			dev_err(hdev->dev,
   5485				"User not allowed to use MSG_PROT\n");
   5486			rc = -EPERM;
   5487			break;
   5488
   5489		case PACKET_CP_DMA:
   5490			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
   5491			rc = -EPERM;
   5492			break;
   5493
   5494		case PACKET_STOP:
   5495			dev_err(hdev->dev, "User not allowed to use STOP\n");
   5496			rc = -EPERM;
   5497			break;
   5498
   5499		case PACKET_WREG_32:
   5500		case PACKET_WREG_BULK:
   5501		case PACKET_MSG_LONG:
   5502		case PACKET_MSG_SHORT:
   5503		case PACKET_REPEAT:
   5504		case PACKET_FENCE:
   5505		case PACKET_NOP:
   5506		case PACKET_ARB_POINT:
   5507		case PACKET_LOAD_AND_EXE:
   5508			memcpy(kernel_pkt, user_pkt, pkt_size);
   5509			cb_patched_cur_length += pkt_size;
   5510			break;
   5511
   5512		default:
   5513			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
   5514				pkt_id);
   5515			rc = -EINVAL;
   5516			break;
   5517		}
   5518
   5519		if (rc)
   5520			break;
   5521	}
   5522
   5523	return rc;
   5524}
   5525
   5526static int gaudi_parse_cb_mmu(struct hl_device *hdev,
   5527		struct hl_cs_parser *parser)
   5528{
   5529	u64 handle;
   5530	u32 patched_cb_size;
   5531	struct hl_cb *user_cb;
   5532	int rc;
   5533
   5534	/*
   5535	 * The new CB should have space at the end for two MSG_PROT packets:
   5536	 * 1. A packet that will act as a completion packet
   5537	 * 2. A packet that will generate an MSI interrupt
   5538	 */
   5539	if (parser->completion)
   5540		parser->patched_cb_size = parser->user_cb_size +
   5541				sizeof(struct packet_msg_prot) * 2;
   5542	else
   5543		parser->patched_cb_size = parser->user_cb_size;
   5544
   5545	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
   5546				parser->patched_cb_size, false, false,
   5547				&handle);
   5548
   5549	if (rc) {
   5550		dev_err(hdev->dev,
   5551			"Failed to allocate patched CB for DMA CS %d\n",
   5552			rc);
   5553		return rc;
   5554	}
   5555
   5556	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
   5557	/* hl_cb_get should never fail */
   5558	if (!parser->patched_cb) {
   5559		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
   5560		rc = -EFAULT;
   5561		goto out;
   5562	}
   5563
   5564	/*
   5565	 * The check that parser->user_cb_size <= parser->user_cb->size was done
   5566	 * in validate_queue_index().
   5567	 */
   5568	memcpy(parser->patched_cb->kernel_address,
   5569		parser->user_cb->kernel_address,
   5570		parser->user_cb_size);
   5571
   5572	patched_cb_size = parser->patched_cb_size;
   5573
   5574	/* Validate patched CB instead of user CB */
   5575	user_cb = parser->user_cb;
   5576	parser->user_cb = parser->patched_cb;
   5577	rc = gaudi_validate_cb(hdev, parser, true);
   5578	parser->user_cb = user_cb;
   5579
   5580	if (rc) {
   5581		hl_cb_put(parser->patched_cb);
   5582		goto out;
   5583	}
   5584
   5585	if (patched_cb_size != parser->patched_cb_size) {
   5586		dev_err(hdev->dev, "user CB size mismatch\n");
   5587		hl_cb_put(parser->patched_cb);
   5588		rc = -EINVAL;
   5589		goto out;
   5590	}
   5591
   5592out:
   5593	/*
   5594	 * Always call cb destroy here because we still have 1 reference
   5595	 * to it by calling cb_get earlier. After the job completes,
   5596	 * cb_put will release it, but here we want to remove it from the
   5597	 * idr
   5598	 */
   5599	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
   5600
   5601	return rc;
   5602}
   5603
   5604static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
   5605		struct hl_cs_parser *parser)
   5606{
   5607	u64 handle;
   5608	int rc;
   5609
   5610	rc = gaudi_validate_cb(hdev, parser, false);
   5611
   5612	if (rc)
   5613		goto free_userptr;
   5614
   5615	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
   5616				parser->patched_cb_size, false, false,
   5617				&handle);
   5618	if (rc) {
   5619		dev_err(hdev->dev,
   5620			"Failed to allocate patched CB for DMA CS %d\n", rc);
   5621		goto free_userptr;
   5622	}
   5623
   5624	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
   5625	/* hl_cb_get should never fail here */
   5626	if (!parser->patched_cb) {
   5627		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
   5628		rc = -EFAULT;
   5629		goto out;
   5630	}
   5631
   5632	rc = gaudi_patch_cb(hdev, parser);
   5633
   5634	if (rc)
   5635		hl_cb_put(parser->patched_cb);
   5636
   5637out:
   5638	/*
   5639	 * Always call cb destroy here because we still have 1 reference
   5640	 * to it by calling cb_get earlier. After the job completes,
   5641	 * cb_put will release it, but here we want to remove it from the
   5642	 * idr
   5643	 */
   5644	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
   5645
   5646free_userptr:
   5647	if (rc)
   5648		hl_userptr_delete_list(hdev, parser->job_userptr_list);
   5649	return rc;
   5650}
   5651
   5652static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
   5653					struct hl_cs_parser *parser)
   5654{
   5655	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
   5656	struct gaudi_device *gaudi = hdev->asic_specific;
   5657	u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
   5658		((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
   5659
   5660	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
   5661			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
   5662			(!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
   5663		dev_err(hdev->dev, "h/w queue %d is disabled\n",
   5664				parser->hw_queue_id);
   5665		return -EINVAL;
   5666	}
   5667
   5668	/* For internal queue jobs just check if CB address is valid */
   5669	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
   5670					parser->user_cb_size,
   5671					asic_prop->sram_user_base_address,
   5672					asic_prop->sram_end_address))
   5673		return 0;
   5674
   5675	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
   5676					parser->user_cb_size,
   5677					asic_prop->dram_user_base_address,
   5678					asic_prop->dram_end_address))
   5679		return 0;
   5680
   5681	/* PMMU and HPMMU addresses are equal, check only one of them */
   5682	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
   5683					parser->user_cb_size,
   5684					asic_prop->pmmu.start_addr,
   5685					asic_prop->pmmu.end_addr))
   5686		return 0;
   5687
   5688	dev_err(hdev->dev,
   5689		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
   5690		parser->user_cb, parser->user_cb_size);
   5691
   5692	return -EFAULT;
   5693}
   5694
   5695static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
   5696{
   5697	struct gaudi_device *gaudi = hdev->asic_specific;
   5698
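	/*
	 * Internal queues only get an address-range check on the CB. External
	 * queues are fully parsed: with the MMU enabled the user CB is copied
	 * and validated as-is, without it every LIN_DMA packet is patched
	 * against pinned host memory.
	 */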
   5699	if (parser->queue_type == QUEUE_TYPE_INT)
   5700		return gaudi_parse_cb_no_ext_queue(hdev, parser);
   5701
   5702	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
   5703		return gaudi_parse_cb_mmu(hdev, parser);
   5704	else
   5705		return gaudi_parse_cb_no_mmu(hdev, parser);
   5706}
   5707
   5708static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
   5709					void *kernel_address, u32 len,
   5710					u64 cq_addr, u32 cq_val, u32 msi_vec,
   5711					bool eb)
   5712{
   5713	struct gaudi_device *gaudi = hdev->asic_specific;
   5714	struct packet_msg_prot *cq_pkt;
   5715	u64 msi_addr;
   5716	u32 tmp;
   5717
   5718	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
   5719
   5720	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
   5721	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   5722
   5723	if (eb)
   5724		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
   5725
   5726	cq_pkt->ctl = cpu_to_le32(tmp);
   5727	cq_pkt->value = cpu_to_le32(cq_val);
   5728	cq_pkt->addr = cpu_to_le64(cq_addr);
   5729
   5730	cq_pkt++;
   5731
   5732	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
   5733	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   5734	cq_pkt->ctl = cpu_to_le32(tmp);
   5735	cq_pkt->value = cpu_to_le32(1);
   5736
   5737	if (gaudi->multi_msi_mode)
   5738		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
   5739	else
   5740		msi_addr = mmPCIE_CORE_MSI_REQ;
   5741
   5742	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
   5743}
   5744
   5745static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
   5746{
   5747	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
   5748}
   5749
   5750static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
   5751					u32 size, u64 val)
   5752{
   5753	struct packet_lin_dma *lin_dma_pkt;
   5754	struct hl_cs_job *job;
   5755	u32 cb_size, ctl, err_cause;
   5756	struct hl_cb *cb;
   5757	int rc;
   5758
   5759	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
   5760	if (!cb)
   5761		return -EFAULT;
   5762
   5763	lin_dma_pkt = cb->kernel_address;
   5764	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
   5765	cb_size = sizeof(*lin_dma_pkt);
   5766
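	/*
	 * Single LIN_DMA packet in memset mode: src_addr carries the 64-bit
	 * fill value and the engine replicates it over tsize bytes starting
	 * at dst_addr.
	 */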
   5767	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
   5768	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
   5769	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
   5770	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   5771	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
   5772
   5773	lin_dma_pkt->ctl = cpu_to_le32(ctl);
   5774	lin_dma_pkt->src_addr = cpu_to_le64(val);
   5775	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
   5776	lin_dma_pkt->tsize = cpu_to_le32(size);
   5777
   5778	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
   5779	if (!job) {
   5780		dev_err(hdev->dev, "Failed to allocate a new job\n");
   5781		rc = -ENOMEM;
   5782		goto release_cb;
   5783	}
   5784
   5785	/* Verify DMA is OK */
   5786	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
   5787	if (err_cause && !hdev->init_done) {
   5788		dev_dbg(hdev->dev,
   5789			"Clearing DMA0 engine from errors (cause 0x%x)\n",
   5790			err_cause);
   5791		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
   5792	}
   5793
   5794	job->id = 0;
   5795	job->user_cb = cb;
   5796	atomic_inc(&job->user_cb->cs_cnt);
   5797	job->user_cb_size = cb_size;
   5798	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
   5799	job->patched_cb = job->user_cb;
   5800	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
   5801
   5802	hl_debugfs_add_job(hdev, job);
   5803
   5804	rc = gaudi_send_job_on_qman0(hdev, job);
   5805	hl_debugfs_remove_job(hdev, job);
   5806	kfree(job);
   5807	atomic_dec(&cb->cs_cnt);
   5808
   5809	/* Verify DMA is OK */
   5810	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
   5811	if (err_cause) {
   5812		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
   5813		rc = -EIO;
   5814		if (!hdev->init_done) {
   5815			dev_dbg(hdev->dev,
   5816				"Clearing DMA0 engine from errors (cause 0x%x)\n",
   5817				err_cause);
   5818			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
   5819		}
   5820	}
   5821
   5822release_cb:
   5823	hl_cb_put(cb);
   5824	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
   5825
   5826	return rc;
   5827}
   5828
   5829static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
   5830					u32 num_regs, u32 val)
   5831{
   5832	struct packet_msg_long *pkt;
   5833	struct hl_cs_job *job;
   5834	u32 cb_size, ctl;
   5835	struct hl_cb *cb;
   5836	int i, rc;
   5837
   5838	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
   5839
   5840	if (cb_size > SZ_2M) {
   5841		dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
   5842		return -ENOMEM;
   5843	}
   5844
   5845	cb = hl_cb_kernel_create(hdev, cb_size, false);
   5846	if (!cb)
   5847		return -EFAULT;
   5848
   5849	pkt = cb->kernel_address;
   5850
   5851	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
   5852	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
   5853	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
   5854	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
   5855	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   5856
   5857	for (i = 0; i < num_regs ; i++, pkt++) {
   5858		pkt->ctl = cpu_to_le32(ctl);
   5859		pkt->value = cpu_to_le32(val);
   5860		pkt->addr = cpu_to_le64(reg_base + (i * 4));
   5861	}
   5862
   5863	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
   5864	if (!job) {
   5865		dev_err(hdev->dev, "Failed to allocate a new job\n");
   5866		rc = -ENOMEM;
   5867		goto release_cb;
   5868	}
   5869
   5870	job->id = 0;
   5871	job->user_cb = cb;
   5872	atomic_inc(&job->user_cb->cs_cnt);
   5873	job->user_cb_size = cb_size;
   5874	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
   5875	job->patched_cb = job->user_cb;
   5876	job->job_cb_size = cb_size;
   5877
   5878	hl_debugfs_add_job(hdev, job);
   5879
   5880	rc = gaudi_send_job_on_qman0(hdev, job);
   5881	hl_debugfs_remove_job(hdev, job);
   5882	kfree(job);
   5883	atomic_dec(&cb->cs_cnt);
   5884
   5885release_cb:
   5886	hl_cb_put(cb);
   5887	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
   5888
   5889	return rc;
   5890}
   5891
   5892static int gaudi_restore_sm_registers(struct hl_device *hdev)
   5893{
   5894	u64 base_addr;
   5895	u32 num_regs;
   5896	int rc;
   5897
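	/*
	 * Zero all sync-object and monitor-status blocks. For the W_S block
	 * only the entries from the GAUDI_FIRST_AVAILABLE_* indices onward are
	 * cleared; the ones below that are presumably reserved for
	 * driver-internal use and must keep their state.
	 */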
   5898	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
   5899	num_regs = NUM_OF_SOB_IN_BLOCK;
   5900	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5901	if (rc) {
   5902		dev_err(hdev->dev, "failed resetting SM registers\n");
   5903		return -ENOMEM;
   5904	}
   5905
   5906	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
   5907	num_regs = NUM_OF_SOB_IN_BLOCK;
   5908	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5909	if (rc) {
   5910		dev_err(hdev->dev, "failed resetting SM registers\n");
   5911		return -ENOMEM;
   5912	}
   5913
   5914	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
   5915	num_regs = NUM_OF_SOB_IN_BLOCK;
   5916	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5917	if (rc) {
   5918		dev_err(hdev->dev, "failed resetting SM registers\n");
   5919		return -ENOMEM;
   5920	}
   5921
   5922	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
   5923	num_regs = NUM_OF_MONITORS_IN_BLOCK;
   5924	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5925	if (rc) {
   5926		dev_err(hdev->dev, "failed resetting SM registers\n");
   5927		return -ENOMEM;
   5928	}
   5929
   5930	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
   5931	num_regs = NUM_OF_MONITORS_IN_BLOCK;
   5932	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5933	if (rc) {
   5934		dev_err(hdev->dev, "failed resetting SM registers\n");
   5935		return -ENOMEM;
   5936	}
   5937
   5938	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
   5939	num_regs = NUM_OF_MONITORS_IN_BLOCK;
   5940	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5941	if (rc) {
   5942		dev_err(hdev->dev, "failed resetting SM registers\n");
   5943		return -ENOMEM;
   5944	}
   5945
   5946	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
   5947			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
   5948	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
   5949	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5950	if (rc) {
   5951		dev_err(hdev->dev, "failed resetting SM registers\n");
   5952		return -ENOMEM;
   5953	}
   5954
   5955	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
   5956			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
   5957	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
   5958	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
   5959	if (rc) {
   5960		dev_err(hdev->dev, "failed resetting SM registers\n");
   5961		return -ENOMEM;
   5962	}
   5963
   5964	return 0;
   5965}
   5966
   5967static void gaudi_restore_dma_registers(struct hl_device *hdev)
   5968{
   5969	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
   5970			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
   5971	int i;
   5972
   5973	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
   5974		u64 sob_addr = CFG_BASE +
   5975				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
   5976				(i * sob_delta);
   5977		u32 dma_offset = i * DMA_CORE_OFFSET;
   5978
   5979		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
   5980				lower_32_bits(sob_addr));
   5981		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
   5982				upper_32_bits(sob_addr));
   5983		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
   5984
   5985		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
   5986		 * modified by the user for SRAM reduction
   5987		 */
   5988		if (i > 1)
   5989			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
   5990								0x00000001);
   5991	}
   5992}
   5993
   5994static void gaudi_restore_qm_registers(struct hl_device *hdev)
   5995{
   5996	u32 qman_offset;
   5997	int i;
   5998
   5999	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
   6000		qman_offset = i * DMA_QMAN_OFFSET;
   6001		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
   6002	}
   6003
   6004	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
   6005		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
   6006		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
   6007	}
   6008
   6009	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
   6010		qman_offset = i * TPC_QMAN_OFFSET;
   6011		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
   6012	}
   6013
   6014	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
   6015		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
   6016				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
   6017		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
   6018	}
   6019}
   6020
   6021static int gaudi_restore_user_registers(struct hl_device *hdev)
   6022{
   6023	int rc;
   6024
   6025	rc = gaudi_restore_sm_registers(hdev);
   6026	if (rc)
   6027		return rc;
   6028
   6029	gaudi_restore_dma_registers(hdev);
   6030	gaudi_restore_qm_registers(hdev);
   6031
   6032	return 0;
   6033}
   6034
   6035static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
   6036{
   6037	return 0;
   6038}
   6039
   6040static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
   6041{
   6042	struct asic_fixed_properties *prop = &hdev->asic_prop;
   6043	struct gaudi_device *gaudi = hdev->asic_specific;
   6044	u64 addr = prop->mmu_pgt_addr;
   6045	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
   6046
   6047	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
   6048		return 0;
   6049
   6050	return gaudi_memset_device_memory(hdev, addr, size, 0);
   6051}
   6052
   6053static void gaudi_restore_phase_topology(struct hl_device *hdev)
   6054{
   6055
   6056}
   6057
   6058static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
   6059					u32 size_to_dma, dma_addr_t dma_addr)
   6060{
   6061	u32 err_cause, val;
   6062	u64 dma_offset;
   6063	int rc;
   6064
   6065	dma_offset = dma_id * DMA_CORE_OFFSET;
   6066
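	/*
	 * Drive the DMA core registers directly (no QMAN involved): program
	 * source, destination and size, commit a linear transfer and then
	 * poll the busy bit until the engine is done.
	 */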
   6067	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
   6068	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
   6069	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
   6070	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
   6071	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
   6072	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
   6073			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
   6074
   6075	rc = hl_poll_timeout(
   6076		hdev,
   6077		mmDMA0_CORE_STS0 + dma_offset,
   6078		val,
   6079		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
   6080		0,
   6081		1000000);
   6082
   6083	if (rc) {
   6084		dev_err(hdev->dev,
   6085			"DMA %d timed out while reading 0x%llx\n",
   6086			dma_id, addr);
   6087		return -EIO;
   6088	}
   6089
   6090	/* Verify DMA is OK */
   6091	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
   6092	if (err_cause) {
   6093		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
   6094		dev_dbg(hdev->dev,
   6095			"Clearing DMA0 engine from errors (cause 0x%x)\n",
   6096			err_cause);
   6097		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
   6098
   6099		return -EIO;
   6100	}
   6101
   6102	return 0;
   6103}
   6104
   6105static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
   6106				void *blob_addr)
   6107{
   6108	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
   6109	u32 qm_glbl_sts0, qm_cgm_sts;
   6110	u64 dma_offset, qm_offset;
   6111	dma_addr_t dma_addr;
   6112	void *kernel_addr;
   6113	bool is_eng_idle;
   6114	int rc = 0, dma_id;
   6115
   6116	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
   6117						hdev, SZ_2M,
   6118						&dma_addr,
   6119						GFP_KERNEL | __GFP_ZERO);
   6120
   6121	if (!kernel_addr)
   6122		return -ENOMEM;
   6123
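	/*
	 * Copy device memory in 2MB chunks through an idle PCI DMA engine into
	 * the host bounce buffer allocated above, then memcpy each chunk into
	 * the caller's blob. If the first PCI DMA channel is busy, the second
	 * one is tried before giving up.
	 */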
   6124	hdev->asic_funcs->hw_queues_lock(hdev);
   6125
   6126	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
   6127	dma_offset = dma_id * DMA_CORE_OFFSET;
   6128	qm_offset = dma_id * DMA_QMAN_OFFSET;
   6129	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
   6130	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
   6131	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
   6132	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
   6133		      IS_DMA_IDLE(dma_core_sts0);
   6134
   6135	if (!is_eng_idle) {
   6136		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
   6137		dma_offset = dma_id * DMA_CORE_OFFSET;
   6138		qm_offset = dma_id * DMA_QMAN_OFFSET;
   6139		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
   6140		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
   6141		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
   6142		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
   6143			      IS_DMA_IDLE(dma_core_sts0);
   6144
   6145		if (!is_eng_idle) {
   6146			dev_err_ratelimited(hdev->dev,
   6147				"Can't read via DMA because it is BUSY\n");
   6148			rc = -EAGAIN;
   6149			goto out;
   6150		}
   6151	}
   6152
   6153	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
   6154	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
   6155			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
   6156
   6157	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
   6158	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
   6159	 * ASID
   6160	 */
   6161	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
   6162
   6163	/* Verify DMA is OK */
   6164	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
   6165	if (err_cause) {
   6166		dev_dbg(hdev->dev,
   6167			"Clearing DMA0 engine from errors (cause 0x%x)\n",
   6168			err_cause);
   6169		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
   6170	}
   6171
   6172	pos = 0;
   6173	size_left = size;
   6174	size_to_dma = SZ_2M;
   6175
   6176	while (size_left > 0) {
   6177
   6178		if (size_left < SZ_2M)
   6179			size_to_dma = size_left;
   6180
   6181		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
   6182						dma_addr);
   6183		if (rc)
   6184			break;
   6185
   6186		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
   6187
   6188		if (size_left <= SZ_2M)
   6189			break;
   6190
   6191		pos += SZ_2M;
   6192		addr += SZ_2M;
   6193		size_left -= SZ_2M;
   6194	}
   6195
   6196	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
   6197	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
   6198	 * ASID
   6199	 */
   6200	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
   6201			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
   6202
   6203	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
   6204
   6205out:
   6206	hdev->asic_funcs->hw_queues_unlock(hdev);
   6207
   6208	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
   6209						dma_addr);
   6210
   6211	return rc;
   6212}
   6213
   6214static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
   6215{
   6216	struct gaudi_device *gaudi = hdev->asic_specific;
   6217
   6218	if (hdev->reset_info.hard_reset_pending)
   6219		return U64_MAX;
   6220
   6221	return readq(hdev->pcie_bar[HBM_BAR_ID] +
   6222			(addr - gaudi->hbm_bar_cur_addr));
   6223}
   6224
   6225static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
   6226{
   6227	struct gaudi_device *gaudi = hdev->asic_specific;
   6228
   6229	if (hdev->reset_info.hard_reset_pending)
   6230		return;
   6231
   6232	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
   6233			(addr - gaudi->hbm_bar_cur_addr));
   6234}
   6235
   6236void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
   6237{
   6238	/* mask to zero the MMBP and ASID bits */
   6239	WREG32_AND(reg, ~0x7FF);
   6240	WREG32_OR(reg, asid);
   6241}
   6242
   6243static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
   6244{
   6245	struct gaudi_device *gaudi = hdev->asic_specific;
   6246
   6247	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
   6248		return;
   6249
   6250	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
   6251		dev_crit(hdev->dev, "asid %u is too big\n", asid);
   6252		return;
   6253	}
   6254
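	/*
	 * Program the ASID into the non-secure properties of every QMAN and
	 * engine below, so their transactions are translated by the MMU with
	 * this context's page tables (gaudi_mmu_prepare_reg() zeroes the MMBP
	 * and ASID bits and then writes the new ASID).
	 */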
   6255	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6256	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6257	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6258	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6259	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6260
   6261	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6262	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6263	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6264	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6265	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6266
   6267	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6268	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6269	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6270	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6271	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6272
   6273	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6274	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6275	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6276	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6277	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6278
   6279	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6280	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6281	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6282	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6283	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6284
   6285	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6286	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6287	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6288	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6289	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6290
   6291	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6292	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6293	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6294	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6295	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6296
   6297	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6298	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6299	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6300	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6301	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6302
   6303	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
   6304	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
   6305	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
   6306	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
   6307	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
   6308	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
   6309	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
   6310	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
   6311
   6312	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6313	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6314	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6315	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6316	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6317	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
   6318	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
   6319
   6320	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6321	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6322	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6323	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6324	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6325	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
   6326	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
   6327
   6328	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6329	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6330	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6331	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6332	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6333	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
   6334	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
   6335
   6336	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6337	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6338	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6339	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6340	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6341	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
   6342	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
   6343
   6344	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6345	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6346	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6347	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6348	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6349	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
   6350	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
   6351
   6352	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6353	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6354	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6355	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6356	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6357	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
   6358	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
   6359
   6360	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6361	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6362	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6363	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6364	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6365	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
   6366	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
   6367
   6368	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6369	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6370	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6371	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6372	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6373	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
   6374	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
   6375
   6376	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6377	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6378	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6379	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6380	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6381	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
   6382	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
   6383	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
   6384	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
   6385	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
   6386
   6387	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
   6388	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
   6389	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
   6390	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
   6391	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
   6392	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
   6393	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
   6394	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
   6395	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
   6396	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
   6397	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
   6398	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
   6399
   6400	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
   6401		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
   6402				asid);
   6403		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
   6404				asid);
   6405		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
   6406				asid);
   6407		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
   6408				asid);
   6409		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
   6410				asid);
   6411	}
   6412
   6413	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
   6414		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
   6415				asid);
   6416		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
   6417				asid);
   6418		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
   6419				asid);
   6420		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
   6421				asid);
   6422		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
   6423				asid);
   6424	}
   6425
   6426	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
   6427		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
   6428				asid);
   6429		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
   6430				asid);
   6431		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
   6432				asid);
   6433		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
   6434				asid);
   6435		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
   6436				asid);
   6437	}
   6438
   6439	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
   6440		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
   6441				asid);
   6442		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
   6443				asid);
   6444		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
   6445				asid);
   6446		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
   6447				asid);
   6448		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
   6449				asid);
   6450	}
   6451
   6452	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
   6453		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
   6454				asid);
   6455		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
   6456				asid);
   6457		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
   6458				asid);
   6459		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
   6460				asid);
   6461		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
   6462				asid);
   6463	}
   6464
   6465	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
   6466		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
   6467				asid);
   6468		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
   6469				asid);
   6470		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
   6471				asid);
   6472		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
   6473				asid);
   6474		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
   6475				asid);
   6476	}
   6477
   6478	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
   6479		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
   6480				asid);
   6481		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
   6482				asid);
   6483		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
   6484				asid);
   6485		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
   6486				asid);
   6487		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
   6488				asid);
   6489	}
   6490
   6491	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
   6492		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
   6493				asid);
   6494		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
   6495				asid);
   6496		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
   6497				asid);
   6498		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
   6499				asid);
   6500		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
   6501				asid);
   6502	}
   6503
   6504	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
   6505		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
   6506				asid);
   6507		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
   6508				asid);
   6509		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
   6510				asid);
   6511		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
   6512				asid);
   6513		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
   6514				asid);
   6515	}
   6516
   6517	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
   6518		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
   6519				asid);
   6520		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
   6521				asid);
   6522		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
   6523				asid);
   6524		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
   6525				asid);
   6526		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
   6527				asid);
   6528	}
   6529
   6530	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
   6531	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
   6532}
   6533
   6534static int gaudi_send_job_on_qman0(struct hl_device *hdev,
   6535		struct hl_cs_job *job)
   6536{
   6537	struct packet_msg_prot *fence_pkt;
   6538	u32 *fence_ptr;
   6539	dma_addr_t fence_dma_addr;
   6540	struct hl_cb *cb;
   6541	u32 tmp, timeout, dma_offset;
   6542	int rc;
   6543
   6544	if (hdev->pldm)
   6545		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
   6546	else
   6547		timeout = HL_DEVICE_TIMEOUT_USEC;
   6548
   6549	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
   6550		dev_err_ratelimited(hdev->dev,
   6551			"Can't send driver job on QMAN0 because the device is not idle\n");
   6552		return -EBUSY;
   6553	}
   6554
   6555	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
   6556							&fence_dma_addr);
   6557	if (!fence_ptr) {
   6558		dev_err(hdev->dev,
   6559			"Failed to allocate fence memory for QMAN0\n");
   6560		return -ENOMEM;
   6561	}
   6562
   6563	cb = job->patched_cb;
   6564
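	/*
	 * The last packet of the patched CB is a MSG_PROT "fence" packet;
	 * when QMAN0 executes it, it writes GAUDI_QMAN0_FENCE_VAL to
	 * fence_dma_addr, which is polled below to detect job completion.
	 */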
   6565	fence_pkt = cb->kernel_address +
   6566			job->job_cb_size - sizeof(struct packet_msg_prot);
   6567
   6568	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
   6569	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
   6570	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   6571
   6572	fence_pkt->ctl = cpu_to_le32(tmp);
   6573	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
   6574	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
   6575
   6576	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
   6577
   6578	WREG32(mmDMA0_CORE_PROT + dma_offset,
   6579			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
   6580
   6581	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
   6582					job->job_cb_size, cb->bus_address);
   6583	if (rc) {
   6584		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
   6585		goto free_fence_ptr;
   6586	}
   6587
   6588	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
   6589				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
   6590				timeout, true);
   6591
   6592	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
   6593
   6594	if (rc == -ETIMEDOUT) {
   6595		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
   6596		goto free_fence_ptr;
   6597	}
   6598
   6599free_fence_ptr:
   6600	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
   6601
   6602	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
   6603					fence_dma_addr);
   6604	return rc;
   6605}
   6606
   6607static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
   6608{
   6609	if (event_type >= GAUDI_EVENT_SIZE)
   6610		goto event_not_supported;
   6611
   6612	if (!gaudi_irq_map_table[event_type].valid)
   6613		goto event_not_supported;
   6614
    6615	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
   6616
   6617	return;
   6618
   6619event_not_supported:
   6620	snprintf(desc, size, "N/A");
   6621}
   6622
   6623static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
   6624							bool is_write, s32 *engine_id_1,
   6625							s32 *engine_id_2)
   6626{
   6627	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
   6628
   6629	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
   6630				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
   6631
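	/*
	 * Each DMA interface location is shared by two DMA engines. Map the
	 * interface coordinates to the candidate pair, then use the per-engine
	 * error cause bits read below to decide which engine raised the RAZWI;
	 * if both or neither are set, report the ambiguous pair.
	 */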
   6632	switch (x_y) {
   6633	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
   6634	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
   6635		dma_id[0] = 0;
   6636		dma_id[1] = 2;
   6637		break;
   6638	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
   6639	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
   6640		dma_id[0] = 1;
   6641		dma_id[1] = 3;
   6642		break;
   6643	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
   6644	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
   6645		dma_id[0] = 4;
   6646		dma_id[1] = 6;
   6647		break;
   6648	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
   6649	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
   6650		dma_id[0] = 5;
   6651		dma_id[1] = 7;
   6652		break;
   6653	default:
   6654		goto unknown_initiator;
   6655	}
   6656
   6657	for (i = 0 ; i < 2 ; i++) {
   6658		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
   6659		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
   6660	}
   6661
   6662	switch (x_y) {
   6663	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
   6664	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
   6665		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
   6666			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
   6667			return "DMA0";
   6668		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
   6669			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
   6670			return "DMA2";
   6671		} else {
   6672			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
   6673			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
   6674			return "DMA0 or DMA2";
   6675		}
   6676	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
   6677	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
   6678		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
   6679			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
   6680			return "DMA1";
   6681		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
   6682			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
   6683			return "DMA3";
   6684		} else {
   6685			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
   6686			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
   6687			return "DMA1 or DMA3";
   6688		}
   6689	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
   6690	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
   6691		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
   6692			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
   6693			return "DMA4";
   6694		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
   6695			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
   6696			return "DMA6";
   6697		} else {
   6698			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
   6699			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
   6700			return "DMA4 or DMA6";
   6701		}
   6702	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
   6703	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
   6704		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
   6705			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
   6706			return "DMA5";
   6707		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
   6708			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
   6709			return "DMA7";
   6710		} else {
   6711			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
   6712			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
   6713			return "DMA5 or DMA7";
   6714		}
   6715	}
   6716
   6717unknown_initiator:
   6718	return "unknown initiator";
   6719}
   6720
   6721static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
   6722							u32 *engine_id_1, u32 *engine_id_2)
   6723{
   6724	u32 val, x_y, axi_id;
   6725
   6726	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
   6727				RREG32(mmMMU_UP_RAZWI_READ_ID);
   6728	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
   6729			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
   6730	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
   6731			RAZWI_INITIATOR_AXI_ID_SHIFT);
   6732
   6733	switch (x_y) {
   6734	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
   6735		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
   6736			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
   6737			return "TPC0";
   6738		}
   6739		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
   6740			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
   6741			return "NIC0";
   6742		}
   6743		break;
   6744	case RAZWI_INITIATOR_ID_X_Y_TPC1:
   6745		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
   6746		return "TPC1";
   6747	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
   6748	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
   6749		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
   6750		return "MME0";
   6751	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
   6752	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
   6753		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
   6754		return "MME1";
   6755	case RAZWI_INITIATOR_ID_X_Y_TPC2:
   6756		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
   6757		return "TPC2";
   6758	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
   6759		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
   6760			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
   6761			return "TPC3";
   6762		}
    6763		/* PCI, CPU or PSOC does not have an engine id */
   6764		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
   6765			return "PCI";
   6766		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
   6767			return "CPU";
   6768		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
   6769			return "PSOC";
   6770		break;
   6771	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
   6772	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
   6773	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
   6774	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
   6775	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
   6776	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
   6777	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
   6778	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
   6779		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
   6780				engine_id_1, engine_id_2);
   6781	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
   6782		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
   6783			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
   6784			return "TPC4";
   6785		}
   6786		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
   6787			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
   6788			return "NIC1";
   6789		}
   6790		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
   6791			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
   6792			return "NIC2";
   6793		}
   6794		break;
   6795	case RAZWI_INITIATOR_ID_X_Y_TPC5:
   6796		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
   6797		return "TPC5";
   6798	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
   6799	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
   6800		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
   6801		return "MME2";
   6802	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
   6803	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
   6804		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
   6805		return "MME3";
   6806	case RAZWI_INITIATOR_ID_X_Y_TPC6:
   6807		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
   6808		return "TPC6";
   6809	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
   6810		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
   6811			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
   6812			return "TPC7";
   6813		}
   6814		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
   6815			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
   6816			return "NIC4";
   6817		}
   6818		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
   6819			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
   6820			return "NIC5";
   6821		}
   6822		break;
   6823	default:
   6824		break;
   6825	}
   6826
   6827	dev_err(hdev->dev,
   6828		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
   6829		val,
   6830		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
   6831		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
   6832		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
   6833			RAZWI_INITIATOR_AXI_ID_MASK);
   6834
   6835	return "unknown initiator";
   6836}
   6837
   6838static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
   6839						u32 *engine_id_2)
   6840{
   6841
   6842	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
   6843		dev_err_ratelimited(hdev->dev,
   6844			"RAZWI event caused by illegal write of %s\n",
   6845			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
   6846		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
   6847	}
   6848
   6849	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
   6850		dev_err_ratelimited(hdev->dev,
   6851			"RAZWI event caused by illegal read of %s\n",
   6852			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
   6853		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
   6854	}
   6855}
   6856
   6857static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
   6858{
   6859	struct gaudi_device *gaudi = hdev->asic_specific;
   6860	u32 val;
   6861
   6862	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
   6863		return;
   6864
   6865	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
   6866	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
   6867		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
   6868		*addr <<= 32;
   6869		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
   6870
   6871		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
   6872		*type = HL_RAZWI_PAGE_FAULT;
   6873
   6874		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
   6875	}
   6876
   6877	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
   6878	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
   6879		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
   6880		*addr <<= 32;
   6881		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
   6882
   6883		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
   6884		*type = HL_RAZWI_MMU_ACCESS_ERROR;
   6885
   6886		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
   6887	}
   6888}
   6889
   6890/*
   6891 *  +-------------------+------------------------------------------------------+
   6892 *  | Configuration Reg |                     Description                      |
   6893 *  |      Address      |                                                      |
   6894 *  +-------------------+------------------------------------------------------+
   6895 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
   6896 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
   6897 *  |                   |0xF34 memory wrappers 63:32                           |
   6898 *  |                   |0xF38 memory wrappers 95:64                           |
   6899 *  |                   |0xF3C memory wrappers 127:96                          |
   6900 *  +-------------------+------------------------------------------------------+
   6901 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
   6902 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
   6903 *  |                   |0xF44 memory wrappers 63:32                           |
   6904 *  |                   |0xF48 memory wrappers 95:64                           |
   6905 *  |                   |0xF4C memory wrappers 127:96                          |
   6906 *  +-------------------+------------------------------------------------------+
   6907 */
   6908static int gaudi_extract_ecc_info(struct hl_device *hdev,
   6909		struct ecc_info_extract_params *params, u64 *ecc_address,
   6910		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
   6911{
   6912	u32 i, num_mem_regs, reg, err_bit;
   6913	u64 err_addr, err_word = 0;
   6914
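	/* One 32-bit indication register covers 32 memory wrappers, so round
	 * the register count up (ceiling division)
	 */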
   6915	num_mem_regs = params->num_memories / 32 +
   6916			((params->num_memories % 32) ? 1 : 0);
   6917
   6918	if (params->block_address >= CFG_BASE)
   6919		params->block_address -= CFG_BASE;
   6920
   6921	if (params->derr)
   6922		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
   6923	else
   6924		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
   6925
   6926	/* Set invalid wrapper index */
   6927	*memory_wrapper_idx = 0xFF;
   6928
   6929	/* Iterate through memory wrappers, a single bit must be set */
   6930	for (i = 0 ; i < num_mem_regs ; i++) {
    6931		/* the error indication registers are laid out 4 bytes apart */
    6932		err_word = RREG32(err_addr + i * 4);
   6933		if (err_word) {
   6934			err_bit = __ffs(err_word);
   6935			*memory_wrapper_idx = err_bit + (32 * i);
   6936			break;
   6937		}
   6938	}
   6939
   6940	if (*memory_wrapper_idx == 0xFF) {
   6941		dev_err(hdev->dev, "ECC error information cannot be found\n");
   6942		return -EINVAL;
   6943	}
   6944
   6945	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
   6946			*memory_wrapper_idx);
   6947
   6948	*ecc_address =
   6949		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
   6950	*ecc_syndrom =
   6951		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
   6952
   6953	/* Clear error indication */
   6954	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
   6955	if (params->derr)
   6956		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
   6957	else
   6958		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
   6959
   6960	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
   6961
   6962	return 0;
   6963}
   6964
   6965/*
   6966 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
   6967 *
   6968 * @idx: the current pi/ci value
   6969 * @q_len: the queue length (power of 2)
   6970 *
   6971 * @return the cyclically decremented index
   6972 */
   6973static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
   6974{
   6975	u32 mask = q_len - 1;
   6976
   6977	/*
    6978	 * modular decrement is equivalent to adding (q_len - 1);
    6979	 * we then take the LSBs to make sure the value stays in the
    6980	 * range [0, q_len - 1]
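	 * e.g. for q_len = 8 (mask = 7): idx 0 -> 7, idx 5 -> 4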
   6981	 */
   6982	return (idx + q_len - 1) & mask;
   6983}
   6984
   6985/**
   6986 * gaudi_print_sw_config_stream_data - print SW config stream data
   6987 *
   6988 * @hdev: pointer to the habanalabs device structure
   6989 * @stream: the QMAN's stream
   6990 * @qman_base: base address of QMAN registers block
   6991 */
   6992static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
   6993						u64 qman_base)
   6994{
   6995	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
   6996	u32 cq_ptr_lo_off, size;
   6997
   6998	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
   6999
   7000	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
   7001						stream * cq_ptr_lo_off;
   7002	cq_ptr_hi = cq_ptr_lo +
   7003				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
   7004	cq_tsize = cq_ptr_lo +
   7005				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
   7006
   7007	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
   7008	size = RREG32(cq_tsize);
   7009	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
   7010							stream, cq_ptr, size);
   7011}
   7012
   7013/**
   7014 * gaudi_print_last_pqes_on_err - print last PQEs on error
   7015 *
   7016 * @hdev: pointer to the habanalabs device structure
   7017 * @qid_base: first QID of the QMAN (out of 4 streams)
   7018 * @stream: the QMAN's stream
   7019 * @qman_base: base address of QMAN registers block
   7020 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
   7021 */
   7022static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
   7023						u32 stream, u64 qman_base,
   7024						bool pr_sw_conf)
   7025{
   7026	u32 ci, qm_ci_stream_off, queue_len;
   7027	struct hl_hw_queue *q;
   7028	u64 pq_ci;
   7029	int i;
   7030
   7031	q = &hdev->kernel_queues[qid_base + stream];
   7032
   7033	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
   7034	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
   7035						stream * qm_ci_stream_off;
   7036
   7037	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
   7038					q->int_queue_len : HL_QUEUE_LENGTH;
   7039
   7040	hdev->asic_funcs->hw_queues_lock(hdev);
   7041
   7042	if (pr_sw_conf)
   7043		gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
   7044
   7045	ci = RREG32(pq_ci);
   7046
    7047	/* we should start printing from ci - 1 */
   7048	ci = gaudi_queue_idx_dec(ci, queue_len);
   7049
   7050	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
   7051		struct hl_bd *bd;
   7052		u64 addr;
   7053		u32 len;
   7054
   7055		bd = q->kernel_address;
   7056		bd += ci;
   7057
   7058		len = le32_to_cpu(bd->len);
    7059		/* len 0 means an uninitialized entry - break */
   7060		if (!len)
   7061			break;
   7062
   7063		addr = le64_to_cpu(bd->ptr);
   7064
   7065		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
   7066							stream, ci, addr, len);
   7067
   7068		/* get previous ci, wrap if needed */
   7069		ci = gaudi_queue_idx_dec(ci, queue_len);
   7070	}
   7071
   7072	hdev->asic_funcs->hw_queues_unlock(hdev);
   7073}
   7074
   7075/**
   7076 * print_qman_data_on_err - extract QMAN data on error
   7077 *
   7078 * @hdev: pointer to the habanalabs device structure
   7079 * @qid_base: first QID of the QMAN (out of 4 streams)
   7080 * @stream: the QMAN's stream
   7081 * @qman_base: base address of QMAN registers block
   7082 *
    7083 * This function attempts to extract as much data as possible on a QMAN error.
    7084 * For an upper CP, print the SW config stream data and the last 8 PQEs.
    7085 * For the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
   7086 */
   7087static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
   7088						u32 stream, u64 qman_base)
   7089{
   7090	u32 i;
   7091
   7092	if (stream != QMAN_STREAMS) {
   7093		gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
   7094									true);
   7095		return;
   7096	}
   7097
   7098	gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
   7099
   7100	for (i = 0; i < QMAN_STREAMS; i++)
   7101		gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
   7102									false);
   7103}
   7104
   7105static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
   7106					  const char *qm_name,
   7107					  u64 qman_base,
   7108					  u32 qid_base)
   7109{
   7110	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
   7111	u64 glbl_sts_addr, arb_err_addr;
   7112	char reg_desc[32];
   7113
   7114	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
   7115	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
   7116
   7117	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
   7118	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
   7119		glbl_sts_clr_val = 0;
   7120		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
   7121
   7122		if (!glbl_sts_val)
   7123			continue;
   7124
   7125		if (i == QMAN_STREAMS)
   7126			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
   7127		else
   7128			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
   7129
   7130		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
   7131			if (glbl_sts_val & BIT(j)) {
   7132				dev_err_ratelimited(hdev->dev,
   7133						"%s %s. err cause: %s\n",
   7134						qm_name, reg_desc,
   7135						gaudi_qman_error_cause[j]);
   7136				glbl_sts_clr_val |= BIT(j);
   7137			}
   7138		}
   7139
    7140		/* Write 1 to clear errors */
   7141		if (!hdev->stop_on_err)
   7142			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
   7143		else
   7144			print_qman_data_on_err(hdev, qid_base, i, qman_base);
   7145	}
   7146
   7147	arb_err_val = RREG32(arb_err_addr);
   7148
   7149	if (!arb_err_val)
   7150		return;
   7151
   7152	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
   7153		if (arb_err_val & BIT(j)) {
   7154			dev_err_ratelimited(hdev->dev,
   7155					"%s ARB_ERR. err cause: %s\n",
   7156					qm_name,
   7157					gaudi_qman_arb_error_cause[j]);
   7158		}
   7159	}
   7160}
   7161
   7162static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
   7163		struct hl_eq_sm_sei_data *sei_data)
   7164{
   7165	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
   7166
    7167	/* Flip the bits as the enum is ordered in the opposite way (0 <-> 3, 1 <-> 2) */
   7168	index = (index ^ 0x3) & 0x3;
   7169
   7170	switch (sei_data->sei_cause) {
   7171	case SM_SEI_SO_OVERFLOW:
   7172		dev_err_ratelimited(hdev->dev,
   7173			"%s SEI Error: SOB Group %u overflow/underflow",
   7174			gaudi_sync_manager_names[index],
   7175			le32_to_cpu(sei_data->sei_log));
   7176		break;
   7177	case SM_SEI_LBW_4B_UNALIGNED:
   7178		dev_err_ratelimited(hdev->dev,
   7179			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
   7180			gaudi_sync_manager_names[index],
   7181			le32_to_cpu(sei_data->sei_log));
   7182		break;
   7183	case SM_SEI_AXI_RESPONSE_ERR:
   7184		dev_err_ratelimited(hdev->dev,
   7185			"%s SEI Error: AXI ID %u response error",
   7186			gaudi_sync_manager_names[index],
   7187			le32_to_cpu(sei_data->sei_log));
   7188		break;
   7189	default:
   7190		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
   7191				le32_to_cpu(sei_data->sei_log));
   7192		break;
   7193	}
   7194}
   7195
   7196static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
   7197		struct hl_eq_ecc_data *ecc_data)
   7198{
   7199	struct ecc_info_extract_params params;
   7200	u64 ecc_address = 0, ecc_syndrom = 0;
   7201	u8 index, memory_wrapper_idx = 0;
   7202	bool extract_info_from_fw;
   7203	int rc;
   7204
   7205	if (hdev->asic_prop.fw_security_enabled) {
   7206		extract_info_from_fw = true;
   7207		goto extract_ecc_info;
   7208	}
   7209
   7210	switch (event_type) {
   7211	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
   7212	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
   7213		extract_info_from_fw = true;
   7214		break;
   7215	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
   7216		index = event_type - GAUDI_EVENT_TPC0_SERR;
   7217		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
   7218		params.num_memories = 90;
   7219		params.derr = false;
   7220		extract_info_from_fw = false;
   7221		break;
   7222	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
   7223		index = event_type - GAUDI_EVENT_TPC0_DERR;
   7224		params.block_address =
   7225			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
   7226		params.num_memories = 90;
   7227		params.derr = true;
   7228		extract_info_from_fw = false;
   7229		break;
   7230	case GAUDI_EVENT_MME0_ACC_SERR:
   7231	case GAUDI_EVENT_MME1_ACC_SERR:
   7232	case GAUDI_EVENT_MME2_ACC_SERR:
   7233	case GAUDI_EVENT_MME3_ACC_SERR:
   7234		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
   7235		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
   7236		params.num_memories = 128;
   7237		params.derr = false;
   7238		extract_info_from_fw = false;
   7239		break;
   7240	case GAUDI_EVENT_MME0_ACC_DERR:
   7241	case GAUDI_EVENT_MME1_ACC_DERR:
   7242	case GAUDI_EVENT_MME2_ACC_DERR:
   7243	case GAUDI_EVENT_MME3_ACC_DERR:
   7244		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
   7245		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
   7246		params.num_memories = 128;
   7247		params.derr = true;
   7248		extract_info_from_fw = false;
   7249		break;
   7250	case GAUDI_EVENT_MME0_SBAB_SERR:
   7251	case GAUDI_EVENT_MME1_SBAB_SERR:
   7252	case GAUDI_EVENT_MME2_SBAB_SERR:
   7253	case GAUDI_EVENT_MME3_SBAB_SERR:
   7254		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
   7255		params.block_address =
   7256			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
   7257		params.num_memories = 33;
   7258		params.derr = false;
   7259		extract_info_from_fw = false;
   7260		break;
   7261	case GAUDI_EVENT_MME0_SBAB_DERR:
   7262	case GAUDI_EVENT_MME1_SBAB_DERR:
   7263	case GAUDI_EVENT_MME2_SBAB_DERR:
   7264	case GAUDI_EVENT_MME3_SBAB_DERR:
   7265		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
   7266		params.block_address =
   7267			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
   7268		params.num_memories = 33;
   7269		params.derr = true;
   7270		extract_info_from_fw = false;
   7271		break;
   7272	default:
   7273		return;
   7274	}
   7275
   7276extract_ecc_info:
   7277	if (extract_info_from_fw) {
   7278		ecc_address = le64_to_cpu(ecc_data->ecc_address);
   7279		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
   7280		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
   7281	} else {
   7282		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
   7283				&ecc_syndrom, &memory_wrapper_idx);
   7284		if (rc)
   7285			return;
   7286	}
   7287
   7288	dev_err(hdev->dev,
    7289		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
   7290		ecc_address, ecc_syndrom, memory_wrapper_idx);
   7291}
   7292
   7293static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
   7294{
   7295	u64 qman_base;
   7296	char desc[32];
   7297	u32 qid_base;
   7298	u8 index;
   7299
   7300	switch (event_type) {
   7301	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
   7302		index = event_type - GAUDI_EVENT_TPC0_QM;
   7303		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
   7304		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
   7305		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
   7306		break;
   7307	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
   7308		index = event_type - GAUDI_EVENT_MME0_QM;
   7309		qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
   7310		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
   7311		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
   7312		break;
   7313	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
   7314		index = event_type - GAUDI_EVENT_DMA0_QM;
   7315		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
   7316		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
   7317		if (index > 1)
   7318			qid_base++;
   7319		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
   7320		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
   7321		break;
   7322	case GAUDI_EVENT_NIC0_QM0:
   7323		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
   7324		qman_base = mmNIC0_QM0_BASE;
   7325		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
   7326		break;
   7327	case GAUDI_EVENT_NIC0_QM1:
   7328		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
   7329		qman_base = mmNIC0_QM1_BASE;
   7330		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
   7331		break;
   7332	case GAUDI_EVENT_NIC1_QM0:
   7333		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
   7334		qman_base = mmNIC1_QM0_BASE;
   7335		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
   7336		break;
   7337	case GAUDI_EVENT_NIC1_QM1:
   7338		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
   7339		qman_base = mmNIC1_QM1_BASE;
   7340		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
   7341		break;
   7342	case GAUDI_EVENT_NIC2_QM0:
   7343		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
   7344		qman_base = mmNIC2_QM0_BASE;
   7345		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
   7346		break;
   7347	case GAUDI_EVENT_NIC2_QM1:
   7348		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
   7349		qman_base = mmNIC2_QM1_BASE;
   7350		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
   7351		break;
   7352	case GAUDI_EVENT_NIC3_QM0:
   7353		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
   7354		qman_base = mmNIC3_QM0_BASE;
   7355		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
   7356		break;
   7357	case GAUDI_EVENT_NIC3_QM1:
   7358		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
   7359		qman_base = mmNIC3_QM1_BASE;
   7360		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
   7361		break;
   7362	case GAUDI_EVENT_NIC4_QM0:
   7363		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
   7364		qman_base = mmNIC4_QM0_BASE;
   7365		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
   7366		break;
   7367	case GAUDI_EVENT_NIC4_QM1:
   7368		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
   7369		qman_base = mmNIC4_QM1_BASE;
   7370		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
   7371		break;
   7372	default:
   7373		return;
   7374	}
   7375
   7376	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
   7377}
   7378
   7379static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
   7380					bool razwi)
   7381{
   7382	u32 engine_id_1, engine_id_2;
   7383	char desc[64] = "";
   7384	u64 razwi_addr = 0;
   7385	u8 razwi_type;
   7386	int rc;
   7387
   7388	/*
    7389	 * Init the engine ids as invalid by default; they get a valid value only if the razwi
    7390	 * was initiated by an engine that has an engine id.
    7391	 * Init the razwi type to its default; it is changed only if the razwi was caused by a
    7392	 * page fault or an MMU access error.
   7393	 */
   7394	engine_id_1 = U16_MAX;
   7395	engine_id_2 = U16_MAX;
   7396	razwi_type = U8_MAX;
   7397
   7398	gaudi_get_event_desc(event_type, desc, sizeof(desc));
   7399	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
   7400		event_type, desc);
   7401
   7402	if (razwi) {
   7403		gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
   7404		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
   7405
    7406		/* In case it's the first razwi, save its parameters */
   7407		rc = atomic_cmpxchg(&hdev->last_error.razwi.write_disable, 0, 1);
   7408		if (!rc) {
   7409			hdev->last_error.razwi.timestamp = ktime_get();
   7410			hdev->last_error.razwi.addr = razwi_addr;
   7411			hdev->last_error.razwi.engine_id_1 = engine_id_1;
   7412			hdev->last_error.razwi.engine_id_2 = engine_id_2;
   7413			/*
    7414			 * If the first engine id holds a non-valid value, the razwi
    7415			 * initiator does not have an engine id
   7416			 */
   7417			hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
   7418			hdev->last_error.razwi.type = razwi_type;
   7419
   7420		}
   7421	}
   7422}
   7423
   7424static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
   7425					struct cpucp_pkt_sync_err *sync_err)
   7426{
   7427	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
   7428
   7429	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
   7430			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
   7431}
   7432
   7433static void gaudi_print_fw_alive_info(struct hl_device *hdev,
   7434					struct hl_eq_fw_alive *fw_alive)
   7435{
   7436	dev_err(hdev->dev,
   7437		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
   7438		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
   7439		"Minor" : "Critical", fw_alive->process_id,
   7440		fw_alive->thread_id, fw_alive->uptime_seconds);
   7441}
   7442
   7443static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
   7444						void *data)
   7445{
   7446	char desc[64] = "", *type;
   7447	struct eq_nic_sei_event *eq_nic_sei = data;
   7448	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
   7449
   7450	switch (eq_nic_sei->axi_error_cause) {
   7451	case RXB:
   7452		type = "RXB";
   7453		break;
   7454	case RXE:
   7455		type = "RXE";
   7456		break;
   7457	case TXS:
   7458		type = "TXS";
   7459		break;
   7460	case TXE:
   7461		type = "TXE";
   7462		break;
   7463	case QPC_RESP:
   7464		type = "QPC_RESP";
   7465		break;
   7466	case NON_AXI_ERR:
   7467		type = "NON_AXI_ERR";
   7468		break;
   7469	case TMR:
   7470		type = "TMR";
   7471		break;
   7472	default:
   7473		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
   7474			eq_nic_sei->axi_error_cause);
   7475		type = "N/A";
   7476		break;
   7477	}
   7478
   7479	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
   7480			eq_nic_sei->id);
   7481	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
   7482		event_type, desc);
   7483}
   7484
   7485static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
   7486{
   7487	/* GAUDI doesn't support any reset except hard-reset */
   7488	return -EPERM;
   7489}
   7490
   7491static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
   7492			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
   7493{
   7494	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
   7495	int rc = 0;
   7496
   7497	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
   7498					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
   7499		if (!hbm_ecc_data) {
   7500			dev_err(hdev->dev, "No FW ECC data");
   7501			return 0;
   7502		}
   7503
   7504		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
   7505				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
   7506		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
   7507				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
   7508		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
   7509				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
   7510		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
   7511				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
   7512		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
   7513				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
   7514		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
   7515				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
   7516		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
   7517				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
   7518
   7519		dev_err(hdev->dev,
   7520			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
   7521			device, ch, wr_par, rd_par, ca_par, serr, derr);
   7522		dev_err(hdev->dev,
   7523			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
   7524			device, ch, hbm_ecc_data->first_addr, type,
   7525			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
   7526			hbm_ecc_data->dec_cnt);
   7527		return 0;
   7528	}
   7529
   7530	if (hdev->asic_prop.fw_security_enabled) {
   7531		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
   7532		return 0;
   7533	}
   7534
   7535	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
   7536	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
   7537		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
   7538		val = (val & 0xFF) | ((val >> 8) & 0xFF);
   7539		if (val) {
   7540			rc = -EIO;
   7541			dev_err(hdev->dev,
   7542				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
   7543				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
   7544				(val >> 2) & 0x1, (val >> 3) & 0x1,
   7545				(val >> 4) & 0x1);
   7546
   7547			val2 = RREG32(base + ch * 0x1000 + 0x060);
   7548			dev_err(hdev->dev,
   7549				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
   7550				device, ch * 2,
   7551				RREG32(base + ch * 0x1000 + 0x064),
   7552				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
   7553				(val2 & 0xFF0000) >> 16,
   7554				(val2 & 0xFF000000) >> 24);
   7555		}
   7556
   7557		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
   7558		val = (val & 0xFF) | ((val >> 8) & 0xFF);
   7559		if (val) {
   7560			rc = -EIO;
   7561			dev_err(hdev->dev,
   7562				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
   7563				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
   7564				(val >> 2) & 0x1, (val >> 3) & 0x1,
   7565				(val >> 4) & 0x1);
   7566
   7567			val2 = RREG32(base + ch * 0x1000 + 0x070);
   7568			dev_err(hdev->dev,
   7569				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
   7570				device, ch * 2 + 1,
   7571				RREG32(base + ch * 0x1000 + 0x074),
   7572				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
   7573				(val2 & 0xFF0000) >> 16,
   7574				(val2 & 0xFF000000) >> 24);
   7575		}
   7576
   7577		/* Clear interrupts */
   7578		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
   7579		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
   7580		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
   7581		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
   7582		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
   7583		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
   7584	}
   7585
   7586	val  = RREG32(base + 0x8F30);
   7587	val2 = RREG32(base + 0x8F34);
   7588	if (val | val2) {
   7589		rc = -EIO;
   7590		dev_err(hdev->dev,
   7591			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
   7592			device, val, val2);
   7593	}
   7594	val  = RREG32(base + 0x8F40);
   7595	val2 = RREG32(base + 0x8F44);
   7596	if (val | val2) {
   7597		rc = -EIO;
   7598		dev_err(hdev->dev,
   7599			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
   7600			device, val, val2);
   7601	}
   7602
   7603	return rc;
   7604}
   7605
   7606static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
   7607{
   7608	switch (hbm_event_type) {
   7609	case GAUDI_EVENT_HBM0_SPI_0:
   7610	case GAUDI_EVENT_HBM0_SPI_1:
   7611		return 0;
   7612	case GAUDI_EVENT_HBM1_SPI_0:
   7613	case GAUDI_EVENT_HBM1_SPI_1:
   7614		return 1;
   7615	case GAUDI_EVENT_HBM2_SPI_0:
   7616	case GAUDI_EVENT_HBM2_SPI_1:
   7617		return 2;
   7618	case GAUDI_EVENT_HBM3_SPI_0:
   7619	case GAUDI_EVENT_HBM3_SPI_1:
   7620		return 3;
   7621	default:
   7622		break;
   7623	}
   7624
   7625	/* Should never happen */
   7626	return 0;
   7627}
   7628
   7629static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
   7630					char *interrupt_name)
   7631{
   7632	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
   7633	bool soft_reset_required = false;
   7634
   7635	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
   7636				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
   7637
   7638	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
   7639		if (tpc_interrupts_cause & BIT(i)) {
   7640			dev_err_ratelimited(hdev->dev,
   7641					"TPC%d_%s interrupt cause: %s\n",
   7642					tpc_id, interrupt_name,
   7643					gaudi_tpc_interrupts_cause[i]);
   7644			/* If this is a QM error, we need to soft-reset */
   7645			if (i == 15)
   7646				soft_reset_required = true;
   7647		}
   7648
   7649	/* Clear interrupts */
   7650	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
   7651
   7652	return soft_reset_required;
   7653}
   7654
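       /* Event IDs are laid out per TPC: DEC events are spaced two apart and
        * KRN_ERR events six apart, hence the different strides below.
        */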
   7655static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
   7656{
   7657	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
   7658}
   7659
   7660static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
   7661{
   7662	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
   7663}
   7664
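       /* Track clock throttling state: update the current and aggregated
        * reason bits and the per-reason start/end timestamps under the
        * throttling lock, and log the transition.
        */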
   7665static void gaudi_print_clk_change_info(struct hl_device *hdev,
   7666					u16 event_type)
   7667{
   7668	ktime_t zero_time = ktime_set(0, 0);
   7669
   7670	mutex_lock(&hdev->clk_throttling.lock);
   7671
   7672	switch (event_type) {
   7673	case GAUDI_EVENT_FIX_POWER_ENV_S:
   7674		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
   7675		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
   7676		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
   7677		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
   7678		dev_info_ratelimited(hdev->dev,
   7679			"Clock throttling due to power consumption\n");
   7680		break;
   7681
   7682	case GAUDI_EVENT_FIX_POWER_ENV_E:
   7683		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
   7684		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
   7685		dev_info_ratelimited(hdev->dev,
   7686			"Power envelope is safe, back to optimal clock\n");
   7687		break;
   7688
   7689	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
   7690		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
   7691		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
   7692		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
   7693		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
   7694		dev_info_ratelimited(hdev->dev,
   7695			"Clock throttling due to overheating\n");
   7696		break;
   7697
   7698	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
   7699		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
   7700		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
   7701		dev_info_ratelimited(hdev->dev,
   7702			"Thermal envelope is safe, back to optimal clock\n");
   7703		break;
   7704
   7705	default:
   7706		dev_err(hdev->dev, "Received invalid clock change event %d\n",
   7707			event_type);
   7708		break;
   7709	}
   7710
   7711	mutex_unlock(&hdev->clk_throttling.lock);
   7712}
   7713
   7714static void gaudi_handle_eqe(struct hl_device *hdev,
   7715				struct hl_eq_entry *eq_entry)
   7716{
   7717	struct gaudi_device *gaudi = hdev->asic_specific;
   7718	u64 data = le64_to_cpu(eq_entry->data[0]);
   7719	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
   7720	u32 fw_fatal_err_flag = 0;
   7721	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
   7722			>> EQ_CTL_EVENT_TYPE_SHIFT);
   7723	bool reset_required;
   7724	u8 cause;
   7725	int rc;
   7726
   7727	if (event_type >= GAUDI_EVENT_SIZE) {
   7728		dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
   7729				event_type, GAUDI_EVENT_SIZE - 1);
   7730		return;
   7731	}
   7732
   7733	gaudi->events_stat[event_type]++;
   7734	gaudi->events_stat_aggregate[event_type]++;
   7735
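       	/* Dispatch by event type: uncorrectable (DERR) and other fatal events
       	 * end with a device reset, while correctable (SERR) and informational
       	 * events are logged and the IRQ is unmasked towards the FW.
       	 */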
   7736	switch (event_type) {
   7737	case GAUDI_EVENT_PCIE_CORE_DERR:
   7738	case GAUDI_EVENT_PCIE_IF_DERR:
   7739	case GAUDI_EVENT_PCIE_PHY_DERR:
   7740	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
   7741	case GAUDI_EVENT_MME0_ACC_DERR:
   7742	case GAUDI_EVENT_MME0_SBAB_DERR:
   7743	case GAUDI_EVENT_MME1_ACC_DERR:
   7744	case GAUDI_EVENT_MME1_SBAB_DERR:
   7745	case GAUDI_EVENT_MME2_ACC_DERR:
   7746	case GAUDI_EVENT_MME2_SBAB_DERR:
   7747	case GAUDI_EVENT_MME3_ACC_DERR:
   7748	case GAUDI_EVENT_MME3_SBAB_DERR:
   7749	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
   7750		fallthrough;
   7751	case GAUDI_EVENT_CPU_IF_ECC_DERR:
   7752	case GAUDI_EVENT_PSOC_MEM_DERR:
   7753	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
   7754	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
   7755	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
   7756	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
   7757	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
   7758	case GAUDI_EVENT_MMU_DERR:
   7759	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
   7760		gaudi_print_irq_info(hdev, event_type, true);
   7761		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
   7762		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
   7763		goto reset_device;
   7764
   7765	case GAUDI_EVENT_GIC500:
   7766	case GAUDI_EVENT_AXI_ECC:
   7767	case GAUDI_EVENT_L2_RAM_ECC:
   7768	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
   7769		gaudi_print_irq_info(hdev, event_type, false);
   7770		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
   7771		goto reset_device;
   7772
   7773	case GAUDI_EVENT_HBM0_SPI_0:
   7774	case GAUDI_EVENT_HBM1_SPI_0:
   7775	case GAUDI_EVENT_HBM2_SPI_0:
   7776	case GAUDI_EVENT_HBM3_SPI_0:
   7777		gaudi_print_irq_info(hdev, event_type, false);
   7778		gaudi_hbm_read_interrupts(hdev,
   7779				gaudi_hbm_event_to_dev(event_type),
   7780				&eq_entry->hbm_ecc_data);
   7781		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
   7782		goto reset_device;
   7783
   7784	case GAUDI_EVENT_HBM0_SPI_1:
   7785	case GAUDI_EVENT_HBM1_SPI_1:
   7786	case GAUDI_EVENT_HBM2_SPI_1:
   7787	case GAUDI_EVENT_HBM3_SPI_1:
   7788		gaudi_print_irq_info(hdev, event_type, false);
   7789		gaudi_hbm_read_interrupts(hdev,
   7790				gaudi_hbm_event_to_dev(event_type),
   7791				&eq_entry->hbm_ecc_data);
   7792		hl_fw_unmask_irq(hdev, event_type);
   7793		break;
   7794
   7795	case GAUDI_EVENT_TPC0_DEC:
   7796	case GAUDI_EVENT_TPC1_DEC:
   7797	case GAUDI_EVENT_TPC2_DEC:
   7798	case GAUDI_EVENT_TPC3_DEC:
   7799	case GAUDI_EVENT_TPC4_DEC:
   7800	case GAUDI_EVENT_TPC5_DEC:
   7801	case GAUDI_EVENT_TPC6_DEC:
   7802	case GAUDI_EVENT_TPC7_DEC:
   7803		gaudi_print_irq_info(hdev, event_type, true);
   7804		reset_required = gaudi_tpc_read_interrupts(hdev,
   7805					tpc_dec_event_to_tpc_id(event_type),
   7806					"AXI_SLV_DEC_Error");
   7807		if (reset_required) {
   7808			dev_err(hdev->dev, "reset required due to %s\n",
   7809				gaudi_irq_map_table[event_type].name);
   7810
   7811			hl_device_reset(hdev, 0);
   7812		} else {
   7813			hl_fw_unmask_irq(hdev, event_type);
   7814		}
   7815		break;
   7816
   7817	case GAUDI_EVENT_TPC0_KRN_ERR:
   7818	case GAUDI_EVENT_TPC1_KRN_ERR:
   7819	case GAUDI_EVENT_TPC2_KRN_ERR:
   7820	case GAUDI_EVENT_TPC3_KRN_ERR:
   7821	case GAUDI_EVENT_TPC4_KRN_ERR:
   7822	case GAUDI_EVENT_TPC5_KRN_ERR:
   7823	case GAUDI_EVENT_TPC6_KRN_ERR:
   7824	case GAUDI_EVENT_TPC7_KRN_ERR:
   7825		gaudi_print_irq_info(hdev, event_type, true);
   7826		reset_required = gaudi_tpc_read_interrupts(hdev,
   7827					tpc_krn_event_to_tpc_id(event_type),
   7828					"KRN_ERR");
   7829		if (reset_required) {
   7830			dev_err(hdev->dev, "reset required due to %s\n",
   7831				gaudi_irq_map_table[event_type].name);
   7832
   7833			hl_device_reset(hdev, 0);
   7834		} else {
   7835			hl_fw_unmask_irq(hdev, event_type);
   7836		}
   7837		break;
   7838
   7839	case GAUDI_EVENT_PCIE_CORE_SERR:
   7840	case GAUDI_EVENT_PCIE_IF_SERR:
   7841	case GAUDI_EVENT_PCIE_PHY_SERR:
   7842	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
   7843	case GAUDI_EVENT_MME0_ACC_SERR:
   7844	case GAUDI_EVENT_MME0_SBAB_SERR:
   7845	case GAUDI_EVENT_MME1_ACC_SERR:
   7846	case GAUDI_EVENT_MME1_SBAB_SERR:
   7847	case GAUDI_EVENT_MME2_ACC_SERR:
   7848	case GAUDI_EVENT_MME2_SBAB_SERR:
   7849	case GAUDI_EVENT_MME3_ACC_SERR:
   7850	case GAUDI_EVENT_MME3_SBAB_SERR:
   7851	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
   7852	case GAUDI_EVENT_CPU_IF_ECC_SERR:
   7853	case GAUDI_EVENT_PSOC_MEM_SERR:
   7854	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
   7855	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
   7856	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
   7857	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
   7858	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
   7859		fallthrough;
   7860	case GAUDI_EVENT_MMU_SERR:
   7861		gaudi_print_irq_info(hdev, event_type, true);
   7862		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
   7863		hl_fw_unmask_irq(hdev, event_type);
   7864		break;
   7865
   7866	case GAUDI_EVENT_PCIE_DEC:
   7867	case GAUDI_EVENT_MME0_WBC_RSP:
   7868	case GAUDI_EVENT_MME0_SBAB0_RSP:
   7869	case GAUDI_EVENT_MME1_WBC_RSP:
   7870	case GAUDI_EVENT_MME1_SBAB0_RSP:
   7871	case GAUDI_EVENT_MME2_WBC_RSP:
   7872	case GAUDI_EVENT_MME2_SBAB0_RSP:
   7873	case GAUDI_EVENT_MME3_WBC_RSP:
   7874	case GAUDI_EVENT_MME3_SBAB0_RSP:
   7875	case GAUDI_EVENT_CPU_AXI_SPLITTER:
   7876	case GAUDI_EVENT_PSOC_AXI_DEC:
   7877	case GAUDI_EVENT_PSOC_PRSTN_FALL:
   7878	case GAUDI_EVENT_MMU_PAGE_FAULT:
   7879	case GAUDI_EVENT_MMU_WR_PERM:
   7880	case GAUDI_EVENT_RAZWI_OR_ADC:
   7881	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
   7882	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
   7883		fallthrough;
   7884	case GAUDI_EVENT_NIC0_QM0:
   7885	case GAUDI_EVENT_NIC0_QM1:
   7886	case GAUDI_EVENT_NIC1_QM0:
   7887	case GAUDI_EVENT_NIC1_QM1:
   7888	case GAUDI_EVENT_NIC2_QM0:
   7889	case GAUDI_EVENT_NIC2_QM1:
   7890	case GAUDI_EVENT_NIC3_QM0:
   7891	case GAUDI_EVENT_NIC3_QM1:
   7892	case GAUDI_EVENT_NIC4_QM0:
   7893	case GAUDI_EVENT_NIC4_QM1:
   7894	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
   7895		gaudi_print_irq_info(hdev, event_type, true);
   7896		gaudi_handle_qman_err(hdev, event_type);
   7897		hl_fw_unmask_irq(hdev, event_type);
   7898		break;
   7899
   7900	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
   7901		gaudi_print_irq_info(hdev, event_type, true);
   7902		gaudi_handle_qman_err(hdev, event_type);
   7903		hl_fw_unmask_irq(hdev, event_type);
   7904
   7905		/* On a TPC QM event, notify about a possible TPC assertion. While
   7906		 * there isn't a specific assertion event yet, the FW generates a QM
   7907		 * event. The SW upper layer will inspect an internal mapped area to
   7908		 * determine whether the event is a TPC assertion or a TPC QM error.
   7909		 */
   7910		hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT);
   7911		break;
   7912
   7913	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
   7914		gaudi_print_irq_info(hdev, event_type, true);
   7915		goto reset_device;
   7916
   7917	case GAUDI_EVENT_TPC0_BMON_SPMU:
   7918	case GAUDI_EVENT_TPC1_BMON_SPMU:
   7919	case GAUDI_EVENT_TPC2_BMON_SPMU:
   7920	case GAUDI_EVENT_TPC3_BMON_SPMU:
   7921	case GAUDI_EVENT_TPC4_BMON_SPMU:
   7922	case GAUDI_EVENT_TPC5_BMON_SPMU:
   7923	case GAUDI_EVENT_TPC6_BMON_SPMU:
   7924	case GAUDI_EVENT_TPC7_BMON_SPMU:
   7925	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
   7926		gaudi_print_irq_info(hdev, event_type, false);
   7927		hl_fw_unmask_irq(hdev, event_type);
   7928		break;
   7929
   7930	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
   7931		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
   7932		hl_fw_unmask_irq(hdev, event_type);
   7933		break;
   7934
   7935	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
   7936		gaudi_print_irq_info(hdev, event_type, false);
   7937		gaudi_print_sm_sei_info(hdev, event_type,
   7938					&eq_entry->sm_sei_data);
   7939		rc = hl_state_dump(hdev);
   7940		if (rc)
   7941			dev_err(hdev->dev,
   7942				"Error during system state dump %d\n", rc);
   7943		hl_fw_unmask_irq(hdev, event_type);
   7944		break;
   7945
   7946	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
   7947		break;
   7948
   7949	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
   7950		gaudi_print_clk_change_info(hdev, event_type);
   7951		hl_fw_unmask_irq(hdev, event_type);
   7952		break;
   7953
   7954	case GAUDI_EVENT_PSOC_GPIO_U16_0:
   7955		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
   7956		dev_err(hdev->dev,
   7957			"Received high temp H/W interrupt %d (cause %d)\n",
   7958			event_type, cause);
   7959		break;
   7960
   7961	case GAUDI_EVENT_DEV_RESET_REQ:
   7962		gaudi_print_irq_info(hdev, event_type, false);
   7963		goto reset_device;
   7964
   7965	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
   7966		gaudi_print_irq_info(hdev, event_type, false);
   7967		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
   7968		goto reset_device;
   7969
   7970	case GAUDI_EVENT_FW_ALIVE_S:
   7971		gaudi_print_irq_info(hdev, event_type, false);
   7972		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
   7973		goto reset_device;
   7974
   7975	default:
   7976		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
   7977				event_type);
   7978		break;
   7979	}
   7980
   7981	return;
   7982
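       	/* Common reset path: when FW security is enabled, issue a hard reset
       	 * that bypasses the request-to-FW flow; otherwise do a hard (delayed)
       	 * reset only if hard_reset_on_fw_events is set, and as a fallback
       	 * just unmask the IRQ.
       	 */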
   7983reset_device:
   7984	if (hdev->asic_prop.fw_security_enabled)
   7985		hl_device_reset(hdev, HL_DRV_RESET_HARD
   7986					| HL_DRV_RESET_BYPASS_REQ_TO_FW
   7987					| fw_fatal_err_flag);
   7988	else if (hdev->hard_reset_on_fw_events)
   7989		hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag);
   7990	else
   7991		hl_fw_unmask_irq(hdev, event_type);
   7992}
   7993
   7994static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
   7995					u32 *size)
   7996{
   7997	struct gaudi_device *gaudi = hdev->asic_specific;
   7998
   7999	if (aggregate) {
   8000		*size = (u32) sizeof(gaudi->events_stat_aggregate);
   8001		return gaudi->events_stat_aggregate;
   8002	}
   8003
   8004	*size = (u32) sizeof(gaudi->events_stat);
   8005	return gaudi->events_stat;
   8006}
   8007
   8008static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
   8009					u32 flags)
   8010{
   8011	struct gaudi_device *gaudi = hdev->asic_specific;
   8012	u32 status, timeout_usec;
   8013	int rc;
   8014
   8015	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
   8016		hdev->reset_info.hard_reset_pending)
   8017		return 0;
   8018
   8019	if (hdev->pldm)
   8020		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
   8021	else
   8022		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
   8023
   8024	/* L0 & L1 invalidation */
   8025	WREG32(mmSTLB_INV_PS, 3);
   8026	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
   8027	WREG32(mmSTLB_INV_PS, 2);
   8028
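       	/* Wait for the STLB to report that the invalidation has completed
       	 * (INV_PS reads back as zero) before clearing INV_SET below.
       	 */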
   8029	rc = hl_poll_timeout(
   8030		hdev,
   8031		mmSTLB_INV_PS,
   8032		status,
   8033		!status,
   8034		1000,
   8035		timeout_usec);
   8036
   8037	WREG32(mmSTLB_INV_SET, 0);
   8038
   8039	return rc;
   8040}
   8041
   8042static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
   8043						bool is_hard, u32 flags,
   8044						u32 asid, u64 va, u64 size)
   8045{
   8046	/* Treat as invalidate all because there is no range invalidation
   8047	 * in Gaudi
   8048	 */
   8049	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
   8050}
   8051
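       /* Program the hop0 page-table physical address for an ASID: the PA is
        * split across two registers (bits 43:12 and 49:44), then MMU_BUSY is
        * set and polled until the MMU acknowledges the update.
        */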
   8052static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
   8053					u32 asid, u64 phys_addr)
   8054{
   8055	u32 status, timeout_usec;
   8056	int rc;
   8057
   8058	if (hdev->pldm)
   8059		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
   8060	else
   8061		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
   8062
   8063	WREG32(MMU_ASID, asid);
   8064	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
   8065	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
   8066	WREG32(MMU_BUSY, 0x80000000);
   8067
   8068	rc = hl_poll_timeout(
   8069		hdev,
   8070		MMU_BUSY,
   8071		status,
   8072		!(status & 0x80000000),
   8073		1000,
   8074		timeout_usec);
   8075
   8076	if (rc) {
   8077		dev_err(hdev->dev,
   8078			"Timeout during MMU hop0 config of asid %d\n", asid);
   8079		return rc;
   8080	}
   8081
   8082	return 0;
   8083}
   8084
   8085static int gaudi_send_heartbeat(struct hl_device *hdev)
   8086{
   8087	struct gaudi_device *gaudi = hdev->asic_specific;
   8088
   8089	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
   8090		return 0;
   8091
   8092	return hl_fw_send_heartbeat(hdev);
   8093}
   8094
   8095static int gaudi_cpucp_info_get(struct hl_device *hdev)
   8096{
   8097	struct gaudi_device *gaudi = hdev->asic_specific;
   8098	struct asic_fixed_properties *prop = &hdev->asic_prop;
   8099	int rc;
   8100
   8101	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
   8102		return 0;
   8103
   8104	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
   8105					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
   8106					mmCPU_BOOT_ERR1);
   8107	if (rc)
   8108		return rc;
   8109
   8110	if (!strlen(prop->cpucp_info.card_name))
   8111		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
   8112				CARD_NAME_MAX_LEN);
   8113
   8114	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
   8115
   8116	set_default_power_values(hdev);
   8117
   8118	return 0;
   8119}
   8120
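       /* Check whether all engines (DMA, TPC, MME, NIC) are idle based on
        * their QMAN and engine status registers. Busy engines are marked in
        * the caller-supplied mask and, when a seq_file is provided, a
        * per-engine status table is printed.
        */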
   8121static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
   8122					u8 mask_len, struct seq_file *s)
   8123{
   8124	struct gaudi_device *gaudi = hdev->asic_specific;
   8125	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
   8126	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
   8127	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
   8128	unsigned long *mask = (unsigned long *)mask_arr;
   8129	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
   8130	bool is_idle = true, is_eng_idle, is_slave;
   8131	u64 offset;
   8132	int i, dma_id, port;
   8133
   8134	if (s)
   8135		seq_puts(s,
   8136			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
   8137			"---  -------  ------------  ----------  -------------\n");
   8138
   8139	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
   8140		dma_id = gaudi_dma_assignment[i];
   8141		offset = dma_id * DMA_QMAN_OFFSET;
   8142
   8143		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
   8144		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
   8145		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
   8146		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
   8147				IS_DMA_IDLE(dma_core_sts0);
   8148		is_idle &= is_eng_idle;
   8149
   8150		if (mask && !is_eng_idle)
   8151			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
   8152		if (s)
   8153			seq_printf(s, fmt, dma_id,
   8154				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
   8155				qm_cgm_sts, dma_core_sts0);
   8156	}
   8157
   8158	if (s)
   8159		seq_puts(s,
   8160			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
   8161			"---  -------  ------------  ----------  ----------\n");
   8162
   8163	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
   8164		offset = i * TPC_QMAN_OFFSET;
   8165		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
   8166		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
   8167		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
   8168		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
   8169				IS_TPC_IDLE(tpc_cfg_sts);
   8170		is_idle &= is_eng_idle;
   8171
   8172		if (mask && !is_eng_idle)
   8173			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
   8174		if (s)
   8175			seq_printf(s, fmt, i,
   8176				is_eng_idle ? "Y" : "N",
   8177				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
   8178	}
   8179
   8180	if (s)
   8181		seq_puts(s,
   8182			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
   8183			"---  -------  ------------  ----------  -----------\n");
   8184
   8185	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
   8186		offset = i * MME_QMAN_OFFSET;
   8187		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
   8188		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
   8189
   8190		/* MME 1 & 3 are slaves, no need to check their QMANs */
   8191		is_slave = i % 2;
   8192		if (!is_slave) {
   8193			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
   8194			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
   8195			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
   8196		}
   8197
   8198		is_idle &= is_eng_idle;
   8199
   8200		if (mask && !is_eng_idle)
   8201			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
   8202		if (s) {
   8203			if (!is_slave)
   8204				seq_printf(s, fmt, i,
   8205					is_eng_idle ? "Y" : "N",
   8206					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
   8207			else
   8208				seq_printf(s, mme_slave_fmt, i,
   8209					is_eng_idle ? "Y" : "N", "-",
   8210					"-", mme_arch_sts);
   8211		}
   8212	}
   8213
   8214	if (s)
   8215		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
   8216				"---  -------  ------------  ----------\n");
   8217
   8218	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
   8219		offset = i * NIC_MACRO_QMAN_OFFSET;
   8220		port = 2 * i;
   8221		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
   8222			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
   8223			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
   8224			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
   8225			is_idle &= is_eng_idle;
   8226
   8227			if (mask && !is_eng_idle)
   8228				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
   8229			if (s)
   8230				seq_printf(s, nic_fmt, port,
   8231						is_eng_idle ? "Y" : "N",
   8232						qm_glbl_sts0, qm_cgm_sts);
   8233		}
   8234
   8235		port = 2 * i + 1;
   8236		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
   8237			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
   8238			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
   8239			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
   8240			is_idle &= is_eng_idle;
   8241
   8242			if (mask && !is_eng_idle)
   8243				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
   8244			if (s)
   8245				seq_printf(s, nic_fmt, port,
   8246						is_eng_idle ? "Y" : "N",
   8247						qm_glbl_sts0, qm_cgm_sts);
   8248		}
   8249	}
   8250
   8251	if (s)
   8252		seq_puts(s, "\n");
   8253
   8254	return is_idle;
   8255}
   8256
   8257static void gaudi_hw_queues_lock(struct hl_device *hdev)
   8258	__acquires(&gaudi->hw_queues_lock)
   8259{
   8260	struct gaudi_device *gaudi = hdev->asic_specific;
   8261
   8262	spin_lock(&gaudi->hw_queues_lock);
   8263}
   8264
   8265static void gaudi_hw_queues_unlock(struct hl_device *hdev)
   8266	__releases(&gaudi->hw_queues_lock)
   8267{
   8268	struct gaudi_device *gaudi = hdev->asic_specific;
   8269
   8270	spin_unlock(&gaudi->hw_queues_lock);
   8271}
   8272
   8273static u32 gaudi_get_pci_id(struct hl_device *hdev)
   8274{
   8275	return hdev->pdev->device;
   8276}
   8277
   8278static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
   8279				size_t max_size)
   8280{
   8281	struct gaudi_device *gaudi = hdev->asic_specific;
   8282
   8283	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
   8284		return 0;
   8285
   8286	return hl_fw_get_eeprom_data(hdev, data, max_size);
   8287}
   8288
   8289static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
   8290{
   8291	struct gaudi_device *gaudi = hdev->asic_specific;
   8292
   8293	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
   8294		return 0;
   8295
   8296	return hl_fw_get_monitor_dump(hdev, data);
   8297}
   8298
   8299/*
   8300 * this function should be used only during initialization and/or after reset,
   8301 * when there are no active users.
   8302 */
   8303static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
   8304{
   8305	u64 kernel_timeout;
   8306	u32 status, offset;
   8307	int rc;
   8308
   8309	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
   8310
   8311	if (hdev->pldm)
   8312		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
   8313	else
   8314		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
   8315
   8316	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
   8317			lower_32_bits(tpc_kernel));
   8318	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
   8319			upper_32_bits(tpc_kernel));
   8320
   8321	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
   8322			lower_32_bits(tpc_kernel));
   8323	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
   8324			upper_32_bits(tpc_kernel));
   8325	/* set a valid LUT pointer, content is of no significance */
   8326	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
   8327			lower_32_bits(tpc_kernel));
   8328	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
   8329			upper_32_bits(tpc_kernel));
   8330
   8331	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
   8332			lower_32_bits(CFG_BASE +
   8333				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
   8334
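       	/* Kick the TPC icache: invalidate it and prefetch 64KB from the
       	 * kernel base address before issuing the execute command below.
       	 */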
   8335	WREG32(mmTPC0_CFG_TPC_CMD + offset,
   8336			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
   8337			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
   8338	/* wait a bit for the engine to start executing */
   8339	usleep_range(1000, 1500);
   8340
   8341	/* wait until the icache prefetch has finished */
   8342	rc = hl_poll_timeout(
   8343		hdev,
   8344		mmTPC0_CFG_STATUS + offset,
   8345		status,
   8346		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
   8347				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
   8348		1000,
   8349		kernel_timeout);
   8350
   8351	if (rc) {
   8352		dev_err(hdev->dev,
   8353			"Timeout while waiting for TPC%d icache prefetch\n",
   8354			tpc_id);
   8355		return -EIO;
   8356	}
   8357
   8358	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
   8359			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
   8360
   8361	/* wait a bit for the engine to start executing */
   8362	usleep_range(1000, 1500);
   8363
   8364	/* wait until engine has finished executing */
   8365	rc = hl_poll_timeout(
   8366		hdev,
   8367		mmTPC0_CFG_STATUS + offset,
   8368		status,
   8369		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
   8370				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
   8371		1000,
   8372		kernel_timeout);
   8373
   8374	if (rc) {
   8375		dev_err(hdev->dev,
   8376			"Timeout while waiting for TPC%d vector pipe\n",
   8377			tpc_id);
   8378		return -EIO;
   8379	}
   8380
   8381	rc = hl_poll_timeout(
   8382		hdev,
   8383		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
   8384		status,
   8385		(status == 0),
   8386		1000,
   8387		kernel_timeout);
   8388
   8389	if (rc) {
   8390		dev_err(hdev->dev,
   8391			"Timeout while waiting for TPC%d kernel to execute\n",
   8392			tpc_id);
   8393		return -EIO;
   8394	}
   8395
   8396	return 0;
   8397}
   8398
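       /* Set up the internal CB pool, sized for collective CBs: allocate a
        * coherent host buffer, manage it with a gen_pool whose minimal
        * allocation covers one collective CB, then reserve a host VA block
        * and map it through the device MMU.
        */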
   8399static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
   8400		struct hl_ctx *ctx)
   8401{
   8402	struct gaudi_device *gaudi = hdev->asic_specific;
   8403	int min_alloc_order, rc, collective_cb_size;
   8404
   8405	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
   8406		return 0;
   8407
   8408	hdev->internal_cb_pool_virt_addr =
   8409			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
   8410					HOST_SPACE_INTERNAL_CB_SZ,
   8411					&hdev->internal_cb_pool_dma_addr,
   8412					GFP_KERNEL | __GFP_ZERO);
   8413
   8414	if (!hdev->internal_cb_pool_virt_addr)
   8415		return -ENOMEM;
   8416
   8417	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
   8418			sizeof(struct packet_fence);
   8419	min_alloc_order = ilog2(collective_cb_size);
   8420
   8421	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
   8422	if (!hdev->internal_cb_pool) {
   8423		dev_err(hdev->dev,
   8424			"Failed to create internal CB pool\n");
   8425		rc = -ENOMEM;
   8426		goto free_internal_cb_pool;
   8427	}
   8428
   8429	rc = gen_pool_add(hdev->internal_cb_pool,
   8430				(uintptr_t) hdev->internal_cb_pool_virt_addr,
   8431				HOST_SPACE_INTERNAL_CB_SZ, -1);
   8432	if (rc) {
   8433		dev_err(hdev->dev,
   8434			"Failed to add memory to internal CB pool\n");
   8435		rc = -EFAULT;
   8436		goto destroy_internal_cb_pool;
   8437	}
   8438
   8439	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
   8440			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
   8441			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
   8442
   8443	if (!hdev->internal_cb_va_base) {
   8444		rc = -ENOMEM;
   8445		goto destroy_internal_cb_pool;
   8446	}
   8447
   8448	mutex_lock(&ctx->mmu_lock);
   8449	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
   8450			hdev->internal_cb_pool_dma_addr,
   8451			HOST_SPACE_INTERNAL_CB_SZ);
   8452
   8453	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
   8454	mutex_unlock(&ctx->mmu_lock);
   8455
   8456	if (rc)
   8457		goto unreserve_internal_cb_pool;
   8458
   8459	return 0;
   8460
   8461unreserve_internal_cb_pool:
   8462	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
   8463			HOST_SPACE_INTERNAL_CB_SZ);
   8464destroy_internal_cb_pool:
   8465	gen_pool_destroy(hdev->internal_cb_pool);
   8466free_internal_cb_pool:
   8467	hdev->asic_funcs->asic_dma_free_coherent(hdev,
   8468			HOST_SPACE_INTERNAL_CB_SZ,
   8469			hdev->internal_cb_pool_virt_addr,
   8470			hdev->internal_cb_pool_dma_addr);
   8471
   8472	return rc;
   8473}
   8474
   8475static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
   8476		struct hl_ctx *ctx)
   8477{
   8478	struct gaudi_device *gaudi = hdev->asic_specific;
   8479
   8480	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
   8481		return;
   8482
   8483	mutex_lock(&ctx->mmu_lock);
   8484	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
   8485			HOST_SPACE_INTERNAL_CB_SZ);
   8486	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
   8487			HOST_SPACE_INTERNAL_CB_SZ);
   8488	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
   8489	mutex_unlock(&ctx->mmu_lock);
   8490
   8491	gen_pool_destroy(hdev->internal_cb_pool);
   8492
   8493	hdev->asic_funcs->asic_dma_free_coherent(hdev,
   8494			HOST_SPACE_INTERNAL_CB_SZ,
   8495			hdev->internal_cb_pool_virt_addr,
   8496			hdev->internal_cb_pool_dma_addr);
   8497}
   8498
   8499static int gaudi_ctx_init(struct hl_ctx *ctx)
   8500{
   8501	int rc;
   8502
   8503	if (ctx->asid == HL_KERNEL_ASID_ID)
   8504		return 0;
   8505
   8506	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
   8507	if (rc)
   8508		return rc;
   8509
   8510	rc = gaudi_restore_user_registers(ctx->hdev);
   8511	if (rc)
   8512		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
   8513
   8514	return rc;
   8515}
   8516
   8517static void gaudi_ctx_fini(struct hl_ctx *ctx)
   8518{
   8519	if (ctx->asid == HL_KERNEL_ASID_ID)
   8520		return;
   8521
   8522	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
   8523}
   8524
   8525static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
   8526{
   8527	return gaudi_cq_assignment[cq_idx];
   8528}
   8529
   8530static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
   8531{
   8532	return sizeof(struct packet_msg_short) +
   8533			sizeof(struct packet_msg_prot) * 2;
   8534}
   8535
   8536static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
   8537{
   8538	return sizeof(struct packet_msg_short) * 4 +
   8539			sizeof(struct packet_fence) +
   8540			sizeof(struct packet_msg_prot) * 2;
   8541}
   8542
   8543static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
   8544{
   8545	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
   8546}
   8547
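       /* Build a signal CB: a single MSG_SHORT packet that increments the
        * given SOB in the W_S sync manager by 1 (ADD mode).
        */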
   8548static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
   8549				u32 size, bool eb)
   8550{
   8551	struct hl_cb *cb = (struct hl_cb *) data;
   8552	struct packet_msg_short *pkt;
   8553	u32 value, ctl, pkt_size = sizeof(*pkt);
   8554
   8555	pkt = cb->kernel_address + size;
   8556	memset(pkt, 0, pkt_size);
   8557
   8558	/* Inc by 1, Mode ADD */
   8559	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
   8560	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
   8561
   8562	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
   8563	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
   8564	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
   8565	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
   8566	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
   8567	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
   8568	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   8569
   8570	pkt->value = cpu_to_le32(value);
   8571	pkt->ctl = cpu_to_le32(ctl);
   8572
   8573	return size + pkt_size;
   8574}
   8575
   8576static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
   8577					u16 addr)
   8578{
   8579	u32 ctl, pkt_size = sizeof(*pkt);
   8580
   8581	memset(pkt, 0, pkt_size);
   8582
   8583	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
   8584	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
   8585	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
   8586	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
   8587	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
   8588	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
   8589
   8590	pkt->value = cpu_to_le32(value);
   8591	pkt->ctl = cpu_to_le32(ctl);
   8592
   8593	return pkt_size;
   8594}
   8595
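       /* Arm a monitor: bind it to the SOB group (sob_base / 8) with the given
        * mask so it fires once the masked SOBs reach sob_val (greater-or-equal
        * mode).
        */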
   8596static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
   8597		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
   8598		u16 sob_val, u16 mon_id)
   8599{
   8600	u64 monitor_base;
   8601	u32 ctl, value, pkt_size = sizeof(*pkt);
   8602	u16 msg_addr_offset;
   8603	u8 mask;
   8604
   8605	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
   8606		dev_err(hdev->dev,
   8607			"sob_base %u (mask %#x) is not valid\n",
   8608			sob_base, sob_mask);
   8609		return 0;
   8610	}
   8611
   8612	/*
   8613	 * monitor_base should be the content of the base0 address registers,
   8614	 * so it will be added to the msg short offsets
   8615	 */
   8616	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
   8617
   8618	msg_addr_offset =
   8619		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
   8620				monitor_base;
   8621
   8622	memset(pkt, 0, pkt_size);
   8623
   8624	/* Monitor config packet: bind the monitor to a sync object */
   8625	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
   8626	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
   8627	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
   8628			0); /* GREATER OR EQUAL*/
   8629	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
   8630
   8631	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
   8632	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
   8633	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
   8634	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
   8635	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
   8636	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
   8637	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   8638
   8639	pkt->value = cpu_to_le32(value);
   8640	pkt->ctl = cpu_to_le32(ctl);
   8641
   8642	return pkt_size;
   8643}
   8644
   8645static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
   8646{
   8647	u32 ctl, cfg, pkt_size = sizeof(*pkt);
   8648
   8649	memset(pkt, 0, pkt_size);
   8650
   8651	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
   8652	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
   8653	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
   8654
   8655	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
   8656	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
   8657	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
   8658	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
   8659
   8660	pkt->cfg = cpu_to_le32(cfg);
   8661	pkt->ctl = cpu_to_le32(ctl);
   8662
   8663	return pkt_size;
   8664}
   8665
   8666static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
   8667{
   8668	u32 offset, nic_index;
   8669
   8670	switch (queue_id) {
   8671	case GAUDI_QUEUE_ID_DMA_0_0:
   8672		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
   8673		break;
   8674	case GAUDI_QUEUE_ID_DMA_0_1:
   8675		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
   8676		break;
   8677	case GAUDI_QUEUE_ID_DMA_0_2:
   8678		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
   8679		break;
   8680	case GAUDI_QUEUE_ID_DMA_0_3:
   8681		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
   8682		break;
   8683	case GAUDI_QUEUE_ID_DMA_1_0:
   8684		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
   8685		break;
   8686	case GAUDI_QUEUE_ID_DMA_1_1:
   8687		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
   8688		break;
   8689	case GAUDI_QUEUE_ID_DMA_1_2:
   8690		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
   8691		break;
   8692	case GAUDI_QUEUE_ID_DMA_1_3:
   8693		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
   8694		break;
   8695	case GAUDI_QUEUE_ID_DMA_5_0:
   8696		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
   8697		break;
   8698	case GAUDI_QUEUE_ID_DMA_5_1:
   8699		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
   8700		break;
   8701	case GAUDI_QUEUE_ID_DMA_5_2:
   8702		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
   8703		break;
   8704	case GAUDI_QUEUE_ID_DMA_5_3:
   8705		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
   8706		break;
   8707	case GAUDI_QUEUE_ID_TPC_7_0:
   8708		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
   8709		break;
   8710	case GAUDI_QUEUE_ID_TPC_7_1:
   8711		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
   8712		break;
   8713	case GAUDI_QUEUE_ID_TPC_7_2:
   8714		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
   8715		break;
   8716	case GAUDI_QUEUE_ID_TPC_7_3:
   8717		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
   8718		break;
   8719	case GAUDI_QUEUE_ID_NIC_0_0:
   8720	case GAUDI_QUEUE_ID_NIC_1_0:
   8721	case GAUDI_QUEUE_ID_NIC_2_0:
   8722	case GAUDI_QUEUE_ID_NIC_3_0:
   8723	case GAUDI_QUEUE_ID_NIC_4_0:
   8724	case GAUDI_QUEUE_ID_NIC_5_0:
   8725	case GAUDI_QUEUE_ID_NIC_6_0:
   8726	case GAUDI_QUEUE_ID_NIC_7_0:
   8727	case GAUDI_QUEUE_ID_NIC_8_0:
   8728	case GAUDI_QUEUE_ID_NIC_9_0:
   8729		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
   8730		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
   8731				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
   8732				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
   8733		break;
   8734	case GAUDI_QUEUE_ID_NIC_0_1:
   8735	case GAUDI_QUEUE_ID_NIC_1_1:
   8736	case GAUDI_QUEUE_ID_NIC_2_1:
   8737	case GAUDI_QUEUE_ID_NIC_3_1:
   8738	case GAUDI_QUEUE_ID_NIC_4_1:
   8739	case GAUDI_QUEUE_ID_NIC_5_1:
   8740	case GAUDI_QUEUE_ID_NIC_6_1:
   8741	case GAUDI_QUEUE_ID_NIC_7_1:
   8742	case GAUDI_QUEUE_ID_NIC_8_1:
   8743	case GAUDI_QUEUE_ID_NIC_9_1:
   8744		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
   8745		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
   8746				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
   8747				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
   8748		break;
   8749	case GAUDI_QUEUE_ID_NIC_0_2:
   8750	case GAUDI_QUEUE_ID_NIC_1_2:
   8751	case GAUDI_QUEUE_ID_NIC_2_2:
   8752	case GAUDI_QUEUE_ID_NIC_3_2:
   8753	case GAUDI_QUEUE_ID_NIC_4_2:
   8754	case GAUDI_QUEUE_ID_NIC_5_2:
   8755	case GAUDI_QUEUE_ID_NIC_6_2:
   8756	case GAUDI_QUEUE_ID_NIC_7_2:
   8757	case GAUDI_QUEUE_ID_NIC_8_2:
   8758	case GAUDI_QUEUE_ID_NIC_9_2:
   8759		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
   8760		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
   8761				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
   8762				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
   8763		break;
   8764	case GAUDI_QUEUE_ID_NIC_0_3:
   8765	case GAUDI_QUEUE_ID_NIC_1_3:
   8766	case GAUDI_QUEUE_ID_NIC_2_3:
   8767	case GAUDI_QUEUE_ID_NIC_3_3:
   8768	case GAUDI_QUEUE_ID_NIC_4_3:
   8769	case GAUDI_QUEUE_ID_NIC_5_3:
   8770	case GAUDI_QUEUE_ID_NIC_6_3:
   8771	case GAUDI_QUEUE_ID_NIC_7_3:
   8772	case GAUDI_QUEUE_ID_NIC_8_3:
   8773	case GAUDI_QUEUE_ID_NIC_9_3:
   8774		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
   8775		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
   8776				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
   8777				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
   8778		break;
   8779	default:
   8780		return -EINVAL;
   8781	}
   8782
   8783	*addr = CFG_BASE + offset;
   8784
   8785	return 0;
   8786}
   8787
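       /* Emit the three MSG_SHORT packets that configure a monitor's payload:
        * low and high halves of the fence address, and the value (1) to write
        * there once the monitor fires.
        */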
   8788static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
   8789{
   8790	u64 monitor_base;
   8791	u32 size = 0;
   8792	u16 msg_addr_offset;
   8793
   8794	/*
   8795	 * monitor_base should be the content of the base0 address registers,
   8796	 * so it will be added to the msg short offsets
   8797	 */
   8798	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
   8799
   8800	/* First monitor config packet: low address of the sync */
   8801	msg_addr_offset =
   8802		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
   8803				monitor_base;
   8804
   8805	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
   8806					msg_addr_offset);
   8807
   8808	/* Second monitor config packet: high address of the sync */
   8809	msg_addr_offset =
   8810		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
   8811				monitor_base;
   8812
   8813	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
   8814					msg_addr_offset);
   8815
   8816	/*
   8817	 * Third monitor config packet: the payload, i.e. what to write when the
   8818	 * sync triggers
   8819	 */
   8820	msg_addr_offset =
   8821		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
   8822				monitor_base;
   8823
   8824	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
   8825
   8826	return size;
   8827}
   8828
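       /* Build a wait CB: point the monitor payload at the queue's fence
        * register, arm the monitor on the requested SOBs, then append a FENCE
        * packet so the queue stalls until the monitor writes the fence.
        */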
   8829static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
   8830				struct hl_gen_wait_properties *prop)
   8831{
   8832	struct hl_cb *cb = (struct hl_cb *) prop->data;
   8833	void *buf = cb->kernel_address;
   8834	u64 fence_addr = 0;
   8835	u32 size = prop->size;
   8836
   8837	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
   8838		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
   8839				prop->q_idx);
   8840		return 0;
   8841	}
   8842
   8843	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
   8844	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
   8845			prop->sob_mask, prop->sob_val, prop->mon_id);
   8846	size += gaudi_add_fence_pkt(buf + size);
   8847
   8848	return size;
   8849}
   8850
   8851static void gaudi_reset_sob(struct hl_device *hdev, void *data)
   8852{
   8853	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
   8854
   8855	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
   8856		hw_sob->sob_id);
   8857
   8858	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
   8859			hw_sob->sob_id * 4, 0);
   8860
   8861	kref_init(&hw_sob->kref);
   8862}
   8863
   8864static u64 gaudi_get_device_time(struct hl_device *hdev)
   8865{
   8866	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
   8867
   8868	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
   8869}
   8870
   8871static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
   8872				u32 *block_size, u32 *block_id)
   8873{
   8874	return -EPERM;
   8875}
   8876
   8877static int gaudi_block_mmap(struct hl_device *hdev,
   8878				struct vm_area_struct *vma,
   8879				u32 block_id, u32 block_size)
   8880{
   8881	return -EPERM;
   8882}
   8883
   8884static void gaudi_enable_events_from_fw(struct hl_device *hdev)
   8885{
   8886	struct cpu_dyn_regs *dyn_regs =
   8887			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
   8888	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
   8889			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
   8890			le32_to_cpu(dyn_regs->gic_host_ints_irq);
   8891
   8892	WREG32(irq_handler_offset,
   8893		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
   8894}
   8895
   8896static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
   8897{
   8898	switch (pll_idx) {
   8899	case HL_GAUDI_CPU_PLL: return CPU_PLL;
   8900	case HL_GAUDI_PCI_PLL: return PCI_PLL;
   8901	case HL_GAUDI_NIC_PLL: return NIC_PLL;
   8902	case HL_GAUDI_DMA_PLL: return DMA_PLL;
   8903	case HL_GAUDI_MESH_PLL: return MESH_PLL;
   8904	case HL_GAUDI_MME_PLL: return MME_PLL;
   8905	case HL_GAUDI_TPC_PLL: return TPC_PLL;
   8906	case HL_GAUDI_IF_PLL: return IF_PLL;
   8907	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
   8908	case HL_GAUDI_HBM_PLL: return HBM_PLL;
   8909	default: return -EINVAL;
   8910	}
   8911}
   8912
   8913static int gaudi_add_sync_to_engine_map_entry(
   8914	struct hl_sync_to_engine_map *map, u32 reg_value,
   8915	enum hl_sync_engine_type engine_type, u32 engine_id)
   8916{
   8917	struct hl_sync_to_engine_map_entry *entry;
   8918
   8919	/* The register value is a partial address of the sync object and is
   8920	 * used as a unique identifier. For this we need to strip the CFG base
   8921	 * offset (lower 32 bits of CFG_BASE) from the value.
   8922	 */
   8923	if (reg_value == 0 || reg_value == 0xffffffff)
   8924		return 0;
   8925	reg_value -= lower_32_bits(CFG_BASE);
   8926
   8927	/* create a new hash entry */
   8928	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
   8929	if (!entry)
   8930		return -ENOMEM;
   8931	entry->engine_type = engine_type;
   8932	entry->engine_id = engine_id;
   8933	entry->sync_id = reg_value;
   8934	hash_add(map->tb, &entry->node, reg_value);
   8935
   8936	return 0;
   8937}
   8938
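       /* Walk the TPC, MME and DMA sync-object config registers and build a
        * hash map from sync ID to the engine that signals it, for use by the
        * state dump code.
        */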
   8939static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
   8940				struct hl_sync_to_engine_map *map)
   8941{
   8942	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
   8943	int i, j, rc;
   8944	u32 reg_value;
   8945
   8946	/* Iterate over TPC engines */
   8947	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
   8948
   8949		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
   8950					sds->props[SP_NEXT_TPC] * i);
   8951
   8952		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
   8953							ENGINE_TPC, i);
   8954		if (rc)
   8955			goto free_sync_to_engine_map;
   8956	}
   8957
   8958	/* Iterate over MME engines */
   8959	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
   8960		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
   8961
   8962			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
   8963						sds->props[SP_NEXT_MME] * i +
   8964						j * sizeof(u32));
   8965
   8966			rc = gaudi_add_sync_to_engine_map_entry(
   8967				map, reg_value, ENGINE_MME,
   8968				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
   8969			if (rc)
   8970				goto free_sync_to_engine_map;
   8971		}
   8972	}
   8973
   8974	/* Iterate over DMA engines */
   8975	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
   8976		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
   8977					sds->props[SP_DMA_QUEUES_OFFSET] * i);
   8978		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
   8979							ENGINE_DMA, i);
   8980		if (rc)
   8981			goto free_sync_to_engine_map;
   8982	}
   8983
   8984	return 0;
   8985
   8986free_sync_to_engine_map:
   8987	hl_state_dump_free_sync_to_engine_map(map);
   8988
   8989	return rc;
   8990}
   8991
   8992static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
   8993{
   8994	return FIELD_GET(
   8995		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
   8996		mon->status);
   8997}
   8998
   8999static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
   9000{
   9001	const size_t max_write = 10;
   9002	u32 gid, mask, sob;
   9003	int i, offset;
   9004
   9005	/* The sync object ID is derived as
   9006	 * (group_id * MONITOR_MAX_SOBS + index of each cleared bit in the mask)
   9007	 */
   9008	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
   9009			mon->arm_data);
   9010	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
   9011			mon->arm_data);
   9012
   9013	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
   9014		max_write; mask >>= 1, i++) {
   9015		if (!(mask & 1)) {
   9016			sob = gid * MONITOR_MAX_SOBS + i;
   9017
   9018			if (offset > 0)
   9019				offset += snprintf(sobs + offset, max_write,
   9020							", ");
   9021
   9022			offset += snprintf(sobs + offset, max_write, "%u", sob);
   9023		}
   9024	}
   9025}
   9026
   9027static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
   9028				struct hl_device *hdev,
   9029				struct hl_mon_state_dump *mon)
   9030{
   9031	const char *name;
   9032	char scratch_buf1[BIN_REG_STRING_SIZE],
   9033		scratch_buf2[BIN_REG_STRING_SIZE];
   9034	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
   9035
   9036	name = hl_state_dump_get_monitor_name(hdev, mon);
   9037	if (!name)
   9038		name = "";
   9039
   9040	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
   9041
   9042	return hl_snprintf_resize(
   9043		buf, size, offset,
   9044		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
   9045		mon->id, name,
   9046		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
   9047				mon->arm_data),
   9048		hl_format_as_binary(
   9049			scratch_buf1, sizeof(scratch_buf1),
   9050			FIELD_GET(
   9051				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
   9052				mon->arm_data)),
   9053		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
   9054				mon->arm_data),
   9055		mon->wr_data,
   9056		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
   9057		hl_format_as_binary(
   9058			scratch_buf2, sizeof(scratch_buf2),
   9059			FIELD_GET(
   9060				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
   9061				mon->status)),
   9062		monitored_sobs);
   9063}
   9064
   9065
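       /* For every queue of the engine that has a fence wait in progress,
        * print the fence counter/RDATA register addresses and the current
        * fence value.
        */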
   9066static int gaudi_print_fences_single_engine(
   9067	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
   9068	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
   9069	size_t *size, size_t *offset)
   9070{
   9071	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
   9072	int rc = -ENOMEM, i;
   9073	u32 *statuses, *fences;
   9074
   9075	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
   9076			sizeof(*statuses), GFP_KERNEL);
   9077	if (!statuses)
   9078		goto out;
   9079
   9080	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
   9081				sds->props[SP_ENGINE_NUM_OF_QUEUES],
   9082			 sizeof(*fences), GFP_KERNEL);
   9083	if (!fences)
   9084		goto free_status;
   9085
   9086	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
   9087		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
   9088
   9089	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
   9090				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
   9091		fences[i] = RREG32(base_offset + i * sizeof(u32));
   9092
   9093	/* The actual print */
   9094	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
   9095		u32 fence_id;
   9096		u64 fence_cnt, fence_rdata;
   9097		const char *engine_name;
   9098
   9099		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
   9100			statuses[i]))
   9101			continue;
   9102
   9103		fence_id =
   9104			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
   9105		fence_cnt = base_offset + CFG_BASE +
   9106			sizeof(u32) *
   9107			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
   9108		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
   9109				sds->props[SP_FENCE0_RDATA_OFFSET];
   9110		engine_name = hl_sync_engine_to_string(engine_type);
   9111
   9112		rc = hl_snprintf_resize(
   9113			buf, size, offset,
   9114			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
   9115			engine_name, engine_id,
   9116			i, fence_id,
   9117			fence_cnt, engine_name, engine_id, fence_id, i,
   9118			fence_rdata, engine_name, engine_id, fence_id, i,
   9119			fences[fence_id],
   9120			statuses[i]);
   9121		if (rc)
   9122			goto free_fences;
   9123	}
   9124
   9125	rc = 0;
   9126
   9127free_fences:
   9128	kfree(fences);
   9129free_status:
   9130	kfree(statuses);
   9131out:
   9132	return rc;
   9133}
   9134
   9135
   9136static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
   9137	.monitor_valid = gaudi_monitor_valid,
   9138	.print_single_monitor = gaudi_print_single_monitor,
   9139	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
   9140	.print_fences_single_engine = gaudi_print_fences_single_engine,
   9141};
   9142
   9143static void gaudi_state_dump_init(struct hl_device *hdev)
   9144{
   9145	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
   9146	int i;
   9147
   9148	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
   9149		hash_add(sds->so_id_to_str_tb,
   9150			&gaudi_so_id_to_str[i].node,
   9151			gaudi_so_id_to_str[i].id);
   9152
   9153	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
   9154		hash_add(sds->monitor_id_to_str_tb,
   9155			&gaudi_monitor_id_to_str[i].node,
   9156			gaudi_monitor_id_to_str[i].id);
   9157
   9158	sds->props = gaudi_state_dump_specs_props;
   9159
   9160	sds->sync_namager_names = gaudi_sync_manager_names;
   9161
   9162	sds->funcs = gaudi_state_dump_funcs;
   9163}
   9164
   9165static u32 *gaudi_get_stream_master_qid_arr(void)
   9166{
   9167	return gaudi_stream_master;
   9168}
   9169
   9170static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info)
   9171{
   9172	/* set 0 since multiple pages are not supported */
   9173	info->page_order_bitmask = 0;
   9174}
   9175
   9176static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
   9177{
   9178	struct hl_device *hdev = dev_get_drvdata(dev);
   9179	struct cpucp_info *cpucp_info;
   9180
   9181	cpucp_info = &hdev->asic_prop.cpucp_info;
   9182
   9183	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
   9184}
   9185
   9186static DEVICE_ATTR_RO(infineon_ver);
   9187
   9188static struct attribute *gaudi_vrm_dev_attrs[] = {
   9189	&dev_attr_infineon_ver.attr,
   9190};
   9191
   9192static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
   9193					struct attribute_group *dev_vrm_attr_grp)
   9194{
   9195	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
   9196	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
   9197}
   9198
   9199static const struct hl_asic_funcs gaudi_funcs = {
   9200	.early_init = gaudi_early_init,
   9201	.early_fini = gaudi_early_fini,
   9202	.late_init = gaudi_late_init,
   9203	.late_fini = gaudi_late_fini,
   9204	.sw_init = gaudi_sw_init,
   9205	.sw_fini = gaudi_sw_fini,
   9206	.hw_init = gaudi_hw_init,
   9207	.hw_fini = gaudi_hw_fini,
   9208	.halt_engines = gaudi_halt_engines,
   9209	.suspend = gaudi_suspend,
   9210	.resume = gaudi_resume,
   9211	.mmap = gaudi_mmap,
   9212	.ring_doorbell = gaudi_ring_doorbell,
   9213	.pqe_write = gaudi_pqe_write,
   9214	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
   9215	.asic_dma_free_coherent = gaudi_dma_free_coherent,
   9216	.scrub_device_mem = gaudi_scrub_device_mem,
   9217	.scrub_device_dram = gaudi_scrub_device_dram,
   9218	.get_int_queue_base = gaudi_get_int_queue_base,
   9219	.test_queues = gaudi_test_queues,
   9220	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
   9221	.asic_dma_pool_free = gaudi_dma_pool_free,
   9222	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
   9223	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
   9224	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
   9225	.cs_parser = gaudi_cs_parser,
   9226	.asic_dma_map_sgtable = hl_dma_map_sgtable,
   9227	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
   9228	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
   9229	.update_eq_ci = gaudi_update_eq_ci,
   9230	.context_switch = gaudi_context_switch,
   9231	.restore_phase_topology = gaudi_restore_phase_topology,
   9232	.debugfs_read_dma = gaudi_debugfs_read_dma,
   9233	.add_device_attr = gaudi_add_device_attr,
   9234	.handle_eqe = gaudi_handle_eqe,
   9235	.get_events_stat = gaudi_get_events_stat,
   9236	.read_pte = gaudi_read_pte,
   9237	.write_pte = gaudi_write_pte,
   9238	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
   9239	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
   9240	.mmu_prefetch_cache_range = NULL,
   9241	.send_heartbeat = gaudi_send_heartbeat,
   9242	.debug_coresight = gaudi_debug_coresight,
   9243	.is_device_idle = gaudi_is_device_idle,
   9244	.non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
   9245	.hw_queues_lock = gaudi_hw_queues_lock,
   9246	.hw_queues_unlock = gaudi_hw_queues_unlock,
   9247	.get_pci_id = gaudi_get_pci_id,
   9248	.get_eeprom_data = gaudi_get_eeprom_data,
   9249	.get_monitor_dump = gaudi_get_monitor_dump,
   9250	.send_cpu_message = gaudi_send_cpu_message,
   9251	.pci_bars_map = gaudi_pci_bars_map,
   9252	.init_iatu = gaudi_init_iatu,
   9253	.rreg = hl_rreg,
   9254	.wreg = hl_wreg,
   9255	.halt_coresight = gaudi_halt_coresight,
   9256	.ctx_init = gaudi_ctx_init,
   9257	.ctx_fini = gaudi_ctx_fini,
   9258	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
   9259	.load_firmware_to_device = gaudi_load_firmware_to_device,
   9260	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
   9261	.get_signal_cb_size = gaudi_get_signal_cb_size,
   9262	.get_wait_cb_size = gaudi_get_wait_cb_size,
   9263	.gen_signal_cb = gaudi_gen_signal_cb,
   9264	.gen_wait_cb = gaudi_gen_wait_cb,
   9265	.reset_sob = gaudi_reset_sob,
   9266	.reset_sob_group = gaudi_reset_sob_group,
   9267	.get_device_time = gaudi_get_device_time,
   9268	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
   9269	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
   9270	.scramble_addr = hl_mmu_scramble_addr,
   9271	.descramble_addr = hl_mmu_descramble_addr,
   9272	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
   9273	.get_hw_block_id = gaudi_get_hw_block_id,
   9274	.hw_block_mmap = gaudi_block_mmap,
   9275	.enable_events_from_fw = gaudi_enable_events_from_fw,
   9276	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
   9277	.init_firmware_loader = gaudi_init_firmware_loader,
   9278	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
   9279	.state_dump_init = gaudi_state_dump_init,
   9280	.get_sob_addr = gaudi_get_sob_addr,
   9281	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
   9282	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
   9283	.is_valid_dram_page_size = NULL,
   9284	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
   9285	.get_valid_dram_page_orders = gaudi_get_valid_dram_page_orders,
   9286	.access_dev_mem = hl_access_dev_mem,
   9287	.set_dram_bar_base = gaudi_set_hbm_bar_base,
   9288};
   9289
   9290/**
   9291 * gaudi_set_asic_funcs - set GAUDI function pointers
   9292 *
   9293 * @hdev: pointer to hl_device structure
   9294 *
   9295 */
   9296void gaudi_set_asic_funcs(struct hl_device *hdev)
   9297{
   9298	hdev->asic_funcs = &gaudi_funcs;
   9299}