cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

goya.c (155241B)


// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when the MMU is enabled, the DMA RR does NOT
 *          protect the host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP and DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured, but because the CP is secured, the driver still needs
 * to parse the CB; it just doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in the MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
 *
 * The DMA RR does NOT protect the host because DMA is not secured.
 *
 */
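
/*
 * Illustration (not part of the driver): the parsing rule above amounts
 * to an opcode filter on user command buffers submitted through the
 * secured QMAN DMA. A minimal sketch of that filter, using the packet
 * IDs defined further below:
 *
 *	static bool user_cb_packet_allowed(enum packet_id id)
 *	{
 *		switch (id) {
 *		case PACKET_WREG_32:
 *		case PACKET_MSG_PROT:
 *			return false;
 *		default:
 *			return true;
 *		}
 *	}
 *
 * MSG_LONG/SHORT fall into the default arm; the remaining IDs are
 * validated by their own dedicated checks elsewhere in the parser.
 */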

#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GOYA_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

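/*
 * Illustration (not part of the driver): the IS_*_IDLE() helpers above
 * are meant to be AND-ed over the matching status registers, e.g. for
 * DMA channel 0:
 *
 *	u32 qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0);
 *	u32 dma_core_sts0 = RREG32(mmDMA_CH_0_STS0);
 *	bool is_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
 *			IS_DMA_IDLE(dma_core_sts0);
 */
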
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}

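/*
 * Illustration (not part of the driver): goya_packet_sizes[] and
 * validate_packet_id() together let a parser walk a command buffer
 * packet by packet. A simplified sketch of that loop (mask/shift names
 * from goya_packets.h; user_cb is a void *, error handling omitted):
 *
 *	u32 parsed = 0;
 *
 *	while (parsed < user_cb_size) {
 *		struct goya_packet *pkt = user_cb + parsed;
 *		enum packet_id id =
 *			(le64_to_cpu(pkt->header) &
 *				PACKET_HEADER_PACKET_ID_MASK) >>
 *					PACKET_HEADER_PACKET_ID_SHIFT;
 *
 *		if (!validate_packet_id(id) || !goya_packet_sizes[id])
 *			return -EINVAL;
 *
 *		parsed += goya_packet_sizes[id];
 *	}
 */
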
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static s64 goya_state_dump_specs_props[SP_MAX] = {0};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	prop->max_queues = GOYA_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = true;

	prop->dmmu.hop_shifts[MMU_HOP0] = MMU_V1_0_HOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = MMU_V1_0_HOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = MMU_V1_0_HOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = MMU_V1_0_HOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = MMU_V1_0_HOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = MMU_V1_0_HOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = MMU_V1_0_HOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = MMU_V1_0_HOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = MMU_V1_0_HOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = MMU_V1_0_HOP4_MASK;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->dmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->dc_power_default = DC_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GOYA_MAX_PENDING_CS;

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GOYA_MME_PLL;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	return 0;
}
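
/*
 * Illustration (not part of the driver): with the hop shifts/masks set
 * above, a translation extracts one page-table index per hop:
 *
 *	hop_idx[i] = (virt_addr & prop->dmmu.hop_masks[i]) >>
 *			prop->dmmu.hop_shifts[i];
 *
 * Each index selects one of the 512 PTEs of that hop's table (matching
 * HOP_TABLE_SIZE_512_PTE), and the walk terminates early for huge pages:
 * a 2MB mapping leaves the low 21 address bits as the in-page offset,
 * a 4KB mapping the low 12.
 */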

/*
 * goya_pci_bars_map - Map PCI BARs of the Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}
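
/*
 * Illustration (not part of the driver): goya_set_ddr_bar_base() is the
 * windowing primitive for reaching arbitrary DRAM addresses through the
 * fixed-size DDR BAR. A sketch of the usual pattern, assuming the BAR
 * size is a power of two:
 *
 *	u64 bar_base = dram_addr & ~(prop->dram_pci_bar_size - 1);
 *	u64 old_base = goya_set_ddr_bar_base(hdev, bar_base);
 *
 *	val = readl(hdev->pcie_bar[DDR_BAR_ID] + (dram_addr - bar_base));
 *
 *	goya_set_ddr_bar_base(hdev, old_base);
 */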

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status, val;
	int rc;

	rc = goya_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing with initialization, we need to read the preboot
	 * version to determine whether we are running with security-enabled
	 * firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
	}

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

	RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		struct goya_device *goya = hdev->asic_specific;

		if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
			return;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
				pll_freq_arr);

		if (rc)
			return;

		freq = pll_freq_arr[1];
	} else {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}
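
/*
 * Illustration (not part of the driver): plugging hypothetical values
 * into the formulas above - a 50 MHz reference, nf = 79, nr = 1, od = 0,
 * div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 1 - gives:
 *
 *	pll_clk = 50 * (79 + 1) / ((1 + 1) * (0 + 1)) = 2000 MHz
 *	freq    = pll_clk / (1 + 1)                   = 1000 MHz
 */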

/*
 * goya_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function provides no protection
 * against concurrency, so the caller is assumed to serialize calls made
 * from multiple threads with different values.
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
	struct goya_device *goya = hdev->asic_specific;

	if ((goya->pm_mng_profile == PM_MANUAL) ||
			(goya->curr_pll_profile == freq))
		return 0;

	dev_dbg(hdev->dev, "Changing device frequency to %s\n",
		freq == PLL_HIGH ? "high" : "low");

	goya_set_pll_profile(hdev, freq);

	goya->curr_pll_profile = freq;

	return 1;
}

static void goya_set_freq_to_low_job(struct work_struct *work)
{
	struct goya_work_freq *goya_work = container_of(work,
						struct goya_work_freq,
						work_freq.work);
	struct hl_device *hdev = goya_work->hdev;

	mutex_lock(&hdev->fpriv_list_lock);

	if (!hdev->is_compute_ctx_active)
		goya_set_frequency(hdev, PLL_LOW);

	mutex_unlock(&hdev->fpriv_list_lock);

	schedule_delayed_work(&goya_work->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
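	/* e.g., an 8 GB DRAM yields ilog2(8ull << 30) = 33 */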
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	/* force setting to low frequency */
	goya->curr_pll_profile = PLL_LOW;

	goya->pm_mng_profile = PM_AUTO;

	goya_set_pll_profile(hdev, PLL_LOW);

	schedule_delayed_work(&goya->goya_work->work_freq,
		usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

	return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free the structures that were allocated for the sensors
 */
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	struct goya_device *goya = hdev->asic_specific;
	int i = 0;

	cancel_delayed_work_sync(&goya->goya_work->work_freq);

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

static void goya_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = DDR_BAR_ID;
	region->used = 1;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->asic_prop.supports_soft_reset = true;
	hdev->asic_prop.allow_inference_soft_reset = true;
	hdev->supports_wait_for_multi_cs = false;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
	if (!goya->goya_work) {
		rc = -ENOMEM;
		goto free_cpu_accessible_dma_pool;
	}

	goya->goya_work->hdev = hdev;
	INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
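
/*
 * Illustration (not part of the driver): once goya_sw_init() has seeded
 * cpu_accessible_dma_pool, carving chunks out of it is plain genalloc
 * usage:
 *
 *	void *vaddr = (void *) gen_pool_alloc(hdev->cpu_accessible_dma_pool,
 *						size);
 *	...
 *	gen_pool_free(hdev->cpu_accessible_dma_pool, (unsigned long) vaddr,
 *			size);
 */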

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya->goya_work);
	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
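
/*
 * Note on the WR_COMP programming above (a hedged reading; the sync
 * manager spec is not part of this file): each channel reports write
 * completion by writing WR_COMP_WDATA to the sync object selected by
 * WR_COMP_ADDR. The 0x80000001 payload carries the value 1 with the MSB
 * set, which selects the sync manager's increment mode, so every
 * completed transfer bumps the sync object by one instead of
 * overwriting it.
 */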

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		q->cq_id = q->msi_vec = i;
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same */

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if the QMAN is stuck on a fence, there is no need to check
		 * for stop
		 */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
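
/*
 * Illustration (not part of the driver): both waits above follow the
 * hl_poll_timeout() pattern used throughout this file - re-read a
 * register every sleep_us microseconds until the condition on its value
 * holds or timeout_us expires, returning 0 on success and an error on
 * timeout:
 *
 *	rc = hl_poll_timeout(hdev, reg, status, <condition on status>,
 *				1000, QMAN_STOP_TIMEOUT_USEC);
 */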

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
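
/*
 * Summary of the handshake above (driver side): publish the PQ/EQ/CQ
 * base addresses and lengths, mark the PQ as PQ_INIT_STATUS_READY_FOR_CP,
 * kick the device CPU through the GIC doorbell register, then poll
 * mmCPU_PQ_INIT_STATUS until the firmware flips it to
 * PQ_INIT_STATUS_READY_FOR_HOST (or GOYA_CPU_TIMEOUT_USEC expires).
 */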
   1418
   1419static void goya_set_pll_refclk(struct hl_device *hdev)
   1420{
   1421	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
   1422	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
   1423	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
   1424	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);
   1425
   1426	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
   1427	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
   1428	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
   1429	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);
   1430
   1431	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
   1432	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
   1433	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
   1434	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);
   1435
   1436	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
   1437	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
   1438	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
   1439	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);
   1440
   1441	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
   1442	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
   1443	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
   1444	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);
   1445
   1446	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
   1447	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
   1448	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
   1449	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);
   1450
   1451	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
   1452	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
   1453	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
   1454	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
   1455}
   1456
   1457static void goya_disable_clk_rlx(struct hl_device *hdev)
   1458{
   1459	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
   1460	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
   1461}
   1462
   1463static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
   1464{
   1465	u64 tpc_eml_address;
   1466	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
   1467	int err, slm_index;
   1468
   1469	tpc_offset = tpc_id * 0x40000;
   1470	tpc_eml_offset = tpc_id * 0x200000;
   1471	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
   1472	tpc_slm_offset = tpc_eml_address + 0x100000;
   1473
   1474	/*
   1475	 * Workaround for Bug H2 #2443 :
   1476	 * "TPC SB is not initialized on chip reset"
   1477	 */
   1478
   1479	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
   1480	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
   1481		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
   1482			tpc_id);
   1483
   1484	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);
   1485
   1486	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
   1487	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
   1488	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
   1489	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
   1490	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
   1491	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
   1492	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
   1493	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
   1494	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
   1495	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);
   1496
   1497	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
   1498		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);
   1499
   1500	err = hl_poll_timeout(
   1501		hdev,
   1502		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
   1503		val,
   1504		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
   1505		1000,
   1506		HL_DEVICE_TIMEOUT_USEC);
   1507
   1508	if (err)
   1509		dev_err(hdev->dev,
   1510			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);
   1511
   1512	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
   1513		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);
   1514
   1515	msleep(GOYA_RESET_WAIT_MSEC);
   1516
   1517	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
   1518		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));
   1519
   1520	msleep(GOYA_RESET_WAIT_MSEC);
   1521
   1522	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
   1523		WREG32(tpc_slm_offset + (slm_index << 2), 0);
   1524
   1525	val = RREG32(tpc_slm_offset);
   1526}
   1527
   1528static void goya_tpc_mbist_workaround(struct hl_device *hdev)
   1529{
   1530	struct goya_device *goya = hdev->asic_specific;
   1531	int i;
   1532
   1533	if (hdev->pldm)
   1534		return;
   1535
   1536	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
   1537		return;
   1538
   1539	/* Workaround for H2 #2443 */
   1540
   1541	for (i = 0 ; i < TPC_MAX_NUM ; i++)
   1542		_goya_tpc_mbist_workaround(hdev, i);
   1543
   1544	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
   1545}
   1546
   1547/*
   1548 * goya_init_golden_registers - Initialize golden registers
   1549 *
   1550 * @hdev: pointer to hl_device structure
   1551 *
   1552 * Initialize the H/W registers of the device
   1553 *
   1554 */
   1555static void goya_init_golden_registers(struct hl_device *hdev)
   1556{
   1557	struct goya_device *goya = hdev->asic_specific;
   1558	u32 polynom[10], tpc_intr_mask, offset;
   1559	int i;
   1560
   1561	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
   1562		return;
   1563
   1564	polynom[0] = 0x00020080;
   1565	polynom[1] = 0x00401000;
   1566	polynom[2] = 0x00200800;
   1567	polynom[3] = 0x00002000;
   1568	polynom[4] = 0x00080200;
   1569	polynom[5] = 0x00040100;
   1570	polynom[6] = 0x00100400;
   1571	polynom[7] = 0x00004000;
   1572	polynom[8] = 0x00010000;
   1573	polynom[9] = 0x00008000;
   1574
   1575	/* Mask all arithmetic interrupts from TPC */
   1576	tpc_intr_mask = 0x7FFF;
   1577
   1578	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
   1579		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
   1580		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
   1581		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
   1582		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
   1583		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
   1584
   1585		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
   1586		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
   1587		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
   1588		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
   1589		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
   1590
   1591
   1592		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
   1593		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
   1594		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
   1595		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
   1596		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
   1597
   1598		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
   1599		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
   1600		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
   1601		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
   1602		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
   1603
   1604		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
   1605		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
   1606		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
   1607		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
   1608		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
   1609
   1610		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
   1611		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
   1612		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
   1613		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
   1614		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
   1615	}
   1616
   1617	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
   1618	WREG32(mmMME_AGU, 0x0f0f0f10);
   1619	WREG32(mmMME_SEI_MASK, ~0x0);
   1620
   1621	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
   1622	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
   1623	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
   1624	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
   1625	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
   1626	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
   1627	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
   1628	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
   1629	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
   1630	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
   1631	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
   1632	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
   1633	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
   1634	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
   1635	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
   1636	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
   1637	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
   1638	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
   1639	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
   1640	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
   1641	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
   1642	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
   1643	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
   1645	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
   1646	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
   1647	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
   1648	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
   1649	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
   1650	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
   1651	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
   1652	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
   1653	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
   1654	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
   1655	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
   1656	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
   1657	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
   1658	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
   1659	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
   1660	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
   1661	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
   1662	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
   1663	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
   1664	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
   1665	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
   1666	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
   1667	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
   1668	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
   1669	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
   1670	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
   1671	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
   1672	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
   1673	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
   1674	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
   1675	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
   1676	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
   1677	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
   1678	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
   1679	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
   1680	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
   1681	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
   1682	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
   1683	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
   1684	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
   1685	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
   1686	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
   1687	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
   1688	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
   1689	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
   1690	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
   1691	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
   1692	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
   1693	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
   1694	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
   1695	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
   1696	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
   1697	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
   1698	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
   1699	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
   1700	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
   1701	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
   1702	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
   1703	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
   1704	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
   1705
   1706	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
   1707	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
   1708	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
   1709	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
   1710	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
   1711	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
   1712	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
   1713	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
   1714	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
   1715	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
   1716	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
   1717	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
   1718
   1719	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
   1720	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
   1721	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
   1722	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
   1723	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
   1724	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
   1725	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
   1726	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
   1727	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
   1728	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
   1729	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
   1730	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
   1731
   1732	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
   1733	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
   1734	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
   1735	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
   1736	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
   1737	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
   1738	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
   1739	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
   1740	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
   1741	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
   1742	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
   1743	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
   1744
   1745	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
   1746	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
   1747	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
   1748	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
   1749	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
   1750	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
   1751	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
   1752	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
   1753	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
   1754	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
   1755	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
   1756	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
   1757
   1758	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
   1759	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
   1760	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
   1761	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
   1762	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
   1763	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
   1764	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
   1765	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
   1766	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
   1767	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
   1768	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
   1769	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
   1770
   1771	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
   1772	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
   1773	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
   1774	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
   1775	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
   1776	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
   1777	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
   1778	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
   1779	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
   1780	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
   1781	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
   1782	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
   1783
   1784	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
   1785		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1786		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1787		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1788		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1789		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1790		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1791
   1792		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1793		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1794		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1795		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1796		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1797		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1798		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1799		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1800
   1801		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1802		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
   1803	}
   1804
   1805	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
   1806		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
   1807				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
   1808		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
   1809				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
   1810	}
   1811
   1812	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
   1813		/*
   1814		 * Workaround for Bug H2 #2441 :
   1815		 * "ST.NOP set trace event illegal opcode"
   1816		 */
   1817		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
   1818
   1819		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
   1820				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
   1821		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
   1822				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
   1823
   1824		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
   1825				ICACHE_FETCH_LINE_NUM, 2);
   1826	}
   1827
   1828	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
   1829	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
   1830			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
   1831
   1832	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
   1833	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
   1834			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
   1835
	/*
	 * Workaround for H2 bug #HW-23
	 * Set the DMA max outstanding read requests to 240 on DMA CH 1.
	 * This limit is still large enough not to affect Gen4 bandwidth.
	 * We only need to limit that DMA channel because the user can only
	 * read from the host using DMA CH 1
	 */
   1843	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
   1844
   1845	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
   1846
   1847	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
   1848}
   1849
   1850static void goya_init_mme_qman(struct hl_device *hdev)
   1851{
   1852	u32 mtr_base_lo, mtr_base_hi;
   1853	u32 so_base_lo, so_base_hi;
   1854	u32 gic_base_lo, gic_base_hi;
   1855	u64 qman_base_addr;
   1856
   1857	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   1858	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   1859	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1860	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1861
   1862	gic_base_lo =
   1863		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   1864	gic_base_hi =
   1865		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   1866
   1867	qman_base_addr = hdev->asic_prop.sram_base_address +
   1868				MME_QMAN_BASE_OFFSET;
   1869
   1870	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
   1871	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
   1872	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
   1873	WREG32(mmMME_QM_PQ_PI, 0);
   1874	WREG32(mmMME_QM_PQ_CI, 0);
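	/*
	 * 0x10C0-0x10CC are presumably the fixed offsets of the LDMA
	 * source-base/size/commit registers that the CP writes when it
	 * executes LIN_DMA packets
	 */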
   1875	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
   1876	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
   1877	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
   1878	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
   1879
   1880	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
   1881	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
   1882	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
   1883	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
   1884
   1885	/* QMAN CQ has 8 cache lines */
   1886	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
   1887
   1888	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
   1889	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
   1890
   1891	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
   1892
   1893	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
   1894
   1895	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
   1896
   1897	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
   1898}
   1899
   1900static void goya_init_mme_cmdq(struct hl_device *hdev)
   1901{
   1902	u32 mtr_base_lo, mtr_base_hi;
   1903	u32 so_base_lo, so_base_hi;
   1904	u32 gic_base_lo, gic_base_hi;
   1905
   1906	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   1907	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   1908	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1909	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1910
   1911	gic_base_lo =
   1912		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   1913	gic_base_hi =
   1914		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   1915
   1916	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
   1917	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
   1919	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
   1920
   1921	/* CMDQ CQ has 20 cache lines */
   1922	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
   1923
   1924	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
   1925	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
   1926
   1927	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
   1928
   1929	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
   1930
   1931	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
   1932
   1933	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
   1934}
   1935
   1936void goya_init_mme_qmans(struct hl_device *hdev)
   1937{
   1938	struct goya_device *goya = hdev->asic_specific;
   1939	u32 so_base_lo, so_base_hi;
   1940
   1941	if (goya->hw_cap_initialized & HW_CAP_MME)
   1942		return;
   1943
   1944	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1945	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1946
   1947	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
   1948	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
   1949
   1950	goya_init_mme_qman(hdev);
   1951	goya_init_mme_cmdq(hdev);
   1952
   1953	goya->hw_cap_initialized |= HW_CAP_MME;
   1954}
   1955
   1956static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
   1957{
   1958	u32 mtr_base_lo, mtr_base_hi;
   1959	u32 so_base_lo, so_base_hi;
   1960	u32 gic_base_lo, gic_base_hi;
   1961	u64 qman_base_addr;
   1962	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
   1963
   1964	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   1965	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   1966	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1967	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   1968
   1969	gic_base_lo =
   1970		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   1971	gic_base_hi =
   1972		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   1973
   1974	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
   1975
   1976	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
   1977	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
   1978	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
   1979	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
   1980	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
   1981	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
   1982	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
   1983	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
   1984	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
   1985
   1986	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
   1987	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
   1988	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
   1989	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
   1990
   1991	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
   1992
   1993	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
   1994	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
   1995
   1996	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
   1997			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
   1998
   1999	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
   2000
   2001	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
   2002
   2003	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
   2004}
   2005
   2006static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
   2007{
   2008	u32 mtr_base_lo, mtr_base_hi;
   2009	u32 so_base_lo, so_base_hi;
   2010	u32 gic_base_lo, gic_base_hi;
   2011	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
   2012
   2013	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   2014	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
   2015	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   2016	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   2017
   2018	gic_base_lo =
   2019		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   2020	gic_base_hi =
   2021		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
   2022
   2023	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
   2024	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
   2025	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
   2026	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
   2027
   2028	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
   2029
   2030	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
   2031	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
   2032
   2033	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
   2034			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
   2035
   2036	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
   2037
   2038	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
   2039
   2040	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
   2041}
   2042
   2043void goya_init_tpc_qmans(struct hl_device *hdev)
   2044{
   2045	struct goya_device *goya = hdev->asic_specific;
   2046	u32 so_base_lo, so_base_hi;
   2047	u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
   2048			mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
   2049	int i;
   2050
   2051	if (goya->hw_cap_initialized & HW_CAP_TPC)
   2052		return;
   2053
   2054	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   2055	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   2056
   2057	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
   2058		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
   2059				so_base_lo);
   2060		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
   2061				so_base_hi);
   2062	}
   2063
   2064	goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
   2065	goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
   2066	goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
   2067	goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
   2068	goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
   2069	goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
   2070	goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
   2071	goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
   2072
   2073	for (i = 0 ; i < TPC_MAX_NUM ; i++)
   2074		goya_init_tpc_cmdq(hdev, i);
   2075
   2076	goya->hw_cap_initialized |= HW_CAP_TPC;
   2077}
   2078
   2079/*
   2080 * goya_disable_internal_queues - Disable internal queues
   2081 *
   2082 * @hdev: pointer to hl_device structure
   2083 *
   2084 */
   2085static void goya_disable_internal_queues(struct hl_device *hdev)
   2086{
   2087	struct goya_device *goya = hdev->asic_specific;
   2088
   2089	if (!(goya->hw_cap_initialized & HW_CAP_MME))
   2090		goto disable_tpc;
   2091
   2092	WREG32(mmMME_QM_GLBL_CFG0, 0);
   2093	WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
   2094
   2095disable_tpc:
   2096	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
   2097		return;
   2098
   2099	WREG32(mmTPC0_QM_GLBL_CFG0, 0);
   2100	WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
   2101
   2102	WREG32(mmTPC1_QM_GLBL_CFG0, 0);
   2103	WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
   2104
   2105	WREG32(mmTPC2_QM_GLBL_CFG0, 0);
   2106	WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
   2107
   2108	WREG32(mmTPC3_QM_GLBL_CFG0, 0);
   2109	WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
   2110
   2111	WREG32(mmTPC4_QM_GLBL_CFG0, 0);
   2112	WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
   2113
   2114	WREG32(mmTPC5_QM_GLBL_CFG0, 0);
   2115	WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
   2116
   2117	WREG32(mmTPC6_QM_GLBL_CFG0, 0);
   2118	WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
   2119
   2120	WREG32(mmTPC7_QM_GLBL_CFG0, 0);
   2121	WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
   2122}
   2123
   2124/*
   2125 * goya_stop_internal_queues - Stop internal queues
   2126 *
   2127 * @hdev: pointer to hl_device structure
   2128 *
   2129 * Returns 0 on success
   2130 *
   2131 */
   2132static int goya_stop_internal_queues(struct hl_device *hdev)
   2133{
   2134	struct goya_device *goya = hdev->asic_specific;
   2135	int rc, retval = 0;
   2136
   2137	if (!(goya->hw_cap_initialized & HW_CAP_MME))
   2138		goto stop_tpc;
   2139
	/*
	 * Each queue (QMAN) is a separate H/W logic. That means that each
	 * QMAN can be stopped independently, and failing to stop one QMAN
	 * doesn't mean we shouldn't try to stop the others
	 */
   2145
   2146	rc = goya_stop_queue(hdev,
   2147			mmMME_QM_GLBL_CFG1,
   2148			mmMME_QM_CP_STS,
   2149			mmMME_QM_GLBL_STS0);
   2150
   2151	if (rc) {
   2152		dev_err(hdev->dev, "failed to stop MME QMAN\n");
   2153		retval = -EIO;
   2154	}
   2155
   2156	rc = goya_stop_queue(hdev,
   2157			mmMME_CMDQ_GLBL_CFG1,
   2158			mmMME_CMDQ_CP_STS,
   2159			mmMME_CMDQ_GLBL_STS0);
   2160
   2161	if (rc) {
   2162		dev_err(hdev->dev, "failed to stop MME CMDQ\n");
   2163		retval = -EIO;
   2164	}
   2165
   2166stop_tpc:
   2167	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
   2168		return retval;
   2169
   2170	rc = goya_stop_queue(hdev,
   2171			mmTPC0_QM_GLBL_CFG1,
   2172			mmTPC0_QM_CP_STS,
   2173			mmTPC0_QM_GLBL_STS0);
   2174
   2175	if (rc) {
   2176		dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
   2177		retval = -EIO;
   2178	}
   2179
   2180	rc = goya_stop_queue(hdev,
   2181			mmTPC0_CMDQ_GLBL_CFG1,
   2182			mmTPC0_CMDQ_CP_STS,
   2183			mmTPC0_CMDQ_GLBL_STS0);
   2184
   2185	if (rc) {
   2186		dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
   2187		retval = -EIO;
   2188	}
   2189
   2190	rc = goya_stop_queue(hdev,
   2191			mmTPC1_QM_GLBL_CFG1,
   2192			mmTPC1_QM_CP_STS,
   2193			mmTPC1_QM_GLBL_STS0);
   2194
   2195	if (rc) {
   2196		dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
   2197		retval = -EIO;
   2198	}
   2199
   2200	rc = goya_stop_queue(hdev,
   2201			mmTPC1_CMDQ_GLBL_CFG1,
   2202			mmTPC1_CMDQ_CP_STS,
   2203			mmTPC1_CMDQ_GLBL_STS0);
   2204
   2205	if (rc) {
   2206		dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
   2207		retval = -EIO;
   2208	}
   2209
   2210	rc = goya_stop_queue(hdev,
   2211			mmTPC2_QM_GLBL_CFG1,
   2212			mmTPC2_QM_CP_STS,
   2213			mmTPC2_QM_GLBL_STS0);
   2214
   2215	if (rc) {
   2216		dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
   2217		retval = -EIO;
   2218	}
   2219
   2220	rc = goya_stop_queue(hdev,
   2221			mmTPC2_CMDQ_GLBL_CFG1,
   2222			mmTPC2_CMDQ_CP_STS,
   2223			mmTPC2_CMDQ_GLBL_STS0);
   2224
   2225	if (rc) {
   2226		dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
   2227		retval = -EIO;
   2228	}
   2229
   2230	rc = goya_stop_queue(hdev,
   2231			mmTPC3_QM_GLBL_CFG1,
   2232			mmTPC3_QM_CP_STS,
   2233			mmTPC3_QM_GLBL_STS0);
   2234
   2235	if (rc) {
   2236		dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
   2237		retval = -EIO;
   2238	}
   2239
   2240	rc = goya_stop_queue(hdev,
   2241			mmTPC3_CMDQ_GLBL_CFG1,
   2242			mmTPC3_CMDQ_CP_STS,
   2243			mmTPC3_CMDQ_GLBL_STS0);
   2244
   2245	if (rc) {
   2246		dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
   2247		retval = -EIO;
   2248	}
   2249
   2250	rc = goya_stop_queue(hdev,
   2251			mmTPC4_QM_GLBL_CFG1,
   2252			mmTPC4_QM_CP_STS,
   2253			mmTPC4_QM_GLBL_STS0);
   2254
   2255	if (rc) {
   2256		dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
   2257		retval = -EIO;
   2258	}
   2259
   2260	rc = goya_stop_queue(hdev,
   2261			mmTPC4_CMDQ_GLBL_CFG1,
   2262			mmTPC4_CMDQ_CP_STS,
   2263			mmTPC4_CMDQ_GLBL_STS0);
   2264
   2265	if (rc) {
   2266		dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
   2267		retval = -EIO;
   2268	}
   2269
   2270	rc = goya_stop_queue(hdev,
   2271			mmTPC5_QM_GLBL_CFG1,
   2272			mmTPC5_QM_CP_STS,
   2273			mmTPC5_QM_GLBL_STS0);
   2274
   2275	if (rc) {
   2276		dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
   2277		retval = -EIO;
   2278	}
   2279
   2280	rc = goya_stop_queue(hdev,
   2281			mmTPC5_CMDQ_GLBL_CFG1,
   2282			mmTPC5_CMDQ_CP_STS,
   2283			mmTPC5_CMDQ_GLBL_STS0);
   2284
   2285	if (rc) {
   2286		dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
   2287		retval = -EIO;
   2288	}
   2289
   2290	rc = goya_stop_queue(hdev,
   2291			mmTPC6_QM_GLBL_CFG1,
   2292			mmTPC6_QM_CP_STS,
   2293			mmTPC6_QM_GLBL_STS0);
   2294
   2295	if (rc) {
   2296		dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
   2297		retval = -EIO;
   2298	}
   2299
   2300	rc = goya_stop_queue(hdev,
   2301			mmTPC6_CMDQ_GLBL_CFG1,
   2302			mmTPC6_CMDQ_CP_STS,
   2303			mmTPC6_CMDQ_GLBL_STS0);
   2304
   2305	if (rc) {
   2306		dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
   2307		retval = -EIO;
   2308	}
   2309
   2310	rc = goya_stop_queue(hdev,
   2311			mmTPC7_QM_GLBL_CFG1,
   2312			mmTPC7_QM_CP_STS,
   2313			mmTPC7_QM_GLBL_STS0);
   2314
   2315	if (rc) {
   2316		dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
   2317		retval = -EIO;
   2318	}
   2319
   2320	rc = goya_stop_queue(hdev,
   2321			mmTPC7_CMDQ_GLBL_CFG1,
   2322			mmTPC7_CMDQ_CP_STS,
   2323			mmTPC7_CMDQ_GLBL_STS0);
   2324
   2325	if (rc) {
   2326		dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
   2327		retval = -EIO;
   2328	}
   2329
   2330	return retval;
   2331}
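
/*
 * Note: the eight per-TPC stop sequences above differ only by the fixed
 * TPC0->TPC1 register block stride (the same stride trick used by
 * goya_init_tpc_qman()), so an equivalent, more compact formulation would
 * be a loop. A sketch:
 *
 *	u32 qm_off = mmTPC1_QM_GLBL_CFG1 - mmTPC0_QM_GLBL_CFG1;
 *
 *	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
 *		rc = goya_stop_queue(hdev,
 *				mmTPC0_QM_GLBL_CFG1 + i * qm_off,
 *				mmTPC0_QM_CP_STS + i * qm_off,
 *				mmTPC0_QM_GLBL_STS0 + i * qm_off);
 *		if (rc) {
 *			dev_err(hdev->dev,
 *				"failed to stop TPC %d QMAN\n", i);
 *			retval = -EIO;
 *		}
 *	}
 *
 * and likewise for the CMDQs using the TPC0->TPC1 CMDQ stride
 */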
   2332
   2333static void goya_dma_stall(struct hl_device *hdev)
   2334{
   2335	struct goya_device *goya = hdev->asic_specific;
   2336
   2337	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
   2338		return;
   2339
   2340	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
   2341	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
   2342	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
   2343	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
   2344	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
   2345}
   2346
   2347static void goya_tpc_stall(struct hl_device *hdev)
   2348{
   2349	struct goya_device *goya = hdev->asic_specific;
   2350
   2351	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
   2352		return;
   2353
   2354	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
   2355	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
   2356	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
   2357	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
   2358	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
   2359	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
   2360	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
   2361	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
   2362}
   2363
   2364static void goya_mme_stall(struct hl_device *hdev)
   2365{
   2366	struct goya_device *goya = hdev->asic_specific;
   2367
   2368	if (!(goya->hw_cap_initialized & HW_CAP_MME))
   2369		return;
   2370
   2371	WREG32(mmMME_STALL, 0xFFFFFFFF);
   2372}
   2373
   2374static int goya_enable_msix(struct hl_device *hdev)
   2375{
   2376	struct goya_device *goya = hdev->asic_specific;
   2377	int cq_cnt = hdev->asic_prop.completion_queues_count;
   2378	int rc, i, irq_cnt_init, irq;
   2379
   2380	if (goya->hw_cap_initialized & HW_CAP_MSIX)
   2381		return 0;
   2382
   2383	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
   2384				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
   2385	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support, requested %d vectors, rc %d\n",
			GOYA_MSIX_ENTRIES, rc);
   2389		return rc;
   2390	}
   2391
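	/*
	 * Vectors 0..cq_cnt-1 serve the completion queues, while the event
	 * queue has its own fixed vector, GOYA_EVENT_QUEUE_MSIX_IDX
	 */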
   2392	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
   2393		irq = pci_irq_vector(hdev->pdev, i);
   2394		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
   2395				&hdev->completion_queue[i]);
   2396		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
   2398			goto free_irqs;
   2399		}
   2400	}
   2401
   2402	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
   2403
   2404	rc = request_irq(irq, hl_irq_handler_eq, 0,
   2405			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
   2406			&hdev->event_queue);
   2407	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
   2409		goto free_irqs;
   2410	}
   2411
   2412	goya->hw_cap_initialized |= HW_CAP_MSIX;
   2413	return 0;
   2414
   2415free_irqs:
   2416	for (i = 0 ; i < irq_cnt_init ; i++)
   2417		free_irq(pci_irq_vector(hdev->pdev, i),
   2418			&hdev->completion_queue[i]);
   2419
   2420	pci_free_irq_vectors(hdev->pdev);
   2421	return rc;
   2422}
   2423
   2424static void goya_sync_irqs(struct hl_device *hdev)
   2425{
   2426	struct goya_device *goya = hdev->asic_specific;
   2427	int i;
   2428
   2429	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
   2430		return;
   2431
	/* Wait for all pending IRQ handlers to finish */
   2433	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
   2434		synchronize_irq(pci_irq_vector(hdev->pdev, i));
   2435
   2436	synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
   2437}
   2438
   2439static void goya_disable_msix(struct hl_device *hdev)
   2440{
   2441	struct goya_device *goya = hdev->asic_specific;
   2442	int i, irq;
   2443
   2444	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
   2445		return;
   2446
   2447	goya_sync_irqs(hdev);
   2448
   2449	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
   2450	free_irq(irq, &hdev->event_queue);
   2451
   2452	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
   2453		irq = pci_irq_vector(hdev->pdev, i);
   2454		free_irq(irq, &hdev->completion_queue[i]);
   2455	}
   2456
   2457	pci_free_irq_vectors(hdev->pdev);
   2458
   2459	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
   2460}
   2461
   2462static void goya_enable_timestamp(struct hl_device *hdev)
   2463{
   2464	/* Disable the timestamp counter */
   2465	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
   2466
   2467	/* Zero the lower/upper parts of the 64-bit counter */
   2468	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
   2469	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
   2470
   2471	/* Enable the counter */
   2472	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
   2473}
   2474
   2475static void goya_disable_timestamp(struct hl_device *hdev)
   2476{
   2477	/* Disable the timestamp counter */
   2478	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
   2479}
   2480
   2481static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
   2482{
   2483	u32 wait_timeout_ms;
   2484
   2485	if (hdev->pldm)
   2486		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
   2487	else
   2488		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
   2489
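	/*
	 * Quiesce in stages: first stop the queues, then stall the engines,
	 * and only then disable the queues, with a settle delay between each
	 * stage
	 */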
   2490	goya_stop_external_queues(hdev);
   2491	goya_stop_internal_queues(hdev);
   2492
   2493	msleep(wait_timeout_ms);
   2494
   2495	goya_dma_stall(hdev);
   2496	goya_tpc_stall(hdev);
   2497	goya_mme_stall(hdev);
   2498
   2499	msleep(wait_timeout_ms);
   2500
   2501	goya_disable_external_queues(hdev);
   2502	goya_disable_internal_queues(hdev);
   2503
   2504	goya_disable_timestamp(hdev);
   2505
   2506	if (hard_reset) {
   2507		goya_disable_msix(hdev);
   2508		goya_mmu_remove_device_cpu_mappings(hdev);
   2509	} else {
   2510		goya_sync_irqs(hdev);
   2511	}
   2512}
   2513
   2514/*
   2515 * goya_load_firmware_to_device() - Load LINUX FW code to device.
   2516 * @hdev: Pointer to hl_device structure.
   2517 *
 * Copy LINUX FW code from the firmware file to the DDR BAR.
   2519 *
   2520 * Return: 0 on success, non-zero for failure.
   2521 */
   2522static int goya_load_firmware_to_device(struct hl_device *hdev)
   2523{
   2524	void __iomem *dst;
   2525
   2526	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
   2527
   2528	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
   2529}
   2530
   2531/*
   2532 * goya_load_boot_fit_to_device() - Load boot fit to device.
   2533 * @hdev: Pointer to hl_device structure.
   2534 *
   2535 * Copy boot fit file to SRAM BAR.
   2536 *
   2537 * Return: 0 on success, non-zero for failure.
   2538 */
   2539static int goya_load_boot_fit_to_device(struct hl_device *hdev)
   2540{
   2541	void __iomem *dst;
   2542
   2543	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
   2544
   2545	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
   2546}
   2547
   2548static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
   2549{
   2550	struct dynamic_fw_load_mgr *dynamic_loader;
   2551	struct cpu_dyn_regs *dyn_regs;
   2552
   2553	dynamic_loader = &hdev->fw_loader.dynamic_loader;
   2554
	/*
	 * Here we set initial values for a few specific dynamic registers;
	 * before the first descriptor is read from the FW, those values have
	 * to be hard-coded. In later stages of the protocol they are updated
	 * automatically by reading the FW descriptor, so the data there is
	 * always up-to-date
	 */
   2562	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
   2563	dyn_regs->kmd_msg_to_cpu =
   2564				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
   2565	dyn_regs->cpu_cmd_status_to_host =
   2566				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
   2567
   2568	dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
   2569}
   2570
   2571static void goya_init_static_firmware_loader(struct hl_device *hdev)
   2572{
   2573	struct static_fw_load_mgr *static_loader;
   2574
   2575	static_loader = &hdev->fw_loader.static_loader;
   2576
   2577	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
   2578	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
   2579	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
   2580	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
   2581	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
   2582	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
   2583	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
   2584	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
   2585	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
   2586	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
   2587	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
   2588	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
   2589}
   2590
   2591static void goya_init_firmware_loader(struct hl_device *hdev)
   2592{
   2593	struct asic_fixed_properties *prop = &hdev->asic_prop;
   2594	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
   2595
   2596	/* fill common fields */
   2597	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
   2598	fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
   2599	fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
   2600	fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
   2601	fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
   2602	fw_loader->skip_bmc = false;
   2603	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
   2604	fw_loader->dram_bar_id = DDR_BAR_ID;
   2605
   2606	if (prop->dynamic_fw_load)
   2607		goya_init_dynamic_firmware_loader(hdev);
   2608	else
   2609		goya_init_static_firmware_loader(hdev);
   2610}
   2611
   2612static int goya_init_cpu(struct hl_device *hdev)
   2613{
   2614	struct goya_device *goya = hdev->asic_specific;
   2615	int rc;
   2616
   2617	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
   2618		return 0;
   2619
   2620	if (goya->hw_cap_initialized & HW_CAP_CPU)
   2621		return 0;
   2622
	/*
	 * Before pushing u-boot/Linux to the device, we need to set the DDR
	 * BAR to the base address of the DRAM
	 */
   2627	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
   2628		dev_err(hdev->dev,
   2629			"failed to map DDR bar to DRAM base address\n");
   2630		return -EIO;
   2631	}
   2632
   2633	rc = hl_fw_init_cpu(hdev);
   2634
   2635	if (rc)
   2636		return rc;
   2637
   2638	goya->hw_cap_initialized |= HW_CAP_CPU;
   2639
   2640	return 0;
   2641}
   2642
   2643static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
   2644						u64 phys_addr)
   2645{
   2646	u32 status, timeout_usec;
   2647	int rc;
   2648
   2649	if (hdev->pldm)
   2650		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
   2651	else
   2652		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
   2653
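	/*
	 * Write the hop0 physical address for this ASID and kick the H/W by
	 * setting bit 31 (the busy bit) of MMU_ASID_BUSY; the H/W clears the
	 * bit when the update is done, which is what the poll below waits for
	 */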
   2654	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
   2655	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
   2656	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
   2657
   2658	rc = hl_poll_timeout(
   2659		hdev,
   2660		MMU_ASID_BUSY,
   2661		status,
   2662		!(status & 0x80000000),
   2663		1000,
   2664		timeout_usec);
   2665
   2666	if (rc) {
   2667		dev_err(hdev->dev,
   2668			"Timeout during MMU hop0 config of asid %d\n", asid);
   2669		return rc;
   2670	}
   2671
   2672	return 0;
   2673}
   2674
   2675int goya_mmu_init(struct hl_device *hdev)
   2676{
   2677	struct asic_fixed_properties *prop = &hdev->asic_prop;
   2678	struct goya_device *goya = hdev->asic_specific;
   2679	u64 hop0_addr;
   2680	int rc, i;
   2681
   2682	if (!hdev->mmu_enable)
   2683		return 0;
   2684
   2685	if (goya->hw_cap_initialized & HW_CAP_MMU)
   2686		return 0;
   2687
   2688	hdev->dram_default_page_mapping = true;
   2689
   2690	for (i = 0 ; i < prop->max_asid ; i++) {
   2691		hop0_addr = prop->mmu_pgt_addr +
   2692				(i * prop->mmu_hop_table_size);
   2693
   2694		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
   2695		if (rc) {
   2696			dev_err(hdev->dev,
   2697				"failed to set hop0 addr for asid %d\n", i);
   2698			goto err;
   2699		}
   2700	}
   2701
   2702	goya->hw_cap_initialized |= HW_CAP_MMU;
   2703
   2704	/* init MMU cache manage page */
   2705	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
   2706				lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
   2707	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
   2708
   2709	/* Remove follower feature due to performance bug */
   2710	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
   2711			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
   2712
   2713	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
   2714
   2715	WREG32(mmMMU_MMU_ENABLE, 1);
   2716	WREG32(mmMMU_SPI_MASK, 0xF);
   2717
   2718	return 0;
   2719
   2720err:
   2721	return rc;
   2722}
   2723
   2724/*
   2725 * goya_hw_init - Goya hardware initialization code
   2726 *
   2727 * @hdev: pointer to hl_device structure
   2728 *
   2729 * Returns 0 on success
   2730 *
   2731 */
   2732static int goya_hw_init(struct hl_device *hdev)
   2733{
   2734	struct asic_fixed_properties *prop = &hdev->asic_prop;
   2735	int rc;
   2736
   2737	/* Perform read from the device to make sure device is up */
   2738	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
   2739
   2740	/*
   2741	 * Let's mark in the H/W that we have reached this point. We check
   2742	 * this value in the reset_before_init function to understand whether
   2743	 * we need to reset the chip before doing H/W init. This register is
   2744	 * cleared by the H/W upon H/W reset
   2745	 */
   2746	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
   2747
   2748	rc = goya_init_cpu(hdev);
   2749	if (rc) {
   2750		dev_err(hdev->dev, "failed to initialize CPU\n");
   2751		return rc;
   2752	}
   2753
   2754	goya_tpc_mbist_workaround(hdev);
   2755
   2756	goya_init_golden_registers(hdev);
   2757
   2758	/*
   2759	 * After CPU initialization is finished, change DDR bar mapping inside
   2760	 * iATU to point to the start address of the MMU page tables
   2761	 */
   2762	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
   2763			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
   2764		dev_err(hdev->dev,
   2765			"failed to map DDR bar to MMU page tables\n");
   2766		return -EIO;
   2767	}
   2768
   2769	rc = goya_mmu_init(hdev);
   2770	if (rc)
   2771		return rc;
   2772
   2773	goya_init_security(hdev);
   2774
   2775	goya_init_dma_qmans(hdev);
   2776
   2777	goya_init_mme_qmans(hdev);
   2778
   2779	goya_init_tpc_qmans(hdev);
   2780
   2781	goya_enable_timestamp(hdev);
   2782
   2783	/* MSI-X must be enabled before CPU queues are initialized */
   2784	rc = goya_enable_msix(hdev);
   2785	if (rc)
   2786		goto disable_queues;
   2787
   2788	/* Perform read from the device to flush all MSI-X configuration */
   2789	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
   2790
   2791	return 0;
   2792
   2793disable_queues:
   2794	goya_disable_internal_queues(hdev);
   2795	goya_disable_external_queues(hdev);
   2796
   2797	return rc;
   2798}
   2799
   2800static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
   2801{
   2802	struct goya_device *goya = hdev->asic_specific;
   2803	u32 reset_timeout_ms, cpu_timeout_ms, status;
   2804
   2805	if (hdev->pldm) {
   2806		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
   2807		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
   2808	} else {
   2809		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
   2810		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
   2811	}
   2812
   2813	if (hard_reset) {
		/* We don't know what the state of the CPU is, so make sure it
		 * is stopped by any means necessary
		 */
   2817		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
   2818		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
   2819			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
   2820
   2821		msleep(cpu_timeout_ms);
   2822
   2823		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
   2824		goya_disable_clk_rlx(hdev);
   2825		goya_set_pll_refclk(hdev);
   2826
   2827		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
   2828		dev_dbg(hdev->dev,
   2829			"Issued HARD reset command, going to wait %dms\n",
   2830			reset_timeout_ms);
   2831	} else {
   2832		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
   2833		dev_dbg(hdev->dev,
   2834			"Issued SOFT reset command, going to wait %dms\n",
   2835			reset_timeout_ms);
   2836	}
   2837
	/*
	 * After a hard reset, we can't poll the BTM_FSM register because the
	 * PSOC itself is in reset. In either type of reset we need to wait
	 * until the reset is deasserted
	 */
   2843	msleep(reset_timeout_ms);
   2844
   2845	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
   2846	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
   2847		dev_err(hdev->dev,
   2848			"Timeout while waiting for device to reset 0x%x\n",
   2849			status);
   2850
   2851	if (!hard_reset && goya) {
   2852		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
   2853						HW_CAP_GOLDEN | HW_CAP_TPC);
   2854		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
   2855				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
   2856		return;
   2857	}
   2858
   2859	/* Chicken bit to re-initiate boot sequencer flow */
   2860	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
   2861		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
   2862	/* Move boot manager FSM to pre boot sequencer init state */
   2863	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
   2864			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
   2865
   2866	if (goya) {
   2867		goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
   2868				HW_CAP_DDR_0 | HW_CAP_DDR_1 |
   2869				HW_CAP_DMA | HW_CAP_MME |
   2870				HW_CAP_MMU | HW_CAP_TPC_MBIST |
   2871				HW_CAP_GOLDEN | HW_CAP_TPC);
   2872
   2873		memset(goya->events_stat, 0, sizeof(goya->events_stat));
   2874	}
   2875}
   2876
   2877int goya_suspend(struct hl_device *hdev)
   2878{
   2879	int rc;
   2880
   2881	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
   2882	if (rc)
   2883		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
   2884
   2885	return rc;
   2886}
   2887
   2888int goya_resume(struct hl_device *hdev)
   2889{
   2890	return goya_init_iatu(hdev);
   2891}
   2892
   2893static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
   2894			void *cpu_addr, dma_addr_t dma_addr, size_t size)
   2895{
   2896	int rc;
   2897
   2898	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
   2899			VM_DONTCOPY | VM_NORESERVE;
   2900
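	/* Strip the device-side HOST_PHYS_BASE offset before handing the
	 * address to the DMA-mapping core
	 */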
   2901	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
   2902				(dma_addr - HOST_PHYS_BASE), size);
   2903	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
   2905
   2906	return rc;
   2907}
   2908
   2909void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
   2910{
   2911	u32 db_reg_offset, db_value;
   2912
   2913	switch (hw_queue_id) {
   2914	case GOYA_QUEUE_ID_DMA_0:
   2915		db_reg_offset = mmDMA_QM_0_PQ_PI;
   2916		break;
   2917
   2918	case GOYA_QUEUE_ID_DMA_1:
   2919		db_reg_offset = mmDMA_QM_1_PQ_PI;
   2920		break;
   2921
   2922	case GOYA_QUEUE_ID_DMA_2:
   2923		db_reg_offset = mmDMA_QM_2_PQ_PI;
   2924		break;
   2925
   2926	case GOYA_QUEUE_ID_DMA_3:
   2927		db_reg_offset = mmDMA_QM_3_PQ_PI;
   2928		break;
   2929
   2930	case GOYA_QUEUE_ID_DMA_4:
   2931		db_reg_offset = mmDMA_QM_4_PQ_PI;
   2932		break;
   2933
   2934	case GOYA_QUEUE_ID_CPU_PQ:
   2935		db_reg_offset = mmCPU_IF_PF_PQ_PI;
   2936		break;
   2937
   2938	case GOYA_QUEUE_ID_MME:
   2939		db_reg_offset = mmMME_QM_PQ_PI;
   2940		break;
   2941
   2942	case GOYA_QUEUE_ID_TPC0:
   2943		db_reg_offset = mmTPC0_QM_PQ_PI;
   2944		break;
   2945
   2946	case GOYA_QUEUE_ID_TPC1:
   2947		db_reg_offset = mmTPC1_QM_PQ_PI;
   2948		break;
   2949
   2950	case GOYA_QUEUE_ID_TPC2:
   2951		db_reg_offset = mmTPC2_QM_PQ_PI;
   2952		break;
   2953
   2954	case GOYA_QUEUE_ID_TPC3:
   2955		db_reg_offset = mmTPC3_QM_PQ_PI;
   2956		break;
   2957
   2958	case GOYA_QUEUE_ID_TPC4:
   2959		db_reg_offset = mmTPC4_QM_PQ_PI;
   2960		break;
   2961
   2962	case GOYA_QUEUE_ID_TPC5:
   2963		db_reg_offset = mmTPC5_QM_PQ_PI;
   2964		break;
   2965
   2966	case GOYA_QUEUE_ID_TPC6:
   2967		db_reg_offset = mmTPC6_QM_PQ_PI;
   2968		break;
   2969
   2970	case GOYA_QUEUE_ID_TPC7:
   2971		db_reg_offset = mmTPC7_QM_PQ_PI;
   2972		break;
   2973
   2974	default:
   2975		/* Should never get here */
   2976		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
   2977			hw_queue_id);
   2978		return;
   2979	}
   2980
   2981	db_value = pi;
   2982
   2983	/* ring the doorbell */
   2984	WREG32(db_reg_offset, db_value);
   2985
   2986	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
   2987		/* make sure device CPU will read latest data from host */
   2988		mb();
   2989		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
   2990				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
   2991	}
   2992}
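
/*
 * Note: the DMA and TPC cases above follow fixed per-engine register
 * strides, so the doorbell register could also be computed instead of
 * looked up. A sketch, assuming the GOYA_QUEUE_ID_TPC0..TPC7 values are
 * consecutive:
 *
 *	if (hw_queue_id >= GOYA_QUEUE_ID_TPC0 &&
 *			hw_queue_id <= GOYA_QUEUE_ID_TPC7)
 *		db_reg_offset = mmTPC0_QM_PQ_PI +
 *				(hw_queue_id - GOYA_QUEUE_ID_TPC0) *
 *				(mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
 *
 * The explicit switch trades that compactness for readability
 */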
   2993
   2994void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
   2995{
	/* The QMANs are on the SRAM, so we need to copy to I/O space */
   2997	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
   2998}
   2999
   3000static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
   3001					dma_addr_t *dma_handle, gfp_t flags)
   3002{
   3003	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
   3004						dma_handle, flags);
   3005
   3006	/* Shift to the device's base physical address of host memory */
   3007	if (kernel_addr)
   3008		*dma_handle += HOST_PHYS_BASE;
   3009
   3010	return kernel_addr;
   3011}
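
/*
 * Note: host memory is seen by the device at an offset of HOST_PHYS_BASE,
 * which is why every DMA handle handed to the device has HOST_PHYS_BASE
 * added and every handle coming back has it subtracted (the same shift is
 * applied by goya_dma_pool_zalloc() below and undone in goya_mmap() above)
 */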
   3012
   3013static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
   3014					void *cpu_addr, dma_addr_t dma_handle)
   3015{
   3016	/* Cancel the device's base physical address of host memory */
   3017	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
   3018
   3019	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
   3020}
   3021
   3022int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
   3023{
   3024	return 0;
   3025}
   3026
   3027void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
   3029{
   3030	void *base;
   3031	u32 offset;
   3032
   3033	*dma_handle = hdev->asic_prop.sram_base_address;
   3034
   3035	base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
   3036
   3037	switch (queue_id) {
   3038	case GOYA_QUEUE_ID_MME:
   3039		offset = MME_QMAN_BASE_OFFSET;
   3040		*queue_len = MME_QMAN_LENGTH;
   3041		break;
   3042	case GOYA_QUEUE_ID_TPC0:
   3043		offset = TPC0_QMAN_BASE_OFFSET;
   3044		*queue_len = TPC_QMAN_LENGTH;
   3045		break;
   3046	case GOYA_QUEUE_ID_TPC1:
   3047		offset = TPC1_QMAN_BASE_OFFSET;
   3048		*queue_len = TPC_QMAN_LENGTH;
   3049		break;
   3050	case GOYA_QUEUE_ID_TPC2:
   3051		offset = TPC2_QMAN_BASE_OFFSET;
   3052		*queue_len = TPC_QMAN_LENGTH;
   3053		break;
   3054	case GOYA_QUEUE_ID_TPC3:
   3055		offset = TPC3_QMAN_BASE_OFFSET;
   3056		*queue_len = TPC_QMAN_LENGTH;
   3057		break;
   3058	case GOYA_QUEUE_ID_TPC4:
   3059		offset = TPC4_QMAN_BASE_OFFSET;
   3060		*queue_len = TPC_QMAN_LENGTH;
   3061		break;
   3062	case GOYA_QUEUE_ID_TPC5:
   3063		offset = TPC5_QMAN_BASE_OFFSET;
   3064		*queue_len = TPC_QMAN_LENGTH;
   3065		break;
   3066	case GOYA_QUEUE_ID_TPC6:
   3067		offset = TPC6_QMAN_BASE_OFFSET;
   3068		*queue_len = TPC_QMAN_LENGTH;
   3069		break;
   3070	case GOYA_QUEUE_ID_TPC7:
   3071		offset = TPC7_QMAN_BASE_OFFSET;
   3072		*queue_len = TPC_QMAN_LENGTH;
   3073		break;
   3074	default:
   3075		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
   3076		return NULL;
   3077	}
   3078
   3079	base += offset;
   3080	*dma_handle += offset;
   3081
   3082	return base;
   3083}
   3084
   3085static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
   3086{
   3087	struct packet_msg_prot *fence_pkt;
   3088	u32 *fence_ptr;
   3089	dma_addr_t fence_dma_addr;
   3090	struct hl_cb *cb;
   3091	u32 tmp, timeout;
   3092	int rc;
   3093
   3094	if (hdev->pldm)
   3095		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
   3096	else
   3097		timeout = HL_DEVICE_TIMEOUT_USEC;
   3098
   3099	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
   3100		dev_err_ratelimited(hdev->dev,
   3101			"Can't send driver job on QMAN0 because the device is not idle\n");
   3102		return -EBUSY;
   3103	}
   3104
   3105	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
   3106							&fence_dma_addr);
   3107	if (!fence_ptr) {
   3108		dev_err(hdev->dev,
   3109			"Failed to allocate fence memory for QMAN0\n");
   3110		return -ENOMEM;
   3111	}
   3112
   3113	goya_qman0_set_security(hdev, true);
   3114
   3115	cb = job->patched_cb;
   3116
   3117	fence_pkt = cb->kernel_address +
   3118			job->job_cb_size - sizeof(struct packet_msg_prot);
   3119
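	/*
	 * Fill in the MSG_PROT packet at the end of the patched CB; it
	 * writes GOYA_QMAN0_FENCE_VAL to the host fence buffer, and job
	 * completion is detected by polling that buffer below
	 */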
   3120	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
   3121			(1 << GOYA_PKT_CTL_EB_SHIFT) |
   3122			(1 << GOYA_PKT_CTL_MB_SHIFT);
   3123	fence_pkt->ctl = cpu_to_le32(tmp);
   3124	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
   3125	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
   3126
   3127	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
   3128					job->job_cb_size, cb->bus_address);
   3129	if (rc) {
   3130		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
   3131		goto free_fence_ptr;
   3132	}
   3133
   3134	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
   3135				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
   3136				timeout, true);
   3137
   3138	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
   3139
   3140	if (rc == -ETIMEDOUT) {
   3141		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
   3142		goto free_fence_ptr;
   3143	}
   3144
   3145free_fence_ptr:
   3146	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
   3147					fence_dma_addr);
   3148
   3149	goya_qman0_set_security(hdev, false);
   3150
   3151	return rc;
   3152}
   3153
   3154int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
   3155				u32 timeout, u64 *result)
   3156{
   3157	struct goya_device *goya = hdev->asic_specific;
   3158
   3159	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
   3160		if (result)
   3161			*result = 0;
   3162		return 0;
   3163	}
   3164
   3165	if (!timeout)
   3166		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
   3167
   3168	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
   3169					timeout, result);
   3170}
   3171
   3172int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
   3173{
   3174	struct packet_msg_prot *fence_pkt;
   3175	dma_addr_t pkt_dma_addr;
   3176	u32 fence_val, tmp;
   3177	dma_addr_t fence_dma_addr;
   3178	u32 *fence_ptr;
   3179	int rc;
   3180
   3181	fence_val = GOYA_QMAN0_FENCE_VAL;
   3182
   3183	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
   3184							&fence_dma_addr);
   3185	if (!fence_ptr) {
   3186		dev_err(hdev->dev,
   3187			"Failed to allocate memory for H/W queue %d testing\n",
   3188			hw_queue_id);
   3189		return -ENOMEM;
   3190	}
   3191
   3192	*fence_ptr = 0;
   3193
   3194	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
   3195					sizeof(struct packet_msg_prot),
   3196					GFP_KERNEL, &pkt_dma_addr);
   3197	if (!fence_pkt) {
   3198		dev_err(hdev->dev,
   3199			"Failed to allocate packet for H/W queue %d testing\n",
   3200			hw_queue_id);
   3201		rc = -ENOMEM;
   3202		goto free_fence_ptr;
   3203	}
   3204
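	/*
	 * Build a MSG_PROT fence packet; the queue is healthy if the H/W
	 * processes it and writes fence_val to the host buffer we poll below
	 */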
   3205	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
   3206			(1 << GOYA_PKT_CTL_EB_SHIFT) |
   3207			(1 << GOYA_PKT_CTL_MB_SHIFT);
   3208	fence_pkt->ctl = cpu_to_le32(tmp);
   3209	fence_pkt->value = cpu_to_le32(fence_val);
   3210	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
   3211
   3212	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
   3213					sizeof(struct packet_msg_prot),
   3214					pkt_dma_addr);
   3215	if (rc) {
   3216		dev_err(hdev->dev,
   3217			"Failed to send fence packet to H/W queue %d\n",
   3218			hw_queue_id);
   3219		goto free_pkt;
   3220	}
   3221
   3222	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
   3223					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
   3224
   3225	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
   3226
   3227	if (rc == -ETIMEDOUT) {
   3228		dev_err(hdev->dev,
   3229			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
   3230			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
   3231		rc = -EIO;
   3232	}
   3233
   3234free_pkt:
   3235	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
   3236					pkt_dma_addr);
   3237free_fence_ptr:
   3238	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
   3239					fence_dma_addr);
   3240	return rc;
   3241}
   3242
   3243int goya_test_cpu_queue(struct hl_device *hdev)
   3244{
   3245	struct goya_device *goya = hdev->asic_specific;
   3246
    3247	/*
    3248	 * Check the capability here because send_cpu_message() won't update
    3249	 * the result value if the capability is not initialized
    3250	 */
   3251	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
   3252		return 0;
   3253
   3254	return hl_fw_test_cpu_queue(hdev);
   3255}
   3256
   3257int goya_test_queues(struct hl_device *hdev)
   3258{
   3259	int i, rc, ret_val = 0;
   3260
   3261	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
   3262		rc = goya_test_queue(hdev, i);
   3263		if (rc)
   3264			ret_val = -EINVAL;
   3265	}
   3266
   3267	return ret_val;
   3268}
   3269
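/*
 * Small-block DMA pool, limited to GOYA_DMA_POOL_BLK_SIZE (256 bytes) per
 * allocation. The returned handle is rebased by HOST_PHYS_BASE, the offset
 * at which the device sees host memory, so it can be used directly inside
 * packets; the free path below subtracts it again.
 */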
   3270static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
   3271					gfp_t mem_flags, dma_addr_t *dma_handle)
   3272{
   3273	void *kernel_addr;
   3274
   3275	if (size > GOYA_DMA_POOL_BLK_SIZE)
   3276		return NULL;
   3277
    3278	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
   3279
   3280	/* Shift to the device's base physical address of host memory */
   3281	if (kernel_addr)
   3282		*dma_handle += HOST_PHYS_BASE;
   3283
   3284	return kernel_addr;
   3285}
   3286
   3287static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
   3288				dma_addr_t dma_addr)
   3289{
    3290	/* Subtract the device's base physical address of host memory */
   3291	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
   3292
   3293	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
   3294}
   3295
   3296void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
   3297					dma_addr_t *dma_handle)
   3298{
   3299	void *vaddr;
   3300
   3301	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
   3302	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
   3303			VA_CPU_ACCESSIBLE_MEM_ADDR;
   3304
   3305	return vaddr;
   3306}
   3307
   3308void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
   3309					void *vaddr)
   3310{
   3311	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
   3312}
   3313
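/*
 * Compute how many bytes of LIN_DMA packets a patched CB will need for this
 * SG table. Physically contiguous entries are coalesced as long as the
 * combined length stays within DMA_MAX_TRANSFER_SIZE, mirroring the
 * splitting done later in goya_patch_dma_packet(). E.g. two adjacent 4KB
 * chunks at bus addresses 0x1000 and 0x2000 collapse into one descriptor,
 * i.e. one packet_lin_dma instead of two.
 */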
   3314u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
   3315{
   3316	struct scatterlist *sg, *sg_next_iter;
   3317	u32 count, dma_desc_cnt;
   3318	u64 len, len_next;
   3319	dma_addr_t addr, addr_next;
   3320
   3321	dma_desc_cnt = 0;
   3322
   3323	for_each_sgtable_dma_sg(sgt, sg, count) {
   3324		len = sg_dma_len(sg);
   3325		addr = sg_dma_address(sg);
   3326
   3327		if (len == 0)
   3328			break;
   3329
   3330		while ((count + 1) < sgt->nents) {
   3331			sg_next_iter = sg_next(sg);
   3332			len_next = sg_dma_len(sg_next_iter);
   3333			addr_next = sg_dma_address(sg_next_iter);
   3334
   3335			if (len_next == 0)
   3336				break;
   3337
   3338			if ((addr + len == addr_next) &&
   3339				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
   3340				len += len_next;
   3341				count++;
   3342				sg = sg_next_iter;
   3343			} else {
   3344				break;
   3345			}
   3346		}
   3347
   3348		dma_desc_cnt++;
   3349	}
   3350
   3351	return dma_desc_cnt * sizeof(struct packet_lin_dma);
   3352}
   3353
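/*
 * Pin the host buffer referenced by a user LIN_DMA packet and DMA-map it.
 * If the same range was already pinned for this job (it is found on
 * parser->job_userptr_list), the existing pin is reused and only the
 * patched CB size accounting is updated.
 */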
   3354static int goya_pin_memory_before_cs(struct hl_device *hdev,
   3355				struct hl_cs_parser *parser,
   3356				struct packet_lin_dma *user_dma_pkt,
   3357				u64 addr, enum dma_data_direction dir)
   3358{
   3359	struct hl_userptr *userptr;
   3360	int rc;
   3361
   3362	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
   3363			parser->job_userptr_list, &userptr))
   3364		goto already_pinned;
   3365
   3366	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
   3367	if (!userptr)
   3368		return -ENOMEM;
   3369
   3370	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
   3371				userptr);
   3372	if (rc)
   3373		goto free_userptr;
   3374
   3375	list_add_tail(&userptr->job_node, parser->job_userptr_list);
   3376
   3377	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
   3378	if (rc) {
   3379		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
   3380		goto unpin_memory;
   3381	}
   3382
   3383	userptr->dma_mapped = true;
   3384	userptr->dir = dir;
   3385
   3386already_pinned:
   3387	parser->patched_cb_size +=
   3388			goya_get_dma_desc_list_size(hdev, userptr->sgt);
   3389
   3390	return 0;
   3391
   3392unpin_memory:
   3393	list_del(&userptr->job_node);
   3394	hl_unpin_host_memory(hdev, userptr);
   3395free_userptr:
   3396	kfree(userptr);
   3397	return rc;
   3398}
   3399
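/*
 * Validate a LIN_DMA packet with a host-side endpoint (no-MMU mode). The
 * device-side address is range-checked against the user SRAM or DRAM
 * window, host memory is pinned unless the packet is a memset (whose
 * source is a fill value, not an address), and reading from host memory
 * is only allowed on queues up to GOYA_QUEUE_ID_DMA_1.
 */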
   3400static int goya_validate_dma_pkt_host(struct hl_device *hdev,
   3401				struct hl_cs_parser *parser,
   3402				struct packet_lin_dma *user_dma_pkt)
   3403{
   3404	u64 device_memory_addr, addr;
   3405	enum dma_data_direction dir;
   3406	enum goya_dma_direction user_dir;
   3407	bool sram_addr = true;
   3408	bool skip_host_mem_pin = false;
   3409	bool user_memset;
   3410	u32 ctl;
   3411	int rc = 0;
   3412
   3413	ctl = le32_to_cpu(user_dma_pkt->ctl);
   3414
   3415	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
   3416			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
   3417
   3418	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
   3419			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
   3420
   3421	switch (user_dir) {
   3422	case DMA_HOST_TO_DRAM:
   3423		dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
   3424		dir = DMA_TO_DEVICE;
   3425		sram_addr = false;
   3426		addr = le64_to_cpu(user_dma_pkt->src_addr);
   3427		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3428		if (user_memset)
   3429			skip_host_mem_pin = true;
   3430		break;
   3431
   3432	case DMA_DRAM_TO_HOST:
   3433		dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
   3434		dir = DMA_FROM_DEVICE;
   3435		sram_addr = false;
   3436		addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3437		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
   3438		break;
   3439
   3440	case DMA_HOST_TO_SRAM:
   3441		dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
   3442		dir = DMA_TO_DEVICE;
   3443		addr = le64_to_cpu(user_dma_pkt->src_addr);
   3444		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3445		if (user_memset)
   3446			skip_host_mem_pin = true;
   3447		break;
   3448
   3449	case DMA_SRAM_TO_HOST:
   3450		dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
   3451		dir = DMA_FROM_DEVICE;
   3452		addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3453		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
   3454		break;
   3455	default:
   3456		dev_err(hdev->dev, "DMA direction is undefined\n");
   3457		return -EFAULT;
   3458	}
   3459
   3460	if (sram_addr) {
   3461		if (!hl_mem_area_inside_range(device_memory_addr,
   3462				le32_to_cpu(user_dma_pkt->tsize),
   3463				hdev->asic_prop.sram_user_base_address,
   3464				hdev->asic_prop.sram_end_address)) {
   3465
   3466			dev_err(hdev->dev,
   3467				"SRAM address 0x%llx + 0x%x is invalid\n",
   3468				device_memory_addr,
    3469				le32_to_cpu(user_dma_pkt->tsize));
   3470			return -EFAULT;
   3471		}
   3472	} else {
   3473		if (!hl_mem_area_inside_range(device_memory_addr,
   3474				le32_to_cpu(user_dma_pkt->tsize),
   3475				hdev->asic_prop.dram_user_base_address,
   3476				hdev->asic_prop.dram_end_address)) {
   3477
   3478			dev_err(hdev->dev,
   3479				"DRAM address 0x%llx + 0x%x is invalid\n",
   3480				device_memory_addr,
    3481				le32_to_cpu(user_dma_pkt->tsize));
   3482			return -EFAULT;
   3483		}
   3484	}
   3485
    3486	if (skip_host_mem_pin) {
    3487		parser->patched_cb_size += sizeof(*user_dma_pkt);
    3488	} else {
   3489		if ((dir == DMA_TO_DEVICE) &&
   3490				(parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
   3491			dev_err(hdev->dev,
    3492			"Can't DMA from host on queue other than 1\n");
   3493			return -EFAULT;
   3494		}
   3495
   3496		rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
   3497						addr, dir);
   3498	}
   3499
   3500	return rc;
   3501}
   3502
   3503static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
   3504				struct hl_cs_parser *parser,
   3505				struct packet_lin_dma *user_dma_pkt)
   3506{
   3507	u64 sram_memory_addr, dram_memory_addr;
   3508	enum goya_dma_direction user_dir;
   3509	u32 ctl;
   3510
   3511	ctl = le32_to_cpu(user_dma_pkt->ctl);
   3512	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
   3513			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
   3514
   3515	if (user_dir == DMA_DRAM_TO_SRAM) {
   3516		dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
   3517		dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
   3518		sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3519	} else {
   3520		dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
   3521		sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
   3522		dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3523	}
   3524
   3525	if (!hl_mem_area_inside_range(sram_memory_addr,
   3526				le32_to_cpu(user_dma_pkt->tsize),
   3527				hdev->asic_prop.sram_user_base_address,
   3528				hdev->asic_prop.sram_end_address)) {
   3529		dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
    3530			sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
   3531		return -EFAULT;
   3532	}
   3533
   3534	if (!hl_mem_area_inside_range(dram_memory_addr,
   3535				le32_to_cpu(user_dma_pkt->tsize),
   3536				hdev->asic_prop.dram_user_base_address,
   3537				hdev->asic_prop.dram_end_address)) {
   3538		dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
    3539			dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
   3540		return -EFAULT;
   3541	}
   3542
   3543	parser->patched_cb_size += sizeof(*user_dma_pkt);
   3544
   3545	return 0;
   3546}
   3547
   3548static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
   3549				struct hl_cs_parser *parser,
   3550				struct packet_lin_dma *user_dma_pkt)
   3551{
   3552	enum goya_dma_direction user_dir;
   3553	u32 ctl;
   3554	int rc;
   3555
   3556	dev_dbg(hdev->dev, "DMA packet details:\n");
   3557	dev_dbg(hdev->dev, "source == 0x%llx\n",
   3558		le64_to_cpu(user_dma_pkt->src_addr));
   3559	dev_dbg(hdev->dev, "destination == 0x%llx\n",
   3560		le64_to_cpu(user_dma_pkt->dst_addr));
   3561	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
   3562
   3563	ctl = le32_to_cpu(user_dma_pkt->ctl);
   3564	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
   3565			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
   3566
   3567	/*
   3568	 * Special handling for DMA with size 0. The H/W has a bug where
   3569	 * this can cause the QMAN DMA to get stuck, so block it here.
   3570	 */
   3571	if (user_dma_pkt->tsize == 0) {
   3572		dev_err(hdev->dev,
   3573			"Got DMA with size 0, might reset the device\n");
   3574		return -EINVAL;
   3575	}
   3576
   3577	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
   3578		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
   3579	else
   3580		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
   3581
   3582	return rc;
   3583}
   3584
   3585static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
   3586				struct hl_cs_parser *parser,
   3587				struct packet_lin_dma *user_dma_pkt)
   3588{
   3589	dev_dbg(hdev->dev, "DMA packet details:\n");
   3590	dev_dbg(hdev->dev, "source == 0x%llx\n",
   3591		le64_to_cpu(user_dma_pkt->src_addr));
   3592	dev_dbg(hdev->dev, "destination == 0x%llx\n",
   3593		le64_to_cpu(user_dma_pkt->dst_addr));
   3594	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
   3595
   3596	/*
   3597	 * WA for HW-23.
   3598	 * We can't allow user to read from Host using QMANs other than 1.
   3599	 * PMMU and HPMMU addresses are equal, check only one of them.
   3600	 */
   3601	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
   3602		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
   3603				le32_to_cpu(user_dma_pkt->tsize),
   3604				hdev->asic_prop.pmmu.start_addr,
   3605				hdev->asic_prop.pmmu.end_addr)) {
   3606		dev_err(hdev->dev,
    3607			"Can't DMA from host on queue other than 1\n");
   3608		return -EFAULT;
   3609	}
   3610
   3611	if (user_dma_pkt->tsize == 0) {
   3612		dev_err(hdev->dev,
   3613			"Got DMA with size 0, might reset the device\n");
   3614		return -EINVAL;
   3615	}
   3616
   3617	parser->patched_cb_size += sizeof(*user_dma_pkt);
   3618
   3619	return 0;
   3620}
   3621
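/*
 * The only WREG32 a user may issue is to a DMA channel's WR_COMP_ADDR_LO
 * (the register offset is compared on its low 13 bits). Without the MMU,
 * the written value must also point into the SOB range, since the
 * write-completion address is the only thing the user may redirect.
 */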
   3622static int goya_validate_wreg32(struct hl_device *hdev,
   3623				struct hl_cs_parser *parser,
   3624				struct packet_wreg32 *wreg_pkt)
   3625{
   3626	struct goya_device *goya = hdev->asic_specific;
   3627	u32 sob_start_addr, sob_end_addr;
   3628	u16 reg_offset;
   3629
   3630	reg_offset = le32_to_cpu(wreg_pkt->ctl) &
   3631			GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
   3632
   3633	dev_dbg(hdev->dev, "WREG32 packet details:\n");
   3634	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
   3635	dev_dbg(hdev->dev, "value      == 0x%x\n",
   3636		le32_to_cpu(wreg_pkt->value));
   3637
   3638	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
   3639		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
   3640			reg_offset);
   3641		return -EPERM;
   3642	}
   3643
    3644	/*
    3645	 * With the MMU enabled, DMA channels are not secured, so it doesn't
    3646	 * matter where the WR COMP is written, because it will go out with
    3647	 * the non-secured property
    3648	 */
   3649	if (goya->hw_cap_initialized & HW_CAP_MMU)
   3650		return 0;
   3651
   3652	sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
   3653	sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
   3654
   3655	if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
   3656			(le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
   3657
   3658		dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
    3659			le32_to_cpu(wreg_pkt->value));
   3660		return -EPERM;
   3661	}
   3662
   3663	return 0;
   3664}
   3665
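/*
 * First validation pass over the user CB. Walks it packet by packet,
 * rejects packet types the user may not issue (WREG_BULK, MSG_PROT,
 * CP_DMA, STOP), validates WREG32 and LIN_DMA packets, and accumulates
 * parser->patched_cb_size, since LIN_DMA packets may grow when they are
 * later split per SG chunk in the no-MMU path.
 */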
   3666static int goya_validate_cb(struct hl_device *hdev,
   3667			struct hl_cs_parser *parser, bool is_mmu)
   3668{
   3669	u32 cb_parsed_length = 0;
   3670	int rc = 0;
   3671
   3672	parser->patched_cb_size = 0;
   3673
    3674	/* user_cb_size is greater than 0, so the loop always executes */
   3675	while (cb_parsed_length < parser->user_cb_size) {
   3676		enum packet_id pkt_id;
   3677		u16 pkt_size;
   3678		struct goya_packet *user_pkt;
   3679
   3680		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
   3681
   3682		pkt_id = (enum packet_id) (
   3683				(le64_to_cpu(user_pkt->header) &
   3684				PACKET_HEADER_PACKET_ID_MASK) >>
   3685					PACKET_HEADER_PACKET_ID_SHIFT);
   3686
   3687		if (!validate_packet_id(pkt_id)) {
   3688			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
   3689			rc = -EINVAL;
   3690			break;
   3691		}
   3692
   3693		pkt_size = goya_packet_sizes[pkt_id];
   3694		cb_parsed_length += pkt_size;
   3695		if (cb_parsed_length > parser->user_cb_size) {
   3696			dev_err(hdev->dev,
   3697				"packet 0x%x is out of CB boundary\n", pkt_id);
   3698			rc = -EINVAL;
   3699			break;
   3700		}
   3701
   3702		switch (pkt_id) {
   3703		case PACKET_WREG_32:
    3704			/*
    3705			 * Although it is validated after the copy in patch_cb(),
    3706			 * we need to validate here as well because patch_cb() is
    3707			 * not called in the MMU path while this function is
    3708			 */
   3709			rc = goya_validate_wreg32(hdev,
   3710				parser, (struct packet_wreg32 *) user_pkt);
   3711			parser->patched_cb_size += pkt_size;
   3712			break;
   3713
   3714		case PACKET_WREG_BULK:
   3715			dev_err(hdev->dev,
   3716				"User not allowed to use WREG_BULK\n");
   3717			rc = -EPERM;
   3718			break;
   3719
   3720		case PACKET_MSG_PROT:
   3721			dev_err(hdev->dev,
   3722				"User not allowed to use MSG_PROT\n");
   3723			rc = -EPERM;
   3724			break;
   3725
   3726		case PACKET_CP_DMA:
   3727			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
   3728			rc = -EPERM;
   3729			break;
   3730
   3731		case PACKET_STOP:
   3732			dev_err(hdev->dev, "User not allowed to use STOP\n");
   3733			rc = -EPERM;
   3734			break;
   3735
   3736		case PACKET_LIN_DMA:
   3737			if (is_mmu)
   3738				rc = goya_validate_dma_pkt_mmu(hdev, parser,
   3739					(struct packet_lin_dma *) user_pkt);
   3740			else
   3741				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
   3742					(struct packet_lin_dma *) user_pkt);
   3743			break;
   3744
   3745		case PACKET_MSG_LONG:
   3746		case PACKET_MSG_SHORT:
   3747		case PACKET_FENCE:
   3748		case PACKET_NOP:
   3749			parser->patched_cb_size += pkt_size;
   3750			break;
   3751
   3752		default:
   3753			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
   3754				pkt_id);
   3755			rc = -EINVAL;
   3756			break;
   3757		}
   3758
   3759		if (rc)
   3760			break;
   3761	}
   3762
   3763	/*
   3764	 * The new CB should have space at the end for two MSG_PROT packets:
   3765	 * 1. A packet that will act as a completion packet
    3766	 * 2. A packet that will generate an MSI-X interrupt
   3767	 */
   3768	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
   3769
   3770	return rc;
   3771}
   3772
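/*
 * Rewrite one user LIN_DMA packet into one packet per coalesced SG chunk
 * of the pinned host buffer. The engine barrier (EB) is kept only on the
 * first generated packet and the user's RDCOMP/WRCOMP bits are restored
 * only on the last one, so completion semantics match the original single
 * packet. Device-only transfers, memsets and size-0 packets are copied
 * verbatim.
 */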
   3773static int goya_patch_dma_packet(struct hl_device *hdev,
   3774				struct hl_cs_parser *parser,
   3775				struct packet_lin_dma *user_dma_pkt,
   3776				struct packet_lin_dma *new_dma_pkt,
   3777				u32 *new_dma_pkt_size)
   3778{
   3779	struct hl_userptr *userptr;
   3780	struct scatterlist *sg, *sg_next_iter;
   3781	u32 count, dma_desc_cnt;
   3782	u64 len, len_next;
   3783	dma_addr_t dma_addr, dma_addr_next;
   3784	enum goya_dma_direction user_dir;
   3785	u64 device_memory_addr, addr;
   3786	enum dma_data_direction dir;
   3787	struct sg_table *sgt;
   3788	bool skip_host_mem_pin = false;
   3789	bool user_memset;
   3790	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
   3791
   3792	ctl = le32_to_cpu(user_dma_pkt->ctl);
   3793
   3794	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
   3795			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
   3796
   3797	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
   3798			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
   3799
   3800	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
   3801			(user_dma_pkt->tsize == 0)) {
   3802		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
   3803		*new_dma_pkt_size = sizeof(*new_dma_pkt);
   3804		return 0;
   3805	}
   3806
   3807	if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
   3808		addr = le64_to_cpu(user_dma_pkt->src_addr);
   3809		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3810		dir = DMA_TO_DEVICE;
   3811		if (user_memset)
   3812			skip_host_mem_pin = true;
   3813	} else {
   3814		addr = le64_to_cpu(user_dma_pkt->dst_addr);
   3815		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
   3816		dir = DMA_FROM_DEVICE;
   3817	}
   3818
    3819	if (!skip_host_mem_pin &&
    3820		!hl_userptr_is_pinned(hdev, addr,
    3821			le32_to_cpu(user_dma_pkt->tsize),
    3822			parser->job_userptr_list, &userptr)) {
   3823		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
    3824				addr, le32_to_cpu(user_dma_pkt->tsize));
   3825		return -EFAULT;
   3826	}
   3827
   3828	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
   3829		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
   3830		*new_dma_pkt_size = sizeof(*user_dma_pkt);
   3831		return 0;
   3832	}
   3833
   3834	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
   3835
   3836	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
   3837
   3838	sgt = userptr->sgt;
   3839	dma_desc_cnt = 0;
   3840
   3841	for_each_sgtable_dma_sg(sgt, sg, count) {
   3842		len = sg_dma_len(sg);
   3843		dma_addr = sg_dma_address(sg);
   3844
   3845		if (len == 0)
   3846			break;
   3847
   3848		while ((count + 1) < sgt->nents) {
   3849			sg_next_iter = sg_next(sg);
   3850			len_next = sg_dma_len(sg_next_iter);
   3851			dma_addr_next = sg_dma_address(sg_next_iter);
   3852
   3853			if (len_next == 0)
   3854				break;
   3855
   3856			if ((dma_addr + len == dma_addr_next) &&
   3857				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
   3858				len += len_next;
   3859				count++;
   3860				sg = sg_next_iter;
   3861			} else {
   3862				break;
   3863			}
   3864		}
   3865
   3866		ctl = le32_to_cpu(user_dma_pkt->ctl);
   3867		if (likely(dma_desc_cnt))
   3868			ctl &= ~GOYA_PKT_CTL_EB_MASK;
   3869		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
   3870				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
   3871		new_dma_pkt->ctl = cpu_to_le32(ctl);
   3872		new_dma_pkt->tsize = cpu_to_le32((u32) len);
   3873
   3874		if (dir == DMA_TO_DEVICE) {
   3875			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
   3876			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
   3877		} else {
   3878			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
   3879			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
   3880		}
   3881
   3882		if (!user_memset)
   3883			device_memory_addr += len;
   3884		dma_desc_cnt++;
   3885		new_dma_pkt++;
   3886	}
   3887
   3888	if (!dma_desc_cnt) {
   3889		dev_err(hdev->dev,
    3890			"No SG entries found when patching DMA packet\n");
   3891		return -EFAULT;
   3892	}
   3893
    3894	/* Fix the last DMA packet - rdcomp/wrcomp must be as the user set them */
   3895	new_dma_pkt--;
   3896	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
   3897
   3898	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
   3899
   3900	return 0;
   3901}
   3902
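/*
 * Second pass (no-MMU path only): copy the user CB into the kernel's
 * patched CB, expanding each LIN_DMA packet via goya_patch_dma_packet()
 * and re-validating WREG32 on the copied packet, so a concurrent user
 * modification after the first pass cannot slip through.
 */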
   3903static int goya_patch_cb(struct hl_device *hdev,
   3904				struct hl_cs_parser *parser)
   3905{
   3906	u32 cb_parsed_length = 0;
   3907	u32 cb_patched_cur_length = 0;
   3908	int rc = 0;
   3909
    3910	/* user_cb_size is greater than 0, so the loop always executes */
   3911	while (cb_parsed_length < parser->user_cb_size) {
   3912		enum packet_id pkt_id;
   3913		u16 pkt_size;
   3914		u32 new_pkt_size = 0;
   3915		struct goya_packet *user_pkt, *kernel_pkt;
   3916
   3917		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
   3918		kernel_pkt = parser->patched_cb->kernel_address +
   3919					cb_patched_cur_length;
   3920
   3921		pkt_id = (enum packet_id) (
   3922				(le64_to_cpu(user_pkt->header) &
   3923				PACKET_HEADER_PACKET_ID_MASK) >>
   3924					PACKET_HEADER_PACKET_ID_SHIFT);
   3925
   3926		if (!validate_packet_id(pkt_id)) {
   3927			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
   3928			rc = -EINVAL;
   3929			break;
   3930		}
   3931
   3932		pkt_size = goya_packet_sizes[pkt_id];
   3933		cb_parsed_length += pkt_size;
   3934		if (cb_parsed_length > parser->user_cb_size) {
   3935			dev_err(hdev->dev,
   3936				"packet 0x%x is out of CB boundary\n", pkt_id);
   3937			rc = -EINVAL;
   3938			break;
   3939		}
   3940
   3941		switch (pkt_id) {
   3942		case PACKET_LIN_DMA:
   3943			rc = goya_patch_dma_packet(hdev, parser,
   3944					(struct packet_lin_dma *) user_pkt,
   3945					(struct packet_lin_dma *) kernel_pkt,
   3946					&new_pkt_size);
   3947			cb_patched_cur_length += new_pkt_size;
   3948			break;
   3949
   3950		case PACKET_WREG_32:
   3951			memcpy(kernel_pkt, user_pkt, pkt_size);
   3952			cb_patched_cur_length += pkt_size;
   3953			rc = goya_validate_wreg32(hdev, parser,
   3954					(struct packet_wreg32 *) kernel_pkt);
   3955			break;
   3956
   3957		case PACKET_WREG_BULK:
   3958			dev_err(hdev->dev,
   3959				"User not allowed to use WREG_BULK\n");
   3960			rc = -EPERM;
   3961			break;
   3962
   3963		case PACKET_MSG_PROT:
   3964			dev_err(hdev->dev,
   3965				"User not allowed to use MSG_PROT\n");
   3966			rc = -EPERM;
   3967			break;
   3968
   3969		case PACKET_CP_DMA:
   3970			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
   3971			rc = -EPERM;
   3972			break;
   3973
   3974		case PACKET_STOP:
   3975			dev_err(hdev->dev, "User not allowed to use STOP\n");
   3976			rc = -EPERM;
   3977			break;
   3978
   3979		case PACKET_MSG_LONG:
   3980		case PACKET_MSG_SHORT:
   3981		case PACKET_FENCE:
   3982		case PACKET_NOP:
   3983			memcpy(kernel_pkt, user_pkt, pkt_size);
   3984			cb_patched_cur_length += pkt_size;
   3985			break;
   3986
   3987		default:
   3988			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
   3989				pkt_id);
   3990			rc = -EINVAL;
   3991			break;
   3992		}
   3993
   3994		if (rc)
   3995			break;
   3996	}
   3997
   3998	return rc;
   3999}
   4000
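/*
 * MMU path: no per-packet patching is needed, so the user CB is copied
 * verbatim into a kernel CB sized for the two extra trailing MSG_PROT
 * packets, and the validation pass runs on the copy. If the size computed
 * by validation doesn't match the precomputed one, the CB is rejected.
 */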
   4001static int goya_parse_cb_mmu(struct hl_device *hdev,
   4002		struct hl_cs_parser *parser)
   4003{
   4004	u64 handle;
   4005	u32 patched_cb_size;
   4006	struct hl_cb *user_cb;
   4007	int rc;
   4008
    4009	/*
    4010	 * The new CB should have space at the end for two MSG_PROT packets:
    4011	 * 1. A packet that will act as a completion packet
    4012	 * 2. A packet that will generate an MSI-X interrupt
    4013	 */
   4014	parser->patched_cb_size = parser->user_cb_size +
   4015			sizeof(struct packet_msg_prot) * 2;
   4016
   4017	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
   4018				parser->patched_cb_size, false, false,
   4019				&handle);
   4020
   4021	if (rc) {
   4022		dev_err(hdev->dev,
   4023			"Failed to allocate patched CB for DMA CS %d\n",
   4024			rc);
   4025		return rc;
   4026	}
   4027
   4028	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
   4029	/* hl_cb_get should never fail here */
   4030	if (!parser->patched_cb) {
   4031		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
   4032		rc = -EFAULT;
   4033		goto out;
   4034	}
   4035
   4036	/*
   4037	 * The check that parser->user_cb_size <= parser->user_cb->size was done
   4038	 * in validate_queue_index().
   4039	 */
   4040	memcpy(parser->patched_cb->kernel_address,
   4041		parser->user_cb->kernel_address,
   4042		parser->user_cb_size);
   4043
   4044	patched_cb_size = parser->patched_cb_size;
   4045
   4046	/* validate patched CB instead of user CB */
   4047	user_cb = parser->user_cb;
   4048	parser->user_cb = parser->patched_cb;
   4049	rc = goya_validate_cb(hdev, parser, true);
   4050	parser->user_cb = user_cb;
   4051
   4052	if (rc) {
   4053		hl_cb_put(parser->patched_cb);
   4054		goto out;
   4055	}
   4056
   4057	if (patched_cb_size != parser->patched_cb_size) {
   4058		dev_err(hdev->dev, "user CB size mismatch\n");
   4059		hl_cb_put(parser->patched_cb);
   4060		rc = -EINVAL;
   4061		goto out;
   4062	}
   4063
   4064out:
    4065	/*
    4066	 * Always call cb destroy here because we still hold one reference
    4067	 * to it from the earlier cb_get. After the job completes,
    4068	 * cb_put will release it, but here we want to remove it from the
    4069	 * IDR
    4070	 */
   4071	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
   4072
   4073	return rc;
   4074}
   4075
   4076static int goya_parse_cb_no_mmu(struct hl_device *hdev,
   4077				struct hl_cs_parser *parser)
   4078{
   4079	u64 handle;
   4080	int rc;
   4081
   4082	rc = goya_validate_cb(hdev, parser, false);
   4083
   4084	if (rc)
   4085		goto free_userptr;
   4086
   4087	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
   4088				parser->patched_cb_size, false, false,
   4089				&handle);
   4090	if (rc) {
   4091		dev_err(hdev->dev,
   4092			"Failed to allocate patched CB for DMA CS %d\n", rc);
   4093		goto free_userptr;
   4094	}
   4095
   4096	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
   4097	/* hl_cb_get should never fail here */
   4098	if (!parser->patched_cb) {
   4099		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
   4100		rc = -EFAULT;
   4101		goto out;
   4102	}
   4103
   4104	rc = goya_patch_cb(hdev, parser);
   4105
   4106	if (rc)
   4107		hl_cb_put(parser->patched_cb);
   4108
   4109out:
    4110	/*
    4111	 * Always call cb destroy here because we still hold one reference
    4112	 * to it from the earlier cb_get. After the job completes,
    4113	 * cb_put will release it, but here we want to remove it from the
    4114	 * IDR
    4115	 */
   4116	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
   4117
   4118free_userptr:
   4119	if (rc)
   4120		hl_userptr_delete_list(hdev, parser->job_userptr_list);
   4121	return rc;
   4122}
   4123
   4124static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
   4125					struct hl_cs_parser *parser)
   4126{
   4127	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
   4128	struct goya_device *goya = hdev->asic_specific;
   4129
   4130	if (goya->hw_cap_initialized & HW_CAP_MMU)
   4131		return 0;
   4132
   4133	/* For internal queue jobs, just check if CB address is valid */
   4134	if (hl_mem_area_inside_range(
   4135			(u64) (uintptr_t) parser->user_cb,
   4136			parser->user_cb_size,
   4137			asic_prop->sram_user_base_address,
   4138			asic_prop->sram_end_address))
   4139		return 0;
   4140
   4141	if (hl_mem_area_inside_range(
   4142			(u64) (uintptr_t) parser->user_cb,
   4143			parser->user_cb_size,
   4144			asic_prop->dram_user_base_address,
   4145			asic_prop->dram_end_address))
   4146		return 0;
   4147
   4148	dev_err(hdev->dev,
    4149		"Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
   4150		parser->user_cb, parser->user_cb_size);
   4151
   4152	return -EFAULT;
   4153}
   4154
   4155int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
   4156{
   4157	struct goya_device *goya = hdev->asic_specific;
   4158
   4159	if (parser->queue_type == QUEUE_TYPE_INT)
   4160		return goya_parse_cb_no_ext_queue(hdev, parser);
   4161
   4162	if (goya->hw_cap_initialized & HW_CAP_MMU)
   4163		return goya_parse_cb_mmu(hdev, parser);
   4164	else
   4165		return goya_parse_cb_no_mmu(hdev, parser);
   4166}
   4167
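/*
 * Fill in the two MSG_PROT packets reserved at the tail of a patched CB:
 * first a write of cq_val to the completion queue address (with EB + MB),
 * then a write of the MSI-X vector (masked to 11 bits) to the PCIe MSI-X
 * doorbell, which raises the completion interrupt.
 */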
   4168void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
   4169				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
   4170				bool eb)
   4171{
   4172	struct packet_msg_prot *cq_pkt;
   4173	u32 tmp;
   4174
   4175	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
   4176
   4177	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
   4178			(1 << GOYA_PKT_CTL_EB_SHIFT) |
   4179			(1 << GOYA_PKT_CTL_MB_SHIFT);
   4180	cq_pkt->ctl = cpu_to_le32(tmp);
   4181	cq_pkt->value = cpu_to_le32(cq_val);
   4182	cq_pkt->addr = cpu_to_le64(cq_addr);
   4183
   4184	cq_pkt++;
   4185
   4186	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
   4187			(1 << GOYA_PKT_CTL_MB_SHIFT);
   4188	cq_pkt->ctl = cpu_to_le32(tmp);
   4189	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
   4190	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
   4191}
   4192
   4193void goya_update_eq_ci(struct hl_device *hdev, u32 val)
   4194{
   4195	WREG32(mmCPU_EQ_CI, val);
   4196}
   4197
   4198void goya_restore_phase_topology(struct hl_device *hdev)
   4199{
    4200		/* Intentionally empty - Goya needs no action on this callback */
   4201}
   4202
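/*
 * Zero all 1024 sync objects and all 256 monitor status registers. Note
 * that despite their names, num_of_sob_in_longs/num_of_mon_in_longs hold
 * the register span in bytes; the loops advance one 4-byte register per
 * iteration. The final read flushes the posted writes.
 */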
   4203static void goya_clear_sm_regs(struct hl_device *hdev)
   4204{
   4205	int i, num_of_sob_in_longs, num_of_mon_in_longs;
   4206
   4207	num_of_sob_in_longs =
   4208		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
   4209
   4210	num_of_mon_in_longs =
   4211		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
   4212
   4213	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
   4214		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
   4215
   4216	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
   4217		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
   4218
    4219	/* Flush all WREG32 writes to prevent a race */
   4220	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
   4221}
   4222
   4223static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
   4224{
    4225	dev_err(hdev->dev, "Reading via DMA is not implemented yet\n");
   4226	return -EPERM;
   4227}
   4228
   4229static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
   4230{
   4231	struct goya_device *goya = hdev->asic_specific;
   4232
   4233	if (hdev->reset_info.hard_reset_pending)
   4234		return U64_MAX;
   4235
   4236	return readq(hdev->pcie_bar[DDR_BAR_ID] +
   4237			(addr - goya->ddr_bar_cur_addr));
   4238}
   4239
   4240static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
   4241{
   4242	struct goya_device *goya = hdev->asic_specific;
   4243
   4244	if (hdev->reset_info.hard_reset_pending)
   4245		return;
   4246
   4247	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
   4248			(addr - goya->ddr_bar_cur_addr));
   4249}
   4250
   4251static const char *_goya_get_event_desc(u16 event_type)
   4252{
   4253	switch (event_type) {
   4254	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
   4255		return "PCIe_if";
   4256	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
   4257	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
   4258	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
   4259	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
   4260	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
   4261	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
   4262	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
   4263	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
   4264		return "TPC%d_ecc";
   4265	case GOYA_ASYNC_EVENT_ID_MME_ECC:
   4266		return "MME_ecc";
   4267	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
   4268		return "MME_ecc_ext";
   4269	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
   4270		return "MMU_ecc";
   4271	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
   4272		return "DMA_macro";
   4273	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
   4274		return "DMA_ecc";
   4275	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
   4276		return "CPU_if_ecc";
   4277	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
   4278		return "PSOC_mem";
   4279	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
   4280		return "PSOC_coresight";
   4281	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
   4282		return "SRAM%d";
   4283	case GOYA_ASYNC_EVENT_ID_GIC500:
   4284		return "GIC500";
   4285	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
   4286		return "PLL%d";
   4287	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
   4288		return "AXI_ecc";
   4289	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
   4290		return "L2_ram_ecc";
   4291	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
   4292		return "PSOC_gpio_05_sw_reset";
   4293	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
   4294		return "PSOC_gpio_10_vrhot_icrit";
   4295	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
   4296		return "PCIe_dec";
   4297	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
   4298	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
   4299	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
   4300	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
   4301	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
   4302	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
   4303	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
   4304	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
   4305		return "TPC%d_dec";
   4306	case GOYA_ASYNC_EVENT_ID_MME_WACS:
   4307		return "MME_wacs";
   4308	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
   4309		return "MME_wacsd";
   4310	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
   4311		return "CPU_axi_splitter";
   4312	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
   4313		return "PSOC_axi_dec";
   4314	case GOYA_ASYNC_EVENT_ID_PSOC:
   4315		return "PSOC";
   4316	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
   4317	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
   4318	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
   4319	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
   4320	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
   4321	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
   4322	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
   4323	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
   4324		return "TPC%d_krn_err";
   4325	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
   4326		return "TPC%d_cq";
   4327	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
   4328		return "TPC%d_qm";
   4329	case GOYA_ASYNC_EVENT_ID_MME_QM:
   4330		return "MME_qm";
   4331	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
   4332		return "MME_cq";
   4333	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
   4334		return "DMA%d_qm";
   4335	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
   4336		return "DMA%d_ch";
   4337	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
   4338	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
   4339	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
   4340	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
   4341	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
   4342	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
   4343	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
   4344	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
   4345		return "TPC%d_bmon_spmu";
   4346	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
   4347		return "DMA_bm_ch%d";
   4348	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
   4349		return "POWER_ENV_S";
   4350	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
   4351		return "POWER_ENV_E";
   4352	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
   4353		return "THERMAL_ENV_S";
   4354	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
   4355		return "THERMAL_ENV_E";
   4356	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
   4357		return "QUEUE_OUT_OF_SYNC";
   4358	default:
   4359		return "N/A";
   4360	}
   4361}
   4362
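/*
 * Expand the "%d" in the template strings returned by
 * _goya_get_event_desc() with the engine index. Each divisor reflects the
 * event ID spacing per engine, e.g. TPC ECC events are 3 IDs apart, so
 * index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3, while KRN_ERR
 * and BMON_SPMU events are 10 IDs apart.
 */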
   4363static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
   4364{
   4365	u8 index;
   4366
   4367	switch (event_type) {
   4368	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
   4369	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
   4370	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
   4371	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
   4372	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
   4373	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
   4374	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
   4375	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
   4376		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
   4377		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4378		break;
   4379	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
   4380		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
   4381		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4382		break;
   4383	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
   4384		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
   4385		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4386		break;
   4387	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
   4388	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
   4389	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
   4390	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
   4391	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
   4392	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
   4393	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
   4394	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
   4395		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
   4396		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4397		break;
   4398	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
   4399	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
   4400	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
   4401	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
   4402	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
   4403	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
   4404	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
   4405	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
   4406		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
   4407		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4408		break;
   4409	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
   4410		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
   4411		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4412		break;
   4413	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
   4414		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
   4415		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4416		break;
   4417	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
   4418		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
   4419		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4420		break;
   4421	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
   4422		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
   4423		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4424		break;
   4425	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
   4426	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
   4427	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
   4428	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
   4429	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
   4430	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
   4431	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
   4432	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
   4433		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
   4434		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4435		break;
   4436	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
   4437		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
   4438		snprintf(desc, size, _goya_get_event_desc(event_type), index);
   4439		break;
   4440	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
   4441		snprintf(desc, size, _goya_get_event_desc(event_type));
   4442		break;
   4443	default:
   4444		snprintf(desc, size, _goya_get_event_desc(event_type));
   4445		break;
   4446	}
   4447}
   4448
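/*
 * RAZWI (an illegal access that the fabric completed as read-as-zero /
 * write-ignored) reporting: each *_VLD register latches an illegal
 * LBW/HBW read or write until cleared, so report and clear whichever of
 * them are set.
 */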
   4449static void goya_print_razwi_info(struct hl_device *hdev)
   4450{
   4451	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
   4452		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
   4453		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
   4454	}
   4455
   4456	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
   4457		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
   4458		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
   4459	}
   4460
   4461	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
   4462		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
   4463		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
   4464	}
   4465
   4466	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
   4467		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
   4468		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
   4469	}
   4470}
   4471
   4472static void goya_print_mmu_error_info(struct hl_device *hdev)
   4473{
   4474	struct goya_device *goya = hdev->asic_specific;
   4475	u64 addr;
   4476	u32 val;
   4477
   4478	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
   4479		return;
   4480
   4481	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
   4482	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
   4483		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
   4484		addr <<= 32;
   4485		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
   4486
   4487		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
   4488					addr);
   4489
   4490		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
   4491	}
   4492}
   4493
   4494static void goya_print_out_of_sync_info(struct hl_device *hdev,
   4495					struct cpucp_pkt_sync_err *sync_err)
   4496{
   4497	struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
   4498
   4499	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
   4500			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
   4501}
   4502
   4503static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
   4504				bool razwi)
   4505{
    4506	char desc[32] = "";
   4507
   4508	goya_get_event_desc(event_type, desc, sizeof(desc));
   4509	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
   4510		event_type, desc);
   4511
   4512	if (razwi) {
   4513		goya_print_razwi_info(hdev);
   4514		goya_print_mmu_error_info(hdev);
   4515	}
   4516}
   4517
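/*
 * Unmask a whole array of IRQs with a single CPU-CP packet. The packet is
 * variable-length: a cpucp_unmask_irq_arr_packet header followed by the
 * IRQ IDs, each converted to LE32 for the device, with the total size
 * rounded up to a multiple of 8 bytes so CPU-CP can copy it.
 */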
   4518static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
   4519		size_t irq_arr_size)
   4520{
   4521	struct cpucp_unmask_irq_arr_packet *pkt;
   4522	size_t total_pkt_size;
   4523	u64 result;
   4524	int rc;
   4525	int irq_num_entries, irq_arr_index;
   4526	__le32 *goya_irq_arr;
   4527
   4528	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
   4529			irq_arr_size;
   4530
    4531	/* The data must be aligned to 8 bytes so that CPU-CP can copy it */
   4532	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
   4533
    4534	/* total_pkt_size is cast to u16 later on */
   4535	if (total_pkt_size > USHRT_MAX) {
   4536		dev_err(hdev->dev, "too many elements in IRQ array\n");
   4537		return -EINVAL;
   4538	}
   4539
   4540	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
   4541	if (!pkt)
   4542		return -ENOMEM;
   4543
   4544	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
   4545	pkt->length = cpu_to_le32(irq_num_entries);
   4546
    4547	/* We must perform any necessary endianness conversion on the IRQ
    4548	 * array being passed to the Goya hardware
    4549	 */
   4550	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
   4551			irq_arr_index < irq_num_entries ; irq_arr_index++)
   4552		goya_irq_arr[irq_arr_index] =
   4553				cpu_to_le32(irq_arr[irq_arr_index]);
   4554
   4555	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
   4556						CPUCP_PKT_CTL_OPCODE_SHIFT);
   4557
   4558	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
   4559						total_pkt_size,	0, &result);
   4560
   4561	if (rc)
   4562		dev_err(hdev->dev, "failed to unmask IRQ array\n");
   4563
   4564	kfree(pkt);
   4565
   4566	return rc;
   4567}
   4568
   4569static int goya_non_hard_reset_late_init(struct hl_device *hdev)
   4570{
   4571	/*
   4572	 * Unmask all IRQs since some could have been received
   4573	 * during the soft reset
   4574	 */
   4575	return goya_unmask_irq_arr(hdev, goya_all_events,
   4576					sizeof(goya_all_events));
   4577}
   4578
   4579static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
   4580{
   4581	struct cpucp_packet pkt;
   4582	u64 result;
   4583	int rc;
   4584
   4585	memset(&pkt, 0, sizeof(pkt));
   4586
   4587	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
   4588				CPUCP_PKT_CTL_OPCODE_SHIFT);
   4589	pkt.value = cpu_to_le64(event_type);
   4590
   4591	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
   4592						0, &result);
   4593
   4594	if (rc)
    4595		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
   4596
   4597	return rc;
   4598}
   4599
   4600static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
   4601{
   4602	ktime_t zero_time = ktime_set(0, 0);
   4603
   4604	mutex_lock(&hdev->clk_throttling.lock);
   4605
   4606	switch (event_type) {
   4607	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
   4608		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
   4609		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
   4610		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
   4611		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
   4612		dev_info_ratelimited(hdev->dev,
   4613			"Clock throttling due to power consumption\n");
   4614		break;
   4615
   4616	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
   4617		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
   4618		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
   4619		dev_info_ratelimited(hdev->dev,
    4620			"Power envelope is safe, back to optimal clock\n");
   4621		break;
   4622
   4623	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
   4624		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
   4625		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
   4626		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
   4627		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
   4628		dev_info_ratelimited(hdev->dev,
   4629			"Clock throttling due to overheating\n");
   4630		break;
   4631
   4632	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
   4633		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
   4634		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
   4635		dev_info_ratelimited(hdev->dev,
    4636			"Thermal envelope is safe, back to optimal clock\n");
   4637		break;
   4638
   4639	default:
   4640		dev_err(hdev->dev, "Received invalid clock change event %d\n",
   4641			event_type);
   4642		break;
   4643	}
   4644
   4645	mutex_unlock(&hdev->clk_throttling.lock);
   4646}
   4647
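/*
 * Event queue dispatcher. ECC/critical errors trigger a hard reset when
 * hard_reset_on_fw_events is set; RAZWI-class events print the captured
 * access info and re-unmask the IRQ; clock-change events update the
 * throttling bookkeeping. The per-event counters feed the statistics
 * exposed through goya_get_events_stat().
 */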
   4648void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
   4649{
   4650	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
   4651	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
   4652				>> EQ_CTL_EVENT_TYPE_SHIFT);
   4653	struct goya_device *goya = hdev->asic_specific;
   4654
   4655	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
    4656		dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
   4657				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
   4658		return;
   4659	}
   4660
   4661	goya->events_stat[event_type]++;
   4662	goya->events_stat_aggregate[event_type]++;
   4663
   4664	switch (event_type) {
   4665	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
   4666	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
   4667	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
   4668	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
   4669	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
   4670	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
   4671	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
   4672	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
   4673	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
   4674	case GOYA_ASYNC_EVENT_ID_MME_ECC:
   4675	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
   4676	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
   4677	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
   4678	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
   4679	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
   4680	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
   4681	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
   4682	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
   4683	case GOYA_ASYNC_EVENT_ID_GIC500:
   4684	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
   4685	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
   4686	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
   4687		goya_print_irq_info(hdev, event_type, false);
   4688		if (hdev->hard_reset_on_fw_events)
   4689			hl_device_reset(hdev, (HL_DRV_RESET_HARD |
   4690						HL_DRV_RESET_FW_FATAL_ERR));
   4691		break;
   4692
   4693	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
   4694		goya_print_irq_info(hdev, event_type, false);
   4695		if (hdev->hard_reset_on_fw_events)
   4696			hl_device_reset(hdev, HL_DRV_RESET_HARD);
   4697		break;
   4698
   4699	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
   4700	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
   4701	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
   4702	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
   4703	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
   4704	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
   4705	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
   4706	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
   4707	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
   4708	case GOYA_ASYNC_EVENT_ID_MME_WACS:
   4709	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
   4710	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
   4711	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
   4712	case GOYA_ASYNC_EVENT_ID_PSOC:
   4713	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
   4714	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
   4715	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
   4716	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
   4717	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
   4718	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
   4719	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
   4720	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
   4721	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
   4722	case GOYA_ASYNC_EVENT_ID_MME_QM:
   4723	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
   4724	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
   4725	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
   4726		goya_print_irq_info(hdev, event_type, true);
   4727		goya_unmask_irq(hdev, event_type);
   4728		break;
   4729
   4730	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
   4731	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
   4732	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
   4733	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
   4734	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
   4735	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
   4736	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
   4737	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
   4738	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
   4739	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
   4740		goya_print_irq_info(hdev, event_type, false);
   4741		goya_unmask_irq(hdev, event_type);
   4742		break;
   4743
   4744	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
   4745	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
   4746	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
   4747	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
   4748		goya_print_clk_change_info(hdev, event_type);
   4749		goya_unmask_irq(hdev, event_type);
   4750		break;
   4751
   4752	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
   4753		goya_print_irq_info(hdev, event_type, false);
   4754		goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
   4755		if (hdev->hard_reset_on_fw_events)
   4756			hl_device_reset(hdev, HL_DRV_RESET_HARD);
   4757		else
   4758			hl_fw_unmask_irq(hdev, event_type);
   4759		break;
   4760
   4761	default:
   4762		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
   4763				event_type);
   4764		break;
   4765	}
   4766}
   4767
   4768void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
   4769{
   4770	struct goya_device *goya = hdev->asic_specific;
   4771
   4772	if (aggregate) {
   4773		*size = (u32) sizeof(goya->events_stat_aggregate);
   4774		return goya->events_stat_aggregate;
   4775	}
   4776
   4777	*size = (u32) sizeof(goya->events_stat);
   4778	return goya->events_stat;
   4779}
   4780
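/*
 * Fill a device memory range using memset-mode LIN_DMA packets sent on
 * QMAN0. Each packet covers at most 2GB, so e.g. a 5GB range becomes
 * three packets of 2GB, 2GB and 1GB; cb_size also reserves room for the
 * MSG_PROT fence that goya_send_job_on_qman0() appends. In memset mode,
 * src_addr carries the 64-bit fill value rather than an address.
 */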
   4781static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
   4782				u64 val, bool is_dram)
   4783{
   4784	struct packet_lin_dma *lin_dma_pkt;
   4785	struct hl_cs_job *job;
   4786	u32 cb_size, ctl;
   4787	struct hl_cb *cb;
   4788	int rc, lin_dma_pkts_cnt;
   4789
   4790	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
   4791	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
   4792						sizeof(struct packet_msg_prot);
   4793	cb = hl_cb_kernel_create(hdev, cb_size, false);
   4794	if (!cb)
   4795		return -ENOMEM;
   4796
   4797	lin_dma_pkt = cb->kernel_address;
   4798
   4799	do {
   4800		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
   4801
   4802		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
   4803				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
   4804				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
   4805				(1 << GOYA_PKT_CTL_RB_SHIFT) |
   4806				(1 << GOYA_PKT_CTL_MB_SHIFT));
   4807		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
   4808				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
   4809		lin_dma_pkt->ctl = cpu_to_le32(ctl);
   4810
   4811		lin_dma_pkt->src_addr = cpu_to_le64(val);
   4812		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
   4813		if (lin_dma_pkts_cnt > 1)
   4814			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
   4815		else
   4816			lin_dma_pkt->tsize = cpu_to_le32(size);
   4817
   4818		size -= SZ_2G;
   4819		addr += SZ_2G;
   4820		lin_dma_pkt++;
   4821	} while (--lin_dma_pkts_cnt);
   4822
   4823	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
   4824	if (!job) {
   4825		dev_err(hdev->dev, "Failed to allocate a new job\n");
   4826		rc = -ENOMEM;
   4827		goto release_cb;
   4828	}
   4829
   4830	job->id = 0;
   4831	job->user_cb = cb;
   4832	atomic_inc(&job->user_cb->cs_cnt);
   4833	job->user_cb_size = cb_size;
   4834	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
   4835	job->patched_cb = job->user_cb;
   4836	job->job_cb_size = job->user_cb_size;
   4837
   4838	hl_debugfs_add_job(hdev, job);
   4839
   4840	rc = goya_send_job_on_qman0(hdev, job);
   4841
   4842	hl_debugfs_remove_job(hdev, job);
   4843	kfree(job);
   4844	atomic_dec(&cb->cs_cnt);
   4845
   4846release_cb:
   4847	hl_cb_put(cb);
   4848	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
   4849
   4850	return rc;
   4851}
   4852
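/*
 * Per-ASID context switch: scrub user-visible state so the next owner
 * cannot observe leftovers. SRAM is overwritten with a 0x77 byte pattern
 * (only 64KB on the PLDM simulation platform, to keep it fast), every DMA
 * channel's WR_COMP address is reset to its dedicated sync object, a TPC
 * PLL relax register is rewritten, and the sync manager is cleared.
 */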
   4853int goya_context_switch(struct hl_device *hdev, u32 asid)
   4854{
   4855	struct asic_fixed_properties *prop = &hdev->asic_prop;
   4856	u64 addr = prop->sram_base_address, sob_addr;
   4857	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
   4858	u64 val = 0x7777777777777777ull;
   4859	int rc, dma_id;
   4860	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
   4861					mmDMA_CH_0_WR_COMP_ADDR_LO;
   4862
   4863	rc = goya_memset_device_memory(hdev, addr, size, val, false);
   4864	if (rc) {
   4865		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
   4866		return rc;
   4867	}
   4868
   4869	/* Reset the registers that the user is allowed to change */
   4870	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
   4871	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
   4872
   4873	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
   4874		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
   4875							(dma_id - 1) * 4;
   4876		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
   4877						lower_32_bits(sob_addr));
   4878	}
   4879
   4880	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
   4881
   4882	goya_clear_sm_regs(hdev);
   4883
   4884	return 0;
   4885}
   4886
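       /*
        * Zero the DRAM area that holds the MMU page tables, the DRAM default
        * page and the MMU cache management region.
        */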
   4887static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
   4888{
   4889	struct asic_fixed_properties *prop = &hdev->asic_prop;
   4890	struct goya_device *goya = hdev->asic_specific;
   4891	u64 addr = prop->mmu_pgt_addr;
   4892	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
   4893			MMU_CACHE_MNG_SIZE;
   4894
   4895	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
   4896		return 0;
   4897
   4898	return goya_memset_device_memory(hdev, addr, size, 0, true);
   4899}
   4900
   4901static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
   4902{
   4903	struct goya_device *goya = hdev->asic_specific;
   4904	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
   4905	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
   4906	u64 val = 0x9999999999999999ull;
   4907
   4908	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
   4909		return 0;
   4910
   4911	return goya_memset_device_memory(hdev, addr, size, val, true);
   4912}
   4913
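       /*
        * Set up the MMU mappings the device CPU relies on: an identity
        * mapping of the CPU FW image in DRAM and a mapping of the
        * CPU-accessible DMA region at VA_CPU_ACCESSIBLE_MEM_ADDR.
        */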
   4914static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
   4915{
   4916	struct asic_fixed_properties *prop = &hdev->asic_prop;
   4917	struct goya_device *goya = hdev->asic_specific;
   4918	s64 off, cpu_off;
   4919	int rc;
   4920
   4921	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
   4922		return 0;
   4923
   4924	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
   4925		rc = hl_mmu_map_page(hdev->kernel_ctx,
   4926			prop->dram_base_address + off,
   4927			prop->dram_base_address + off, PAGE_SIZE_2MB,
   4928			(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
   4929		if (rc) {
   4930			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
   4931				prop->dram_base_address + off);
   4932			goto unmap;
   4933		}
   4934	}
   4935
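       	/*
       	 * If the DMA address is 2MB-aligned, one 2MB page is enough;
       	 * otherwise cover the 2MB region with 4KB pages.
       	 */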
   4936	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
   4937		rc = hl_mmu_map_page(hdev->kernel_ctx,
   4938			VA_CPU_ACCESSIBLE_MEM_ADDR,
   4939			hdev->cpu_accessible_dma_address,
   4940			PAGE_SIZE_2MB, true);
   4941
   4942		if (rc) {
   4943			dev_err(hdev->dev,
   4944				"Map failed for CPU accessible memory\n");
   4945			off -= PAGE_SIZE_2MB;
   4946			goto unmap;
   4947		}
   4948	} else {
   4949		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
   4950			rc = hl_mmu_map_page(hdev->kernel_ctx,
   4951				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
   4952				hdev->cpu_accessible_dma_address + cpu_off,
   4953				PAGE_SIZE_4KB, true);
   4954			if (rc) {
   4955				dev_err(hdev->dev,
   4956					"Map failed for CPU accessible memory\n");
   4957				cpu_off -= PAGE_SIZE_4KB;
   4958				goto unmap_cpu;
   4959			}
   4960		}
   4961	}
   4962
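       	/*
       	 * Override the CPU interface AXI user bits so device CPU
       	 * transactions go through the MMU with the kernel ASID.
       	 */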
   4963	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
   4964	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
   4965	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
   4966	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
   4967
   4968	/* Make sure configuration is flushed to device */
   4969	RREG32(mmCPU_IF_AWUSER_OVR_EN);
   4970
   4971	goya->device_cpu_mmu_mappings_done = true;
   4972
   4973	return 0;
   4974
   4975unmap_cpu:
   4976	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
   4977		if (hl_mmu_unmap_page(hdev->kernel_ctx,
   4978				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
   4979				PAGE_SIZE_4KB, true))
   4980			dev_warn_ratelimited(hdev->dev,
   4981				"failed to unmap address 0x%llx\n",
   4982				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
   4983unmap:
   4984	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
   4985		if (hl_mmu_unmap_page(hdev->kernel_ctx,
   4986				prop->dram_base_address + off, PAGE_SIZE_2MB,
   4987				true))
   4988			dev_warn_ratelimited(hdev->dev,
   4989				"failed to unmap address 0x%llx\n",
   4990				prop->dram_base_address + off);
   4991
   4992	return rc;
   4993}
   4994
   4995void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
   4996{
   4997	struct asic_fixed_properties *prop = &hdev->asic_prop;
   4998	struct goya_device *goya = hdev->asic_specific;
   4999	u32 off, cpu_off;
   5000
   5001	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
   5002		return;
   5003
   5004	if (!goya->device_cpu_mmu_mappings_done)
   5005		return;
   5006
   5007	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
   5008	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
   5009
   5010	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
   5011		if (hl_mmu_unmap_page(hdev->kernel_ctx,
   5012				VA_CPU_ACCESSIBLE_MEM_ADDR,
   5013				PAGE_SIZE_2MB, true))
   5014			dev_warn(hdev->dev,
   5015				"Failed to unmap CPU accessible memory\n");
   5016	} else {
   5017		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
   5018			if (hl_mmu_unmap_page(hdev->kernel_ctx,
   5019					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
   5020					PAGE_SIZE_4KB,
   5021					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
   5022				dev_warn_ratelimited(hdev->dev,
   5023					"failed to unmap address 0x%llx\n",
   5024					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
   5025	}
   5026
   5027	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
   5028		if (hl_mmu_unmap_page(hdev->kernel_ctx,
   5029				prop->dram_base_address + off, PAGE_SIZE_2MB,
   5030				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
   5031			dev_warn_ratelimited(hdev->dev,
   5032					"Failed to unmap address 0x%llx\n",
   5033					prop->dram_base_address + off);
   5034
   5035	goya->device_cpu_mmu_mappings_done = false;
   5036}
   5037
   5038static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
   5039{
   5040	struct goya_device *goya = hdev->asic_specific;
   5041	int i;
   5042
   5043	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
   5044		return;
   5045
   5046	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
   5047		dev_crit(hdev->dev, "asid %u is too big\n", asid);
   5048		return;
   5049	}
   5050
   5051	/* zero the MMBP and ASID bits and then set the ASID */
   5052	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
   5053		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
   5054}
   5055
   5056static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
   5057					u32 flags)
   5058{
   5059	struct goya_device *goya = hdev->asic_specific;
   5060	u32 status, timeout_usec;
   5061	int rc;
   5062
   5063	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
   5064		hdev->reset_info.hard_reset_pending)
   5065		return 0;
   5066
   5067	/* There is no need for an L1-only invalidation in Goya */
   5068	if (!is_hard)
   5069		return 0;
   5070
   5071	if (hdev->pldm)
   5072		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
   5073	else
   5074		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
   5075
   5076	/* L0 & L1 invalidation */
   5077	WREG32(mmSTLB_INV_ALL_START, 1);
   5078
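       	/* Wait for the HW to clear the register, indicating completion */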
   5079	rc = hl_poll_timeout(
   5080		hdev,
   5081		mmSTLB_INV_ALL_START,
   5082		status,
   5083		!status,
   5084		1000,
   5085		timeout_usec);
   5086
   5087	return rc;
   5088}
   5089
   5090static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
   5091						bool is_hard, u32 flags,
   5092						u32 asid, u64 va, u64 size)
   5093{
   5094	/* Treat as invalidate all because there is no range invalidation
   5095	 * in Goya
   5096	 */
   5097	return hl_mmu_invalidate_cache(hdev, is_hard, flags);
   5098}
   5099
   5100int goya_send_heartbeat(struct hl_device *hdev)
   5101{
   5102	struct goya_device *goya = hdev->asic_specific;
   5103
   5104	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
   5105		return 0;
   5106
   5107	return hl_fw_send_heartbeat(hdev);
   5108}
   5109
   5110int goya_cpucp_info_get(struct hl_device *hdev)
   5111{
   5112	struct goya_device *goya = hdev->asic_specific;
   5113	struct asic_fixed_properties *prop = &hdev->asic_prop;
   5114	u64 dram_size;
   5115	int rc;
   5116
   5117	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
   5118		return 0;
   5119
   5120	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
   5121					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
   5122					mmCPU_BOOT_ERR1);
   5123	if (rc)
   5124		return rc;
   5125
   5126	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
   5127	if (dram_size) {
   5128		if ((!is_power_of_2(dram_size)) ||
   5129				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
   5130			dev_err(hdev->dev,
   5131				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
   5132				dram_size);
   5133			dram_size = DRAM_PHYS_DEFAULT_SIZE;
   5134		}
   5135
   5136		prop->dram_size = dram_size;
   5137		prop->dram_end_address = prop->dram_base_address + dram_size;
   5138	}
   5139
   5140	if (!strlen(prop->cpucp_info.card_name))
   5141		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
   5142				CARD_NAME_MAX_LEN);
   5143
   5144	return 0;
   5145}
   5146
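       /*
        * Check that every DMA, TPC and MME engine is idle. Busy engines set
        * their bit in the caller's mask; when a seq_file is supplied, a
        * per-engine status table is printed as well.
        */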
   5147static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
   5148					u8 mask_len, struct seq_file *s)
   5149{
   5150	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
   5151	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
   5152	unsigned long *mask = (unsigned long *)mask_arr;
   5153	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
   5154		mme_arch_sts;
   5155	bool is_idle = true, is_eng_idle;
   5156	u64 offset;
   5157	int i;
   5158
   5159	if (s)
   5160		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
   5161				"---  -------  ------------  -------------\n");
   5162
   5163	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
   5164
   5165	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
   5166		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
   5167		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
   5168		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
   5169				IS_DMA_IDLE(dma_core_sts0);
   5170		is_idle &= is_eng_idle;
   5171
   5172		if (mask && !is_eng_idle)
   5173			set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
   5174		if (s)
   5175			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
   5176					qm_glbl_sts0, dma_core_sts0);
   5177	}
   5178
   5179	if (s)
   5180		seq_puts(s,
   5181			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
   5182			"---  -------  ------------  --------------  ----------\n");
   5183
   5184	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
   5185
   5186	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
   5187		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
   5188		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
   5189		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
   5190		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
   5191				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
   5192				IS_TPC_IDLE(tpc_cfg_sts);
   5193		is_idle &= is_eng_idle;
   5194
   5195		if (mask && !is_eng_idle)
   5196			set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
   5197		if (s)
   5198			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
   5199				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
   5200	}
   5201
   5202	if (s)
   5203		seq_puts(s,
   5204			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
   5205			"---  -------  ------------  --------------  -----------\n");
   5206
   5207	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
   5208	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
   5209	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
   5210	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
   5211			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
   5212			IS_MME_IDLE(mme_arch_sts);
   5213	is_idle &= is_eng_idle;
   5214
   5215	if (mask && !is_eng_idle)
   5216		set_bit(GOYA_ENGINE_ID_MME_0, mask);
   5217	if (s) {
   5218		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
   5219				cmdq_glbl_sts0, mme_arch_sts);
   5220		seq_puts(s, "\n");
   5221	}
   5222
   5223	return is_idle;
   5224}
   5225
   5226static void goya_hw_queues_lock(struct hl_device *hdev)
   5227	__acquires(&goya->hw_queues_lock)
   5228{
   5229	struct goya_device *goya = hdev->asic_specific;
   5230
   5231	spin_lock(&goya->hw_queues_lock);
   5232}
   5233
   5234static void goya_hw_queues_unlock(struct hl_device *hdev)
   5235	__releases(&goya->hw_queues_lock)
   5236{
   5237	struct goya_device *goya = hdev->asic_specific;
   5238
   5239	spin_unlock(&goya->hw_queues_lock);
   5240}
   5241
   5242static u32 goya_get_pci_id(struct hl_device *hdev)
   5243{
   5244	return hdev->pdev->device;
   5245}
   5246
   5247static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
   5248				size_t max_size)
   5249{
   5250	struct goya_device *goya = hdev->asic_specific;
   5251
   5252	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
   5253		return 0;
   5254
   5255	return hl_fw_get_eeprom_data(hdev, data, max_size);
   5256}
   5257
   5258static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
   5259{
   5260
   5261}
   5262
   5263static int goya_ctx_init(struct hl_ctx *ctx)
   5264{
   5265	if (ctx->asid != HL_KERNEL_ASID_ID)
   5266		goya_mmu_prepare(ctx->hdev, ctx->asid);
   5267
   5268	return 0;
   5269}
   5270
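       /* Goya uses a fixed 1:1 mapping between CQs and HW queues */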
   5271u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
   5272{
   5273	return cq_idx;
   5274}
   5275
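       /*
        * Sync-stream (signal/wait CB) submissions are not supported on Goya,
        * so the generators and size getters below are stubs.
        */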
   5276static u32 goya_get_signal_cb_size(struct hl_device *hdev)
   5277{
   5278	return 0;
   5279}
   5280
   5281static u32 goya_get_wait_cb_size(struct hl_device *hdev)
   5282{
   5283	return 0;
   5284}
   5285
   5286static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
   5287				u32 size, bool eb)
   5288{
   5289	return 0;
   5290}
   5291
   5292static u32 goya_gen_wait_cb(struct hl_device *hdev,
   5293		struct hl_gen_wait_properties *prop)
   5294{
   5295	return 0;
   5296}
   5297
   5298static void goya_reset_sob(struct hl_device *hdev, void *data)
   5299{
   5300
   5301}
   5302
   5303static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
   5304{
   5305
   5306}
   5307
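       /*
        * Compose the 64-bit device timestamp from the two 32-bit counter
        * halves, high word first.
        */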
   5308u64 goya_get_device_time(struct hl_device *hdev)
   5309{
   5310	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
   5311
   5312	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
   5313}
   5314
   5315static int goya_collective_wait_init_cs(struct hl_cs *cs)
   5316{
   5317	return 0;
   5318}
   5319
   5320static int goya_collective_wait_create_jobs(struct hl_device *hdev,
   5321		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
   5322		u32 collective_engine_id, u32 encaps_signal_offset)
   5323{
   5324	return -EINVAL;
   5325}
   5326
   5327static void goya_ctx_fini(struct hl_ctx *ctx)
   5328{
   5329
   5330}
   5331
   5332static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
   5333			u32 *block_size, u32 *block_id)
   5334{
   5335	return -EPERM;
   5336}
   5337
   5338static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
   5339				u32 block_id, u32 block_size)
   5340{
   5341	return -EPERM;
   5342}
   5343
   5344static void goya_enable_events_from_fw(struct hl_device *hdev)
   5345{
   5346	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
   5347			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
   5348}
   5349
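       /* Translate the driver's PLL index to the firmware's enumeration */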
   5350static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
   5351{
   5352	switch (pll_idx) {
   5353	case HL_GOYA_CPU_PLL: return CPU_PLL;
   5354	case HL_GOYA_PCI_PLL: return PCI_PLL;
   5355	case HL_GOYA_MME_PLL: return MME_PLL;
   5356	case HL_GOYA_TPC_PLL: return TPC_PLL;
   5357	case HL_GOYA_IC_PLL: return IC_PLL;
   5358	case HL_GOYA_MC_PLL: return MC_PLL;
   5359	case HL_GOYA_EMMC_PLL: return EMMC_PLL;
   5360	default: return -EINVAL;
   5361	}
   5362}
   5363
   5364static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
   5365				struct hl_sync_to_engine_map *map)
   5366{
   5367	/* Not implemented */
   5368	return 0;
   5369}
   5370
   5371static int goya_monitor_valid(struct hl_mon_state_dump *mon)
   5372{
   5373	/* Not implemented */
   5374	return 0;
   5375}
   5376
   5377static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
   5378				struct hl_device *hdev,
   5379				struct hl_mon_state_dump *mon)
   5380{
   5381	/* Not implemented */
   5382	return 0;
   5383}
   5384
   5385
   5386static int goya_print_fences_single_engine(
   5387	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
   5388	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
   5389	size_t *size, size_t *offset)
   5390{
   5391	/* Not implemented */
   5392	return 0;
   5393}
   5394
   5395
   5396static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
   5397	.monitor_valid = goya_monitor_valid,
   5398	.print_single_monitor = goya_print_single_monitor,
   5399	.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
   5400	.print_fences_single_engine = goya_print_fences_single_engine,
   5401};
   5402
   5403static void goya_state_dump_init(struct hl_device *hdev)
   5404{
   5405	/* Mostly not implemented; only default props and funcs are set */
   5406	hdev->state_dump_specs.props = goya_state_dump_specs_props;
   5407	hdev->state_dump_specs.funcs = goya_state_dump_funcs;
   5408}
   5409
   5410static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
   5411{
   5412	return 0;
   5413}
   5414
   5415static u32 *goya_get_stream_master_qid_arr(void)
   5416{
   5417	return NULL;
   5418}
   5419
   5420static void goya_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info)
   5421{
   5422	/* Set 0 since page sizes other than the default are not supported */
   5423	info->page_order_bitmask = 0;
   5424}
   5425
   5426static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
   5427{
   5428	return -EOPNOTSUPP;
   5429}
   5430
   5431static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
   5432{
   5433	return -EOPNOTSUPP;
   5434}
   5435
   5436static const struct hl_asic_funcs goya_funcs = {
   5437	.early_init = goya_early_init,
   5438	.early_fini = goya_early_fini,
   5439	.late_init = goya_late_init,
   5440	.late_fini = goya_late_fini,
   5441	.sw_init = goya_sw_init,
   5442	.sw_fini = goya_sw_fini,
   5443	.hw_init = goya_hw_init,
   5444	.hw_fini = goya_hw_fini,
   5445	.halt_engines = goya_halt_engines,
   5446	.suspend = goya_suspend,
   5447	.resume = goya_resume,
   5448	.mmap = goya_mmap,
   5449	.ring_doorbell = goya_ring_doorbell,
   5450	.pqe_write = goya_pqe_write,
   5451	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
   5452	.asic_dma_free_coherent = goya_dma_free_coherent,
   5453	.scrub_device_mem = goya_scrub_device_mem,
   5454	.scrub_device_dram = goya_scrub_device_dram,
   5455	.get_int_queue_base = goya_get_int_queue_base,
   5456	.test_queues = goya_test_queues,
   5457	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
   5458	.asic_dma_pool_free = goya_dma_pool_free,
   5459	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
   5460	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
   5461	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
   5462	.cs_parser = goya_cs_parser,
   5463	.asic_dma_map_sgtable = hl_dma_map_sgtable,
   5464	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
   5465	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
   5466	.update_eq_ci = goya_update_eq_ci,
   5467	.context_switch = goya_context_switch,
   5468	.restore_phase_topology = goya_restore_phase_topology,
   5469	.debugfs_read_dma = goya_debugfs_read_dma,
   5470	.add_device_attr = goya_add_device_attr,
   5471	.handle_eqe = goya_handle_eqe,
   5472	.get_events_stat = goya_get_events_stat,
   5473	.read_pte = goya_read_pte,
   5474	.write_pte = goya_write_pte,
   5475	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
   5476	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
   5477	.mmu_prefetch_cache_range = NULL,
   5478	.send_heartbeat = goya_send_heartbeat,
   5479	.debug_coresight = goya_debug_coresight,
   5480	.is_device_idle = goya_is_device_idle,
   5481	.non_hard_reset_late_init = goya_non_hard_reset_late_init,
   5482	.hw_queues_lock = goya_hw_queues_lock,
   5483	.hw_queues_unlock = goya_hw_queues_unlock,
   5484	.get_pci_id = goya_get_pci_id,
   5485	.get_eeprom_data = goya_get_eeprom_data,
   5486	.get_monitor_dump = goya_get_monitor_dump,
   5487	.send_cpu_message = goya_send_cpu_message,
   5488	.pci_bars_map = goya_pci_bars_map,
   5489	.init_iatu = goya_init_iatu,
   5490	.rreg = hl_rreg,
   5491	.wreg = hl_wreg,
   5492	.halt_coresight = goya_halt_coresight,
   5493	.ctx_init = goya_ctx_init,
   5494	.ctx_fini = goya_ctx_fini,
   5495	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
   5496	.load_firmware_to_device = goya_load_firmware_to_device,
   5497	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
   5498	.get_signal_cb_size = goya_get_signal_cb_size,
   5499	.get_wait_cb_size = goya_get_wait_cb_size,
   5500	.gen_signal_cb = goya_gen_signal_cb,
   5501	.gen_wait_cb = goya_gen_wait_cb,
   5502	.reset_sob = goya_reset_sob,
   5503	.reset_sob_group = goya_reset_sob_group,
   5504	.get_device_time = goya_get_device_time,
   5505	.collective_wait_init_cs = goya_collective_wait_init_cs,
   5506	.collective_wait_create_jobs = goya_collective_wait_create_jobs,
   5507	.scramble_addr = hl_mmu_scramble_addr,
   5508	.descramble_addr = hl_mmu_descramble_addr,
   5509	.ack_protection_bits_errors = goya_ack_protection_bits_errors,
   5510	.get_hw_block_id = goya_get_hw_block_id,
   5511	.hw_block_mmap = goya_block_mmap,
   5512	.enable_events_from_fw = goya_enable_events_from_fw,
   5513	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
   5514	.init_firmware_loader = goya_init_firmware_loader,
   5515	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
   5516	.state_dump_init = goya_state_dump_init,
   5517	.get_sob_addr = goya_get_sob_addr,
   5518	.set_pci_memory_regions = goya_set_pci_memory_regions,
   5519	.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
   5520	.is_valid_dram_page_size = NULL,
   5521	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
   5522	.get_valid_dram_page_orders = goya_get_valid_dram_page_orders,
   5523	.access_dev_mem = hl_access_dev_mem,
   5524	.set_dram_bar_base = goya_set_ddr_bar_base,
   5525};
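
       /*
        * The common habanalabs core reaches the ASIC code only through this
        * ops table, e.g. (illustrative call sites, not verbatim):
        *
        *	rc = hdev->asic_funcs->hw_init(hdev);
        *	hdev->asic_funcs->hw_queues_lock(hdev);
        */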
   5526
   5527/*
   5528 * goya_set_asic_funcs - set Goya function pointers
   5529 *
   5530 * @hdev: pointer to hl_device structure
   5531 *
   5532 */
   5533void goya_set_asic_funcs(struct hl_device *hdev)
   5534{
   5535	hdev->asic_funcs = &goya_funcs;
   5536}