crdump.c (7792B)
1/* 2 * Copyright (c) 2018, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33#include "mlx4.h" 34 35#define BAD_ACCESS 0xBADACCE5 36#define HEALTH_BUFFER_SIZE 0x40 37#define CR_ENABLE_BIT swab32(BIT(6)) 38#define CR_ENABLE_BIT_OFFSET 0xF3F04 39#define MAX_NUM_OF_DUMPS_TO_STORE (8) 40 41#define REGION_CR_SPACE "cr-space" 42#define REGION_FW_HEALTH "fw-health" 43 44static const char * const region_cr_space_str = REGION_CR_SPACE; 45static const char * const region_fw_health_str = REGION_FW_HEALTH; 46 47static const struct devlink_region_ops region_cr_space_ops = { 48 .name = REGION_CR_SPACE, 49 .destructor = &kvfree, 50}; 51 52static const struct devlink_region_ops region_fw_health_ops = { 53 .name = REGION_FW_HEALTH, 54 .destructor = &kvfree, 55}; 56 57/* Set to true in case cr enable bit was set to true before crdump */ 58static bool crdump_enbale_bit_set; 59 60static void crdump_enable_crspace_access(struct mlx4_dev *dev, 61 u8 __iomem *cr_space) 62{ 63 /* Get current enable bit value */ 64 crdump_enbale_bit_set = 65 readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT; 66 67 /* Enable FW CR filter (set bit6 to 0) */ 68 if (crdump_enbale_bit_set) 69 writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT, 70 cr_space + CR_ENABLE_BIT_OFFSET); 71 72 /* Enable block volatile crspace accesses */ 73 writel(swab32(1), cr_space + dev->caps.health_buffer_addrs + 74 HEALTH_BUFFER_SIZE); 75} 76 77static void crdump_disable_crspace_access(struct mlx4_dev *dev, 78 u8 __iomem *cr_space) 79{ 80 /* Disable block volatile crspace accesses */ 81 writel(0, cr_space + dev->caps.health_buffer_addrs + 82 HEALTH_BUFFER_SIZE); 83 84 /* Restore FW CR filter value (set bit6 to original value) */ 85 if (crdump_enbale_bit_set) 86 writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT, 87 cr_space + CR_ENABLE_BIT_OFFSET); 88} 89 90static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev, 91 u8 __iomem *cr_space, 92 u32 id) 93{ 94 struct mlx4_fw_crdump *crdump = &dev->persist->crdump; 95 struct pci_dev *pdev = dev->persist->pdev; 96 unsigned long cr_res_size; 97 u8 *crspace_data; 98 int offset; 99 int err; 100 101 if (!crdump->region_crspace) { 102 mlx4_err(dev, "crdump: cr-space region is NULL\n"); 103 return; 104 } 105 106 /* Try to collect CR space */ 107 cr_res_size = pci_resource_len(pdev, 0); 108 crspace_data = kvmalloc(cr_res_size, GFP_KERNEL); 109 if (crspace_data) { 110 for (offset = 0; offset < cr_res_size; offset += 4) 111 *(u32 *)(crspace_data + offset) = 112 readl(cr_space + offset); 113 114 err = devlink_region_snapshot_create(crdump->region_crspace, 115 crspace_data, id); 116 if (err) { 117 kvfree(crspace_data); 118 mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", 119 region_cr_space_str, id, err); 120 } else { 121 mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n", 122 id, region_cr_space_str); 123 } 124 } else { 125 mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n"); 126 } 127} 128 129static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev, 130 u8 __iomem *cr_space, 131 u32 id) 132{ 133 struct mlx4_fw_crdump *crdump = &dev->persist->crdump; 134 u8 *health_data; 135 int offset; 136 int err; 137 138 if (!crdump->region_fw_health) { 139 mlx4_err(dev, "crdump: fw-health region is NULL\n"); 140 return; 141 } 142 143 /* Try to collect health buffer */ 144 health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL); 145 if (health_data) { 146 u8 __iomem *health_buf_start = 147 cr_space + dev->caps.health_buffer_addrs; 148 149 for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4) 150 *(u32 *)(health_data + offset) = 151 readl(health_buf_start + offset); 152 153 err = devlink_region_snapshot_create(crdump->region_fw_health, 154 health_data, id); 155 if (err) { 156 kvfree(health_data); 157 mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n", 158 region_fw_health_str, id, err); 159 } else { 160 mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n", 161 id, region_fw_health_str); 162 } 163 } else { 164 mlx4_err(dev, "crdump: Failed to allocate health buffer\n"); 165 } 166} 167 168int mlx4_crdump_collect(struct mlx4_dev *dev) 169{ 170 struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); 171 struct mlx4_fw_crdump *crdump = &dev->persist->crdump; 172 struct pci_dev *pdev = dev->persist->pdev; 173 unsigned long cr_res_size; 174 u8 __iomem *cr_space; 175 int err; 176 u32 id; 177 178 if (!dev->caps.health_buffer_addrs) { 179 mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n"); 180 return 0; 181 } 182 183 if (!crdump->snapshot_enable) { 184 mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n"); 185 return 0; 186 } 187 188 cr_res_size = pci_resource_len(pdev, 0); 189 190 cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size); 191 if (!cr_space) { 192 mlx4_err(dev, "crdump: Failed to map pci cr region\n"); 193 return -ENODEV; 194 } 195 196 /* Get the available snapshot ID for the dumps */ 197 err = devlink_region_snapshot_id_get(devlink, &id); 198 if (err) { 199 mlx4_err(dev, "crdump: devlink get snapshot id err %d\n", err); 200 iounmap(cr_space); 201 return err; 202 } 203 204 crdump_enable_crspace_access(dev, cr_space); 205 206 /* Try to capture dumps */ 207 mlx4_crdump_collect_crspace(dev, cr_space, id); 208 mlx4_crdump_collect_fw_health(dev, cr_space, id); 209 210 /* Release reference on the snapshot id */ 211 devlink_region_snapshot_id_put(devlink, id); 212 213 crdump_disable_crspace_access(dev, cr_space); 214 215 iounmap(cr_space); 216 return 0; 217} 218 219int mlx4_crdump_init(struct mlx4_dev *dev) 220{ 221 struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); 222 struct mlx4_fw_crdump *crdump = &dev->persist->crdump; 223 struct pci_dev *pdev = dev->persist->pdev; 224 225 crdump->snapshot_enable = false; 226 227 /* Create cr-space region */ 228 crdump->region_crspace = 229 devlink_region_create(devlink, 230 ®ion_cr_space_ops, 231 MAX_NUM_OF_DUMPS_TO_STORE, 232 pci_resource_len(pdev, 0)); 233 if (IS_ERR(crdump->region_crspace)) 234 mlx4_warn(dev, "crdump: create devlink region %s err %ld\n", 235 region_cr_space_str, 236 PTR_ERR(crdump->region_crspace)); 237 238 /* Create fw-health region */ 239 crdump->region_fw_health = 240 devlink_region_create(devlink, 241 ®ion_fw_health_ops, 242 MAX_NUM_OF_DUMPS_TO_STORE, 243 HEALTH_BUFFER_SIZE); 244 if (IS_ERR(crdump->region_fw_health)) 245 mlx4_warn(dev, "crdump: create devlink region %s err %ld\n", 246 region_fw_health_str, 247 PTR_ERR(crdump->region_fw_health)); 248 249 return 0; 250} 251 252void mlx4_crdump_end(struct mlx4_dev *dev) 253{ 254 struct mlx4_fw_crdump *crdump = &dev->persist->crdump; 255 256 devlink_region_destroy(crdump->region_fw_health); 257 devlink_region_destroy(crdump->region_crspace); 258}