cxllib.c (6497B)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2017 IBM Corp.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/opal-api.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1


bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on p9, some pci slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * Dummy read buffer is 128-byte long, aligned on a
	 * 256-byte boundary and we need the physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
		(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
	     "Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			   unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc.. with no worries.
		 * So always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device dma to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	if (translation_mode != CXL_TRANSLATED_MODE &&
	    translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				    task == NULL,
				    translation_mode == CXL_REAL_MODE,
				    true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		struct mm_struct *mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * Caller is keeping a reference on mm_users for as long
		 * as XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);

static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	mmap_read_lock(mm);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	mmap_read_unlock(mm);
	return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold mm->mmap_lock while iterating, since
			 * the lock is required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry access, get another page fault on the
			 * missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					  &page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
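
The sketch below is not part of cxllib.c; it is a hedged illustration of how a hypothetical PCI driver might drive the exports above (slot check, XSL config fetch, PHB mode switch, DMA setup). The function name example_enable_capi and its error handling are assumptions; only the cxllib_* calls and the CXL_MODE_* constants come from the file itself.

/*
 * Illustrative sketch only, not part of cxllib.c: a hypothetical
 * caller enabling CAPI mode on a device using the cxllib API above.
 */
#include <linux/pci.h>
#include <misc/cxllib.h>

static int example_enable_capi(struct pci_dev *dev)
{
	struct cxllib_xsl_config cfg;
	int rc;

	/* Check that this slot can be put in CAPI mode (no flags defined) */
	if (!cxllib_slot_is_supported(dev, 0))
		return -ENODEV;

	/* Fetch the XSL configuration: CAPI window, dummy read address, ... */
	rc = cxllib_get_xsl_config(dev, &cfg);
	if (rc)
		return rc;

	/* Switch the PHB to CAPI mode, with DMA on TVT#1 */
	rc = cxllib_switch_phb_mode(dev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
	if (rc)
		return rc;

	/* Route the device's DMA through TVT#1 (64-bit mask, no flags) */
	return cxllib_set_device_dma(dev, 0);
}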