cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

card_utils.c (27783B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * IBM Accelerator Family 'GenWQE'
      4 *
      5 * (C) Copyright IBM Corp. 2013
      6 *
      7 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
      8 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
      9 * Author: Michael Jung <mijung@gmx.net>
     10 * Author: Michael Ruettger <michael@ibmra.de>
     11 */
     12
/*
 * Miscellaneous functionality used in the other GenWQE driver parts.
 */
     16
     17#include <linux/kernel.h>
     18#include <linux/sched.h>
     19#include <linux/vmalloc.h>
     20#include <linux/page-flags.h>
     21#include <linux/scatterlist.h>
     22#include <linux/hugetlb.h>
     23#include <linux/iommu.h>
     24#include <linux/pci.h>
     25#include <linux/dma-mapping.h>
     26#include <linux/ctype.h>
     27#include <linux/module.h>
     28#include <linux/platform_device.h>
     29#include <linux/delay.h>
     30#include <linux/pgtable.h>
     31
     32#include "genwqe_driver.h"
     33#include "card_base.h"
     34#include "card_ddcb.h"
     35
     36/**
     37 * __genwqe_writeq() - Write 64-bit register
     38 * @cd:	        genwqe device descriptor
     39 * @byte_offs:  byte offset within BAR
     40 * @val:        64-bit value
     41 *
     42 * Return: 0 if success; < 0 if error
     43 */
     44int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
     45{
     46	struct pci_dev *pci_dev = cd->pci_dev;
     47
     48	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
     49		return -EIO;
     50
     51	if (cd->mmio == NULL)
     52		return -EIO;
     53
     54	if (pci_channel_offline(pci_dev))
     55		return -EIO;
     56
     57	__raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
     58	return 0;
     59}
     60
     61/**
     62 * __genwqe_readq() - Read 64-bit register
     63 * @cd:         genwqe device descriptor
     64 * @byte_offs:  offset within BAR
     65 *
     66 * Return: value from register
     67 */
     68u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
     69{
     70	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
     71		return 0xffffffffffffffffull;
     72
     73	if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) &&
     74	    (byte_offs == IO_SLC_CFGREG_GFIR))
     75		return 0x000000000000ffffull;
     76
     77	if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) &&
     78	    (byte_offs == IO_SLC_CFGREG_GFIR))
     79		return 0x00000000ffff0000ull;
     80
     81	if (cd->mmio == NULL)
     82		return 0xffffffffffffffffull;
     83
     84	return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
     85}
     86
     87/**
     88 * __genwqe_writel() - Write 32-bit register
     89 * @cd:	        genwqe device descriptor
     90 * @byte_offs:  byte offset within BAR
     91 * @val:        32-bit value
     92 *
     93 * Return: 0 if success; < 0 if error
     94 */
     95int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
     96{
     97	struct pci_dev *pci_dev = cd->pci_dev;
     98
     99	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
    100		return -EIO;
    101
    102	if (cd->mmio == NULL)
    103		return -EIO;
    104
    105	if (pci_channel_offline(pci_dev))
    106		return -EIO;
    107
    108	__raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
    109	return 0;
    110}
    111
    112/**
    113 * __genwqe_readl() - Read 32-bit register
    114 * @cd:         genwqe device descriptor
    115 * @byte_offs:  offset within BAR
    116 *
    117 * Return: Value from register
    118 */
    119u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
    120{
    121	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
    122		return 0xffffffff;
    123
    124	if (cd->mmio == NULL)
    125		return 0xffffffff;
    126
    127	return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
    128}
    129
    130/**
    131 * genwqe_read_app_id() - Extract app_id
    132 * @cd:	        genwqe device descriptor
    133 * @app_name:   carrier used to pass-back name
    134 * @len:        length of data for name
    135 *
    136 * app_unitcfg need to be filled with valid data first
    137 */
    138int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
    139{
    140	int i, j;
    141	u32 app_id = (u32)cd->app_unitcfg;
    142
    143	memset(app_name, 0, len);
    144	for (i = 0, j = 0; j < min(len, 4); j++) {
    145		char ch = (char)((app_id >> (24 - j*8)) & 0xff);
    146
    147		if (ch == ' ')
    148			continue;
    149		app_name[i++] = isprint(ch) ? ch : 'X';
    150	}
    151	return i;
    152}
    153
    154/**
    155 * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
    156 *
    157 * Existing kernel functions seem to use a different polynom,
    158 * therefore we could not use them here.
    159 *
    160 * Genwqe's Polynomial = 0x20044009
    161 */
    162#define CRC32_POLYNOMIAL	0x20044009
    163static u32 crc32_tab[256];	/* crc32 lookup table */
    164
    165void genwqe_init_crc32(void)
    166{
    167	int i, j;
    168	u32 crc;
    169
    170	for (i = 0;  i < 256;  i++) {
    171		crc = i << 24;
    172		for (j = 0;  j < 8;  j++) {
    173			if (crc & 0x80000000)
    174				crc = (crc << 1) ^ CRC32_POLYNOMIAL;
    175			else
    176				crc = (crc << 1);
    177		}
    178		crc32_tab[i] = crc;
    179	}
    180}
    181
    182/**
    183 * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
    184 * @buff:       pointer to data buffer
    185 * @len:        length of data for calculation
    186 * @init:       initial crc (0xffffffff at start)
    187 *
    188 * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
    189 *
    190 * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
    191 * result in a crc32 of 0xf33cb7d3.
    192 *
    193 * The existing kernel crc functions did not cover this polynom yet.
    194 *
    195 * Return: crc32 checksum.
    196 */
    197u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
    198{
    199	int i;
    200	u32 crc;
    201
    202	crc = init;
    203	while (len--) {
    204		i = ((crc >> 24) ^ *buff++) & 0xFF;
    205		crc = (crc << 8) ^ crc32_tab[i];
    206	}
    207	return crc;
    208}
    209
    210void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
    211			       dma_addr_t *dma_handle)
    212{
    213	if (get_order(size) >= MAX_ORDER)
    214		return NULL;
    215
    216	return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
    217				  GFP_KERNEL);
    218}
    219
    220void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
    221			     void *vaddr, dma_addr_t dma_handle)
    222{
    223	if (vaddr == NULL)
    224		return;
    225
    226	dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
    227}
    228
    229static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
    230			      int num_pages)
    231{
    232	int i;
    233	struct pci_dev *pci_dev = cd->pci_dev;
    234
    235	for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) {
    236		dma_unmap_page(&pci_dev->dev, dma_list[i], PAGE_SIZE,
    237			       DMA_BIDIRECTIONAL);
    238		dma_list[i] = 0x0;
    239	}
    240}
    241
    242static int genwqe_map_pages(struct genwqe_dev *cd,
    243			   struct page **page_list, int num_pages,
    244			   dma_addr_t *dma_list)
    245{
    246	int i;
    247	struct pci_dev *pci_dev = cd->pci_dev;
    248
    249	/* establish DMA mapping for requested pages */
    250	for (i = 0; i < num_pages; i++) {
    251		dma_addr_t daddr;
    252
    253		dma_list[i] = 0x0;
    254		daddr = dma_map_page(&pci_dev->dev, page_list[i],
    255				     0,	 /* map_offs */
    256				     PAGE_SIZE,
    257				     DMA_BIDIRECTIONAL);  /* FIXME rd/rw */
    258
    259		if (dma_mapping_error(&pci_dev->dev, daddr)) {
    260			dev_err(&pci_dev->dev,
    261				"[%s] err: no dma addr daddr=%016llx!\n",
    262				__func__, (long long)daddr);
    263			goto err;
    264		}
    265
    266		dma_list[i] = daddr;
    267	}
    268	return 0;
    269
    270 err:
    271	genwqe_unmap_pages(cd, dma_list, num_pages);
    272	return -EIO;
    273}
    274
    275static int genwqe_sgl_size(int num_pages)
    276{
    277	int len, num_tlb = num_pages / 7;
    278
    279	len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1);
    280	return roundup(len, PAGE_SIZE);
    281}
    282
    283/*
    284 * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
    285 *
    286 * Allocates memory for sgl and overlapping pages. Pages which might
    287 * overlap other user-space memory blocks are being cached for DMAs,
    288 * such that we do not run into syncronization issues. Data is copied
    289 * from user-space into the cached pages.
    290 */
    291int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
    292			  void __user *user_addr, size_t user_size, int write)
    293{
    294	int ret = -ENOMEM;
    295	struct pci_dev *pci_dev = cd->pci_dev;
    296
    297	sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
    298	sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
    299	sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
    300	sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;
    301
    302	dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
    303		__func__, user_addr, user_size, sgl->nr_pages,
    304		sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);
    305
    306	sgl->user_addr = user_addr;
    307	sgl->user_size = user_size;
    308	sgl->write = write;
    309	sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);
    310
    311	if (get_order(sgl->sgl_size) > MAX_ORDER) {
    312		dev_err(&pci_dev->dev,
    313			"[%s] err: too much memory requested!\n", __func__);
    314		return ret;
    315	}
    316
    317	sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
    318					     &sgl->sgl_dma_addr);
    319	if (sgl->sgl == NULL) {
    320		dev_err(&pci_dev->dev,
    321			"[%s] err: no memory available!\n", __func__);
    322		return ret;
    323	}
    324
    325	/* Only use buffering on incomplete pages */
    326	if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
    327		sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
    328						       &sgl->fpage_dma_addr);
    329		if (sgl->fpage == NULL)
    330			goto err_out;
    331
    332		/* Sync with user memory */
    333		if (copy_from_user(sgl->fpage + sgl->fpage_offs,
    334				   user_addr, sgl->fpage_size)) {
    335			ret = -EFAULT;
    336			goto err_out;
    337		}
    338	}
    339	if (sgl->lpage_size != 0) {
    340		sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
    341						       &sgl->lpage_dma_addr);
    342		if (sgl->lpage == NULL)
    343			goto err_out1;
    344
    345		/* Sync with user memory */
    346		if (copy_from_user(sgl->lpage, user_addr + user_size -
    347				   sgl->lpage_size, sgl->lpage_size)) {
    348			ret = -EFAULT;
    349			goto err_out2;
    350		}
    351	}
    352	return 0;
    353
    354 err_out2:
    355	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
    356				 sgl->lpage_dma_addr);
    357	sgl->lpage = NULL;
    358	sgl->lpage_dma_addr = 0;
    359 err_out1:
    360	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
    361				 sgl->fpage_dma_addr);
    362	sgl->fpage = NULL;
    363	sgl->fpage_dma_addr = 0;
    364 err_out:
    365	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
    366				 sgl->sgl_dma_addr);
    367	sgl->sgl = NULL;
    368	sgl->sgl_dma_addr = 0;
    369	sgl->sgl_size = 0;
    370
    371	return ret;
    372}
    373
/**
 * genwqe_setup_sgl() - Fill in the entries of an allocated sgl
 * @cd:         genwqe device descriptor (not referenced in this function)
 * @sgl:        scatter gather list descriptor; sgl, sgl_dma_addr, nr_pages,
 *              user_size, fpage_offs and the fpage/lpage fields are read
 * @dma_list:   DMA addresses indexed by page number, used for every page
 *              which is not replaced by sgl->fpage or sgl->lpage
 *
 * The list is written in groups of 8 entries. Entry 0 of each group is
 * an SG_CHAINED entry pointing to sgl_dma_addr + dma_offs, where
 * dma_offs advances by 128 per group (presumably the size of one group
 * of 8 entries - TODO confirm against struct sg_entry). The remaining
 * entries are SG_DATA entries, one per page, unless the page's DMA
 * address directly continues the previous entry, in which case the
 * previous entry's length is extended instead.
 *
 * When the last page has been processed, the entries of the last used
 * group are moved up by one, which overwrites that group's chaining
 * entry, and an SG_END_LIST entry is written behind them.
 *
 * Return: always 0
 */
int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
		     dma_addr_t *dma_list)
{
	int i = 0, j = 0, p;
	unsigned long dma_offs, map_offs;
	dma_addr_t prev_daddr = 0;	/* end address of the previous entry */
	struct sg_entry *s, *last_s = NULL;
	size_t size = sgl->user_size;	/* bytes still to be described */

	dma_offs = 128;		/* next block if needed/dma_offset */
	map_offs = sgl->fpage_offs; /* offset in first page */

	s = &sgl->sgl[0];	/* first set of 8 entries */
	p = 0;			/* page */
	while (p < sgl->nr_pages) {
		dma_addr_t daddr;
		unsigned int size_to_map;

		/* always write the chaining entry, cleanup is done later */
		j = 0;
		s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
		s[j].len	 = cpu_to_be32(128);
		s[j].flags	 = cpu_to_be32(SG_CHAINED);
		j++;

		while (j < 8) {
			/* DMA mapping for requested page, offs, size */
			size_to_map = min(size, PAGE_SIZE - map_offs);

			/*
			 * First and last page use the cached pages if those
			 * exist, all other pages use the caller's dma_list.
			 */
			if ((p == 0) && (sgl->fpage != NULL)) {
				daddr = sgl->fpage_dma_addr + map_offs;

			} else if ((p == sgl->nr_pages - 1) &&
				   (sgl->lpage != NULL)) {
				daddr = sgl->lpage_dma_addr;
			} else {
				daddr = dma_list[p] + map_offs;
			}

			size -= size_to_map;
			map_offs = 0;	/* only the first page has an offset */

			/* page continues the previous entry: extend it */
			if (prev_daddr == daddr) {
				u32 prev_len = be32_to_cpu(last_s->len);

				/* pr_info("daddr combining: "
					"%016llx/%08x -> %016llx\n",
					prev_daddr, prev_len, daddr); */

				last_s->len = cpu_to_be32(prev_len +
							  size_to_map);

				p++; /* process next page */
				if (p == sgl->nr_pages)
					goto fixup;  /* nothing to do */

				prev_daddr = daddr + size_to_map;
				continue;	/* j unchanged: no entry used */
			}

			/* start new entry */
			s[j].target_addr = cpu_to_be64(daddr);
			s[j].len	 = cpu_to_be32(size_to_map);
			s[j].flags	 = cpu_to_be32(SG_DATA);
			prev_daddr = daddr + size_to_map;
			last_s = &s[j];
			j++;

			p++;	/* process next page */
			if (p == sgl->nr_pages)
				goto fixup;  /* nothing to do */
		}
		dma_offs += 128;
		s += 8;		/* continue 8 elements further */
	}
 fixup:
	/*
	 * j == 1 means the current group only holds its chaining entry,
	 * so the data entries to be shifted are in the previous group.
	 */
	if (j == 1) {		/* combining happened on last entry! */
		s -= 8;		/* full shift needed on previous sgl block */
		j =  7;		/* shift all elements */
	}

	/*
	 * NOTE(review): the last iteration copies s[j], which is not an
	 * entry written above unless the j == 1 case applied; s[j] itself
	 * is then overwritten with the end marker - confirm this is
	 * intended.
	 */
	for (i = 0; i < j; i++)	/* move elements 1 up */
		s[i] = s[i + 1];

	/* i == j here: terminate the list behind the moved entries */
	s[i].target_addr = cpu_to_be64(0);
	s[i].len	 = cpu_to_be32(0);
	s[i].flags	 = cpu_to_be32(SG_END_LIST);
	return 0;
}
    463
    464/**
    465 * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
    466 * @cd:	        genwqe device descriptor
    467 * @sgl:        scatter gather list describing user-space memory
    468 *
    469 * After the DMA transfer has been completed we free the memory for
    470 * the sgl and the cached pages. Data is being transferred from cached
    471 * pages into user-space buffers.
    472 */
    473int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
    474{
    475	int rc = 0;
    476	size_t offset;
    477	unsigned long res;
    478	struct pci_dev *pci_dev = cd->pci_dev;
    479
    480	if (sgl->fpage) {
    481		if (sgl->write) {
    482			res = copy_to_user(sgl->user_addr,
    483				sgl->fpage + sgl->fpage_offs, sgl->fpage_size);
    484			if (res) {
    485				dev_err(&pci_dev->dev,
    486					"[%s] err: copying fpage! (res=%lu)\n",
    487					__func__, res);
    488				rc = -EFAULT;
    489			}
    490		}
    491		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
    492					 sgl->fpage_dma_addr);
    493		sgl->fpage = NULL;
    494		sgl->fpage_dma_addr = 0;
    495	}
    496	if (sgl->lpage) {
    497		if (sgl->write) {
    498			offset = sgl->user_size - sgl->lpage_size;
    499			res = copy_to_user(sgl->user_addr + offset, sgl->lpage,
    500					   sgl->lpage_size);
    501			if (res) {
    502				dev_err(&pci_dev->dev,
    503					"[%s] err: copying lpage! (res=%lu)\n",
    504					__func__, res);
    505				rc = -EFAULT;
    506			}
    507		}
    508		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
    509					 sgl->lpage_dma_addr);
    510		sgl->lpage = NULL;
    511		sgl->lpage_dma_addr = 0;
    512	}
    513	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
    514				 sgl->sgl_dma_addr);
    515
    516	sgl->sgl = NULL;
    517	sgl->sgl_dma_addr = 0x0;
    518	sgl->sgl_size = 0;
    519	return rc;
    520}
    521
    522/**
    523 * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
    524 * @cd:         pointer to genwqe device
    525 * @m:          mapping params
    526 * @uaddr:      user virtual address
    527 * @size:       size of memory to be mapped
    528 *
    529 * We need to think about how we could speed this up. Of course it is
    530 * not a good idea to do this over and over again, like we are
    531 * currently doing it. Nevertheless, I am curious where on the path
    532 * the performance is spend. Most probably within the memory
    533 * allocation functions, but maybe also in the DMA mapping code.
    534 *
    535 * Restrictions: The maximum size of the possible mapping currently depends
    536 *               on the amount of memory we can get using kzalloc() for the
    537 *               page_list and pci_alloc_consistent for the sg_list.
    538 *               The sg_list is currently itself not scattered, which could
    539 *               be fixed with some effort. The page_list must be split into
    540 *               PAGE_SIZE chunks too. All that will make the complicated
    541 *               code more complicated.
    542 *
    543 * Return: 0 if success
    544 */
    545int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
    546		     unsigned long size)
    547{
    548	int rc = -EINVAL;
    549	unsigned long data, offs;
    550	struct pci_dev *pci_dev = cd->pci_dev;
    551
    552	if ((uaddr == NULL) || (size == 0)) {
    553		m->size = 0;	/* mark unused and not added */
    554		return -EINVAL;
    555	}
    556	m->u_vaddr = uaddr;
    557	m->size    = size;
    558
    559	/* determine space needed for page_list. */
    560	data = (unsigned long)uaddr;
    561	offs = offset_in_page(data);
    562	if (size > ULONG_MAX - PAGE_SIZE - offs) {
    563		m->size = 0;	/* mark unused and not added */
    564		return -EINVAL;
    565	}
    566	m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);
    567
    568	m->page_list = kcalloc(m->nr_pages,
    569			       sizeof(struct page *) + sizeof(dma_addr_t),
    570			       GFP_KERNEL);
    571	if (!m->page_list) {
    572		dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
    573		m->nr_pages = 0;
    574		m->u_vaddr = NULL;
    575		m->size = 0;	/* mark unused and not added */
    576		return -ENOMEM;
    577	}
    578	m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);
    579
    580	/* pin user pages in memory */
    581	rc = pin_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
    582				 m->nr_pages,
    583				 m->write ? FOLL_WRITE : 0,	/* readable/writable */
    584				 m->page_list);	/* ptrs to pages */
    585	if (rc < 0)
    586		goto fail_pin_user_pages;
    587
    588	/* assumption: pin_user_pages can be killed by signals. */
    589	if (rc < m->nr_pages) {
    590		unpin_user_pages_dirty_lock(m->page_list, rc, m->write);
    591		rc = -EFAULT;
    592		goto fail_pin_user_pages;
    593	}
    594
    595	rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
    596	if (rc != 0)
    597		goto fail_free_user_pages;
    598
    599	return 0;
    600
    601 fail_free_user_pages:
    602	unpin_user_pages_dirty_lock(m->page_list, m->nr_pages, m->write);
    603
    604 fail_pin_user_pages:
    605	kfree(m->page_list);
    606	m->page_list = NULL;
    607	m->dma_list = NULL;
    608	m->nr_pages = 0;
    609	m->u_vaddr = NULL;
    610	m->size = 0;		/* mark unused and not added */
    611	return rc;
    612}
    613
    614/**
    615 * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel
    616 *                        memory
    617 * @cd:         pointer to genwqe device
    618 * @m:          mapping params
    619 */
    620int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m)
    621{
    622	struct pci_dev *pci_dev = cd->pci_dev;
    623
    624	if (!dma_mapping_used(m)) {
    625		dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
    626			__func__, m);
    627		return -EINVAL;
    628	}
    629
    630	if (m->dma_list)
    631		genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);
    632
    633	if (m->page_list) {
    634		unpin_user_pages_dirty_lock(m->page_list, m->nr_pages,
    635					    m->write);
    636		kfree(m->page_list);
    637		m->page_list = NULL;
    638		m->dma_list = NULL;
    639		m->nr_pages = 0;
    640	}
    641
    642	m->u_vaddr = NULL;
    643	m->size = 0;		/* mark as unused and not added */
    644	return 0;
    645}
    646
    647/**
    648 * genwqe_card_type() - Get chip type SLU Configuration Register
    649 * @cd:         pointer to the genwqe device descriptor
    650 * Return: 0: Altera Stratix-IV 230
    651 *         1: Altera Stratix-IV 530
    652 *         2: Altera Stratix-V A4
    653 *         3: Altera Stratix-V A7
    654 */
    655u8 genwqe_card_type(struct genwqe_dev *cd)
    656{
    657	u64 card_type = cd->slu_unitcfg;
    658
    659	return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
    660}
    661
/**
 * genwqe_card_reset() - Reset the card
 * @cd:         pointer to the genwqe device descriptor
 *
 * Writes the soft reset register, waits, reads the HSU/APP/SLU FIR
 * clear registers and then triggers the error reset. The function
 * sleeps (msleep) and does not check the result of the register
 * writes.
 *
 * Return: 0 after the sequence has been issued; -ENODEV if the device
 *         is not privileged
 */
int genwqe_card_reset(struct genwqe_dev *cd)
{
	u64 softrst;
	struct pci_dev *pci_dev = cd->pci_dev;

	if (!genwqe_is_privileged(cd))
		return -ENODEV;

	/* new SL */
	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
	msleep(1000);
	/* values are discarded; presumably reading clears the FIRs */
	__genwqe_readq(cd, IO_HSU_FIR_CLR);
	__genwqe_readq(cd, IO_APP_FIR_CLR);
	__genwqe_readq(cd, IO_SLU_FIR_CLR);

	/*
	 * Read-modify-write to preserve the stealth bits
	 *
	 * For SL >= 039, Stealth WE bit allows removing
	 * the read-modify-write.
	 * r-m-w may require a mask 0x3C to avoid hitting hard
	 * reset again for error reset (should be 0, chicken).
	 */
	softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);

	/* give ERRORRESET some time to finish */
	msleep(50);

	if (genwqe_need_err_masking(cd)) {
		dev_info(&pci_dev->dev,
			 "[%s] masking errors for old bitstreams\n", __func__);
		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
	}
	return 0;
}
    702
    703int genwqe_read_softreset(struct genwqe_dev *cd)
    704{
    705	u64 bitstream;
    706
    707	if (!genwqe_is_privileged(cd))
    708		return -ENODEV;
    709
    710	bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
    711	cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull;
    712	return 0;
    713}
    714
    715/**
    716 * genwqe_set_interrupt_capability() - Configure MSI capability structure
    717 * @cd:         pointer to the device
    718 * @count:      number of vectors to allocate
    719 * Return: 0 if no error
    720 */
    721int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
    722{
    723	int rc;
    724
    725	rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI);
    726	if (rc < 0)
    727		return rc;
    728	return 0;
    729}
    730
    731/**
    732 * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
    733 * @cd:         pointer to the device
    734 */
    735void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
    736{
    737	pci_free_irq_vectors(cd->pci_dev);
    738}
    739
    740/**
    741 * set_reg_idx() - Fill array with data. Ignore illegal offsets.
    742 * @cd:         card device
    743 * @r:          debug register array
    744 * @i:          index to desired entry
    745 * @m:          maximum possible entries
    746 * @addr:       addr which is read
    747 * @idx:        index in debug array
    748 * @val:        read value
    749 */
    750static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
    751		       unsigned int *i, unsigned int m, u32 addr, u32 idx,
    752		       u64 val)
    753{
    754	if (WARN_ON_ONCE(*i >= m))
    755		return -EFAULT;
    756
    757	r[*i].addr = addr;
    758	r[*i].idx = idx;
    759	r[*i].val = val;
    760	++*i;
    761	return 0;
    762}
    763
    764static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
    765		   unsigned int *i, unsigned int m, u32 addr, u64 val)
    766{
    767	return set_reg_idx(cd, r, i, m, addr, 0, val);
    768}
    769
    770int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
    771			 unsigned int max_regs, int all)
    772{
    773	unsigned int i, j, idx = 0;
    774	u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
    775	u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;
    776
    777	/* Global FIR */
    778	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
    779	set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);
    780
    781	/* UnitCfg for SLU */
    782	sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
    783	set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);
    784
    785	/* UnitCfg for APP */
    786	appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
    787	set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);
    788
    789	/* Check all chip Units */
    790	for (i = 0; i < GENWQE_MAX_UNITS; i++) {
    791
    792		/* Unit FIR */
    793		ufir_addr = (i << 24) | 0x008;
    794		ufir = __genwqe_readq(cd, ufir_addr);
    795		set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);
    796
    797		/* Unit FEC */
    798		ufec_addr = (i << 24) | 0x018;
    799		ufec = __genwqe_readq(cd, ufec_addr);
    800		set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);
    801
    802		for (j = 0; j < 64; j++) {
    803			/* wherever there is a primary 1, read the 2ndary */
    804			if (!all && (!(ufir & (1ull << j))))
    805				continue;
    806
    807			sfir_addr = (i << 24) | (0x100 + 8 * j);
    808			sfir = __genwqe_readq(cd, sfir_addr);
    809			set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);
    810
    811			sfec_addr = (i << 24) | (0x300 + 8 * j);
    812			sfec = __genwqe_readq(cd, sfec_addr);
    813			set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
    814		}
    815	}
    816
    817	/* fill with invalid data until end */
    818	for (i = idx; i < max_regs; i++) {
    819		regs[i].addr = 0xffffffff;
    820		regs[i].val = 0xffffffffffffffffull;
    821	}
    822	return idx;
    823}
    824
    825/**
    826 * genwqe_ffdc_buff_size() - Calculates the number of dump registers
    827 * @cd:	        genwqe device descriptor
    828 * @uid:	unit ID
    829 */
    830int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
    831{
    832	int entries = 0, ring, traps, traces, trace_entries;
    833	u32 eevptr_addr, l_addr, d_len, d_type;
    834	u64 eevptr, val, addr;
    835
    836	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
    837	eevptr = __genwqe_readq(cd, eevptr_addr);
    838
    839	if ((eevptr != 0x0) && (eevptr != -1ull)) {
    840		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
    841
    842		while (1) {
    843			val = __genwqe_readq(cd, l_addr);
    844
    845			if ((val == 0x0) || (val == -1ull))
    846				break;
    847
    848			/* 38:24 */
    849			d_len  = (val & 0x0000007fff000000ull) >> 24;
    850
    851			/* 39 */
    852			d_type = (val & 0x0000008000000000ull) >> 36;
    853
    854			if (d_type) {	/* repeat */
    855				entries += d_len;
    856			} else {	/* size in bytes! */
    857				entries += d_len >> 3;
    858			}
    859
    860			l_addr += 8;
    861		}
    862	}
    863
    864	for (ring = 0; ring < 8; ring++) {
    865		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
    866		val = __genwqe_readq(cd, addr);
    867
    868		if ((val == 0x0ull) || (val == -1ull))
    869			continue;
    870
    871		traps = (val >> 24) & 0xff;
    872		traces = (val >> 16) & 0xff;
    873		trace_entries = val & 0xffff;
    874
    875		entries += traps + (traces * trace_entries);
    876	}
    877	return entries;
    878}
    879
/**
 * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
 * @cd:	        genwqe device descriptor
 * @uid:	unit ID
 * @regs:       register information
 * @max_regs:   number of register entries
 *
 * First the extended error list of the unit is walked and the
 * registers it describes are read into @regs. Then, for each of the 8
 * rings, the traps and traces announced in the diagnostic map are read
 * through the diagnostic mailbox register. Entries which do not fit
 * into @regs are dropped by set_reg_idx().
 *
 * Return: always 0
 */
int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
			  struct genwqe_reg *regs, unsigned int max_regs)
{
	int i, traps, traces, trace, trace_entries, trace_entry, ring;
	unsigned int idx = 0;
	u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
	u64 eevptr, e, val, addr;

	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
	eevptr = __genwqe_readq(cd, eevptr_addr);

	/* 0 and all-ones both mean: no usable list */
	if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
		while (1) {
			/* a list element of 0 or all-ones ends the list */
			e = __genwqe_readq(cd, l_addr);
			if ((e == 0x0) || (e == 0xffffffffffffffffull))
				break;

			d_addr = (e & 0x0000000000ffffffull);	    /* 23:0 */
			d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
			/* bit 39; d_type is only tested for non-zero */
			d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
			d_addr |= GENWQE_UID_OFFS(uid);

			if (d_type) {
				/* read the same register d_len times */
				for (i = 0; i < (int)d_len; i++) {
					val = __genwqe_readq(cd, d_addr);
					set_reg_idx(cd, regs, &idx, max_regs,
						    d_addr, i, val);
				}
			} else {
				/* read d_len bytes of consecutive registers */
				d_len >>= 3; /* Size in bytes! */
				for (i = 0; i < (int)d_len; i++, d_addr += 8) {
					val = __genwqe_readq(cd, d_addr);
					set_reg_idx(cd, regs, &idx, max_regs,
						    d_addr, 0, val);
				}
			}
			l_addr += 8;
		}
	}

	/*
	 * To save time, there are only 6 traces populated on Uid=2,
	 * Ring=1. each with iters=512.
	 */
	for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
					      2...7 are ASI rings */
		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
		val = __genwqe_readq(cd, addr);

		if ((val == 0x0ull) || (val == -1ull))
			continue;

		traps = (val >> 24) & 0xff;	/* Number of Traps	*/
		traces = (val >> 16) & 0xff;	/* Number of Traces	*/
		trace_entries = val & 0xffff;	/* Entries per trace	*/

		/* Note: This is a combined loop that dumps both the traps */
		/* (for the trace == 0 case) as well as the traces 1 to    */
		/* 'traces'.						   */
		for (trace = 0; trace <= traces; trace++) {
			u32 diag_sel =
				GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);

			/* select ring/trace before reading the mailbox */
			addr = (GENWQE_UID_OFFS(uid) |
				IO_EXTENDED_DIAG_SELECTOR);
			__genwqe_writeq(cd, addr, diag_sel);

			/* every entry is read from the same mailbox address */
			for (trace_entry = 0;
			     trace_entry < (trace ? trace_entries : traps);
			     trace_entry++) {
				addr = (GENWQE_UID_OFFS(uid) |
					IO_EXTENDED_DIAG_READ_MBX);
				val = __genwqe_readq(cd, addr);
				set_reg_idx(cd, regs, &idx, max_regs, addr,
					    (diag_sel<<16) | trace_entry, val);
			}
		}
	}
	return 0;
}
    968
    969/**
    970 * genwqe_write_vreg() - Write register in virtual window
    971 * @cd:	        genwqe device descriptor
    972 * @reg:	register (byte) offset within BAR
    973 * @val:	value to write
    974 * @func:	PCI virtual function
    975 *
    976 * Note, these registers are only accessible to the PF through the
    977 * VF-window. It is not intended for the VF to access.
    978 */
    979int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
    980{
    981	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
    982	__genwqe_writeq(cd, reg, val);
    983	return 0;
    984}
    985
    986/**
    987 * genwqe_read_vreg() - Read register in virtual window
    988 * @cd:	        genwqe device descriptor
    989 * @reg:	register (byte) offset within BAR
    990 * @func:	PCI virtual function
    991 *
    992 * Note, these registers are only accessible to the PF through the
    993 * VF-window. It is not intended for the VF to access.
    994 */
    995u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
    996{
    997	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
    998	return __genwqe_readq(cd, reg);
    999}
   1000
   1001/**
   1002 * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card
   1003 * @cd:	        genwqe device descriptor
   1004 *
   1005 * Note: From a design perspective it turned out to be a bad idea to
   1006 * use codes here to specifiy the frequency/speed values. An old
   1007 * driver cannot understand new codes and is therefore always a
   1008 * problem. Better is to measure out the value or put the
   1009 * speed/frequency directly into a register which is always a valid
   1010 * value for old as well as for new software.
   1011 *
   1012 * Return: Card clock in MHz
   1013 */
   1014int genwqe_base_clock_frequency(struct genwqe_dev *cd)
   1015{
   1016	u16 speed;		/*         MHz  MHz  MHz  MHz */
   1017	static const int speed_grade[] = { 250, 200, 166, 175 };
   1018
   1019	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
   1020	if (speed >= ARRAY_SIZE(speed_grade))
   1021		return 0;	/* illegal value */
   1022
   1023	return speed_grade[speed];
   1024}
   1025
   1026/**
   1027 * genwqe_stop_traps() - Stop traps
   1028 * @cd:	        genwqe device descriptor
   1029 *
   1030 * Before reading out the analysis data, we need to stop the traps.
   1031 */
   1032void genwqe_stop_traps(struct genwqe_dev *cd)
   1033{
   1034	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
   1035}
   1036
   1037/**
   1038 * genwqe_start_traps() - Start traps
   1039 * @cd:	        genwqe device descriptor
   1040 *
   1041 * After having read the data, we can/must enable the traps again.
   1042 */
   1043void genwqe_start_traps(struct genwqe_dev *cd)
   1044{
   1045	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);
   1046
   1047	if (genwqe_need_err_masking(cd))
   1048		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
   1049}