cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

icm.c (11988B)


/*
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

#include <linux/mlx4/cmd.h>

#include "mlx4.h"
#include "icm.h"
#include "fw.h"

/*
 * We allocate in as big chunks as we can, up to a maximum of 256 KB
 * per chunk. Note that the chunks are not necessarily in contiguous
 * physical memory.
 */
enum {
	MLX4_ICM_ALLOC_SIZE	= 1 << 18,
	MLX4_TABLE_CHUNK_SIZE	= 1 << 18,
};
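
/*
 * Note (added for clarity): with 4 KB pages, MLX4_ICM_ALLOC_SIZE
 * (1 << 18 = 256 KB) corresponds to 64 pages, so get_order() on it
 * yields 6; mlx4_alloc_icm() below starts at that order and falls
 * back to smaller orders when higher-order allocations fail.
 */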

static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
{
	int i;

	if (chunk->nsg > 0)
		dma_unmap_sg(&dev->persist->pdev->dev, chunk->sg, chunk->npages,
			     DMA_BIDIRECTIONAL);

	for (i = 0; i < chunk->npages; ++i)
		__free_pages(sg_page(&chunk->sg[i]),
			     get_order(chunk->sg[i].length));
}

static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
{
	int i;

	for (i = 0; i < chunk->npages; ++i)
		dma_free_coherent(&dev->persist->pdev->dev,
				  chunk->buf[i].size,
				  chunk->buf[i].addr,
				  chunk->buf[i].dma_addr);
}

void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
{
	struct mlx4_icm_chunk *chunk, *tmp;

	if (!icm)
		return;

	list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
		if (coherent)
			mlx4_free_icm_coherent(dev, chunk);
		else
			mlx4_free_icm_pages(dev, chunk);

		kfree(chunk);
	}

	kfree(icm);
}

static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
				gfp_t gfp_mask, int node)
{
	struct page *page;

	page = alloc_pages_node(node, gfp_mask, order);
	if (!page) {
		page = alloc_pages(gfp_mask, order);
		if (!page)
			return -ENOMEM;
	}

	sg_set_page(mem, page, PAGE_SIZE << order, 0);
	return 0;
}

static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf,
				   int order, gfp_t gfp_mask)
{
	buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order,
				       &buf->dma_addr, gfp_mask);
	if (!buf->addr)
		return -ENOMEM;

	if (offset_in_page(buf->addr)) {
		dma_free_coherent(dev, PAGE_SIZE << order, buf->addr,
				  buf->dma_addr);
		return -ENOMEM;
	}

	buf->size = PAGE_SIZE << order;
	return 0;
}

struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
				gfp_t gfp_mask, int coherent)
{
	struct mlx4_icm *icm;
	struct mlx4_icm_chunk *chunk = NULL;
	int cur_order;
	gfp_t mask;
	int ret;

	/* We use sg_set_buf for coherent allocs, which assumes low memory */
	BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));

	icm = kmalloc_node(sizeof(*icm),
			   gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
			   dev->numa_node);
	if (!icm) {
		icm = kmalloc(sizeof(*icm),
			      gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
		if (!icm)
			return NULL;
	}

	icm->refcount = 0;
	INIT_LIST_HEAD(&icm->chunk_list);

	cur_order = get_order(MLX4_ICM_ALLOC_SIZE);

	while (npages > 0) {
		if (!chunk) {
			chunk = kzalloc_node(sizeof(*chunk),
					     gfp_mask & ~(__GFP_HIGHMEM |
							  __GFP_NOWARN),
					     dev->numa_node);
			if (!chunk) {
				chunk = kzalloc(sizeof(*chunk),
						gfp_mask & ~(__GFP_HIGHMEM |
							     __GFP_NOWARN));
				if (!chunk)
					goto fail;
			}
			chunk->coherent = coherent;

			if (!coherent)
				sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN);
			list_add_tail(&chunk->list, &icm->chunk_list);
		}

		while (1 << cur_order > npages)
			--cur_order;

		mask = gfp_mask;
		if (cur_order)
			mask &= ~__GFP_DIRECT_RECLAIM;

		if (coherent)
			ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev,
						&chunk->buf[chunk->npages],
						cur_order, mask);
		else
			ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages],
						   cur_order, mask,
						   dev->numa_node);

		if (ret) {
			if (--cur_order < 0)
				goto fail;
			else
				continue;
		}

		++chunk->npages;

		if (coherent)
			++chunk->nsg;
		else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
			chunk->nsg = dma_map_sg(&dev->persist->pdev->dev,
						chunk->sg, chunk->npages,
						DMA_BIDIRECTIONAL);

			if (chunk->nsg <= 0)
				goto fail;
		}

		if (chunk->npages == MLX4_ICM_CHUNK_LEN)
			chunk = NULL;

		npages -= 1 << cur_order;
	}

	if (!coherent && chunk) {
		chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, chunk->sg,
					chunk->npages, DMA_BIDIRECTIONAL);

		if (chunk->nsg <= 0)
			goto fail;
	}

	return icm;

fail:
	mlx4_free_icm(dev, icm, coherent);
	return NULL;
}

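/*
 * Thin wrappers (comment added for clarity) around the firmware commands
 * that map ICM memory at a given ICM virtual address and unmap it again,
 * plus the ICM auxiliary-area variants.
 */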
static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
{
	return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt);
}

static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
{
	return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}

int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
{
	return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM_AUX, icm, -1);
}

int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
{
	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX,
			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}

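/*
 * mlx4_table_get() (comment added for clarity) makes sure the ICM chunk
 * backing @obj is allocated and mapped and takes a reference on it;
 * mlx4_table_put() drops that reference and unmaps/frees the chunk once
 * its refcount reaches zero.
 */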
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
	u32 i = (obj & (table->num_obj - 1)) /
			(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
	int ret = 0;

	mutex_lock(&table->mutex);

	if (table->icm[i]) {
		++table->icm[i]->refcount;
		goto out;
	}

	table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
				       (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
				       __GFP_NOWARN, table->coherent);
	if (!table->icm[i]) {
		ret = -ENOMEM;
		goto out;
	}

	if (mlx4_MAP_ICM(dev, table->icm[i], table->virt +
			 (u64) i * MLX4_TABLE_CHUNK_SIZE)) {
		mlx4_free_icm(dev, table->icm[i], table->coherent);
		table->icm[i] = NULL;
		ret = -ENOMEM;
		goto out;
	}

	++table->icm[i]->refcount;

out:
	mutex_unlock(&table->mutex);
	return ret;
}

void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
	u32 i;
	u64 offset;

	i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);

	mutex_lock(&table->mutex);

	if (--table->icm[i]->refcount == 0) {
		offset = (u64) i * MLX4_TABLE_CHUNK_SIZE;
		mlx4_UNMAP_ICM(dev, table->virt + offset,
			       MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
		mlx4_free_icm(dev, table->icm[i], table->coherent);
		table->icm[i] = NULL;
	}

	mutex_unlock(&table->mutex);
}

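/*
 * mlx4_table_find() (comment added for clarity) walks the chunk list of
 * the table chunk that holds @obj and returns the kernel virtual address
 * of the object's backing memory, optionally reporting its DMA address
 * via @dma_handle; it is only valid for lowmem tables.
 */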
void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
			dma_addr_t *dma_handle)
{
	int offset, dma_offset, i;
	u64 idx;
	struct mlx4_icm_chunk *chunk;
	struct mlx4_icm *icm;
	void *addr = NULL;

	if (!table->lowmem)
		return NULL;

	mutex_lock(&table->mutex);

	idx = (u64) (obj & (table->num_obj - 1)) * table->obj_size;
	icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
	dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;

	if (!icm)
		goto out;

	list_for_each_entry(chunk, &icm->chunk_list, list) {
		for (i = 0; i < chunk->npages; ++i) {
			dma_addr_t dma_addr;
			size_t len;

			if (table->coherent) {
				len = chunk->buf[i].size;
				dma_addr = chunk->buf[i].dma_addr;
				addr = chunk->buf[i].addr;
			} else {
				struct page *page;

				len = sg_dma_len(&chunk->sg[i]);
				dma_addr = sg_dma_address(&chunk->sg[i]);

				/* XXX: we should never do this for highmem
				 * allocation.  This function either needs
				 * to be split, or the kernel virtual address
				 * return needs to be made optional.
				 */
				page = sg_page(&chunk->sg[i]);
				addr = lowmem_page_address(page);
			}

			if (dma_handle && dma_offset >= 0) {
				if (len > dma_offset)
					*dma_handle = dma_addr + dma_offset;
				dma_offset -= len;
			}

			/*
			 * DMA mapping can merge pages but not split them,
			 * so if we found the page, dma_handle has already
			 * been assigned to.
			 */
			if (len > offset)
				goto out;
			offset -= len;
		}
	}

	addr = NULL;
out:
	mutex_unlock(&table->mutex);
	return addr ? addr + offset : NULL;
}

int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
			 u32 start, u32 end)
{
	int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
	int err;
	u32 i;

	for (i = start; i <= end; i += inc) {
		err = mlx4_table_get(dev, table, i);
		if (err)
			goto fail;
	}

	return 0;

fail:
	while (i > start) {
		i -= inc;
		mlx4_table_put(dev, table, i);
	}

	return err;
}

void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
			  u32 start, u32 end)
{
	u32 i;

	for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
		mlx4_table_put(dev, table, i);
}

int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
			u64 virt, int obj_size,	u32 nobj, int reserved,
			int use_lowmem, int use_coherent)
{
	int obj_per_chunk;
	int num_icm;
	unsigned chunk_size;
	int i;
	u64 size;

	obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
	if (WARN_ON(!obj_per_chunk))
		return -EINVAL;
	num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);

	table->icm      = kvcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
	if (!table->icm)
		return -ENOMEM;
	table->virt     = virt;
	table->num_icm  = num_icm;
	table->num_obj  = nobj;
	table->obj_size = obj_size;
	table->lowmem   = use_lowmem;
	table->coherent = use_coherent;
	mutex_init(&table->mutex);

	size = (u64) nobj * obj_size;
	for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
		chunk_size = MLX4_TABLE_CHUNK_SIZE;
		if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > size)
			chunk_size = PAGE_ALIGN(size -
					i * MLX4_TABLE_CHUNK_SIZE);

		table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
					       (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
					       __GFP_NOWARN, use_coherent);
		if (!table->icm[i])
			goto err;
		if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) {
			mlx4_free_icm(dev, table->icm[i], use_coherent);
			table->icm[i] = NULL;
			goto err;
		}

		/*
		 * Add a reference to this ICM chunk so that it never
		 * gets freed (since it contains reserved firmware objects).
		 */
		++table->icm[i]->refcount;
	}

	return 0;

err:
	for (i = 0; i < num_icm; ++i)
		if (table->icm[i]) {
			mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
				       MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
			mlx4_free_icm(dev, table->icm[i], use_coherent);
		}

	kvfree(table->icm);

	return -ENOMEM;
}

void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
{
	int i;

	for (i = 0; i < table->num_icm; ++i)
		if (table->icm[i]) {
			mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
				       MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
			mlx4_free_icm(dev, table->icm[i], table->coherent);
		}

	kvfree(table->icm);
}
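
For reference, the table API above is typically driven as follows: initialize a table over a firmware-assigned ICM virtual range, take a reference on the chunk backing an object before touching it, look up the object's address, then drop the reference and tear the table down. The sketch below is a hypothetical illustration only, not part of icm.c or the mlx4 driver: the function name example_icm_usage, the object size and count, the icm_virt argument, and the stack-allocated table are invented for this example, and real callers embed their tables in the driver's private state.

/* Hypothetical usage sketch -- assumes standard mlx4 driver context. */
static int example_icm_usage(struct mlx4_dev *dev, u64 icm_virt)
{
	struct mlx4_icm_table table;	/* real callers embed this in their priv struct */
	dma_addr_t dma_handle;
	void *entry;
	int err;

	/* 4096 objects of 64 bytes each (exactly one 256 KB chunk), no
	 * reserved entries, lowmem mappings, non-coherent allocations. */
	err = mlx4_init_icm_table(dev, &table, icm_virt, 64, 4096, 0, 1, 0);
	if (err)
		return err;

	/* Allocate and map the chunk backing object 42, taking a reference. */
	err = mlx4_table_get(dev, &table, 42);
	if (err)
		goto out;

	/* Resolve the object's kernel virtual and DMA addresses. */
	entry = mlx4_table_find(&table, 42, &dma_handle);
	if (!entry)
		err = -ENOMEM;

	/* Drop the reference; the chunk is unmapped and freed at refcount 0. */
	mlx4_table_put(dev, &table, 42);
out:
	mlx4_cleanup_icm_table(dev, &table);
	return err;
}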