cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vio.c (49501B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * IBM PowerPC Virtual I/O Infrastructure Support.
      4 *
      5 *    Copyright (c) 2003,2008 IBM Corp.
      6 *     Dave Engebretsen engebret@us.ibm.com
      7 *     Santiago Leon santil@us.ibm.com
      8 *     Hollis Blanchard <hollisb@us.ibm.com>
      9 *     Stephen Rothwell
     10 *     Robert Jennings <rcjenn@us.ibm.com>
     11 */
     12
     13#include <linux/cpu.h>
     14#include <linux/types.h>
     15#include <linux/delay.h>
     16#include <linux/stat.h>
     17#include <linux/device.h>
     18#include <linux/init.h>
     19#include <linux/slab.h>
     20#include <linux/console.h>
     21#include <linux/export.h>
     22#include <linux/mm.h>
     23#include <linux/dma-map-ops.h>
     24#include <linux/kobject.h>
     25#include <linux/kexec.h>
     26#include <linux/of_irq.h>
     27
     28#include <asm/iommu.h>
     29#include <asm/dma.h>
     30#include <asm/vio.h>
     31#include <asm/prom.h>
     32#include <asm/firmware.h>
     33#include <asm/tce.h>
     34#include <asm/page.h>
     35#include <asm/hvcall.h>
     36#include <asm/machdep.h>
     37
     38static struct vio_dev vio_bus_device  = { /* fake "parent" device */
     39	.name = "vio",
     40	.type = "",
     41	.dev.init_name = "vio",
     42	.dev.bus = &vio_bus_type,
     43};
     44
     45#ifdef CONFIG_PPC_SMLPAR
     46/**
     47 * vio_cmo_pool - A pool of IO memory for CMO use
     48 *
     49 * @size: The size of the pool in bytes
     50 * @free: The amount of free memory in the pool
     51 */
     52struct vio_cmo_pool {
     53	size_t size;
     54	size_t free;
     55};
     56
     57/* How many ms to delay queued balance work */
     58#define VIO_CMO_BALANCE_DELAY 100
     59
     60/* Portion out IO memory to CMO devices by this chunk size */
     61#define VIO_CMO_BALANCE_CHUNK 131072
     62
     63/**
     64 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
     65 *
      66 * @viodev: struct vio_dev pointer
     67 * @list: pointer to other devices on bus that are being tracked
     68 */
     69struct vio_cmo_dev_entry {
     70	struct vio_dev *viodev;
     71	struct list_head list;
     72};
     73
     74/**
     75 * vio_cmo - VIO bus accounting structure for CMO entitlement
     76 *
     77 * @lock: spinlock for entire structure
     78 * @balance_q: work queue for balancing system entitlement
     79 * @device_list: list of CMO-enabled devices requiring entitlement
     80 * @entitled: total system entitlement in bytes
     81 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
     82 * @excess: pool of excess entitlement not needed for device reserves or spare
     83 * @spare: IO memory for device hotplug functionality
     84 * @min: minimum necessary for system operation
     85 * @desired: desired memory for system operation
     86 * @curr: bytes currently allocated
     87 * @high: high water mark for IO data usage
     88 */
     89static struct vio_cmo {
     90	spinlock_t lock;
     91	struct delayed_work balance_q;
     92	struct list_head device_list;
     93	size_t entitled;
     94	struct vio_cmo_pool reserve;
     95	struct vio_cmo_pool excess;
     96	size_t spare;
     97	size_t min;
     98	size_t desired;
     99	size_t curr;
    100	size_t high;
    101} vio_cmo;
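
/*
 * Accounting note (editorial addition, not in the original source): the
 * pools above satisfy entitled == reserve.size + excess.size, and the spare
 * is carved out of the reserve pool.  Both vio_cmo_bus_init() and
 * vio_cmo_balance() below re-establish this relationship.
 */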
    102
    103/**
     104 * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
    105 */
    106static int vio_cmo_num_OF_devs(void)
    107{
    108	struct device_node *node_vroot;
    109	int count = 0;
    110
    111	/*
    112	 * Count the number of vdevice entries with an
    113	 * ibm,my-dma-window OF property
    114	 */
    115	node_vroot = of_find_node_by_name(NULL, "vdevice");
    116	if (node_vroot) {
    117		struct device_node *of_node;
    118		struct property *prop;
    119
    120		for_each_child_of_node(node_vroot, of_node) {
    121			prop = of_find_property(of_node, "ibm,my-dma-window",
    122			                       NULL);
    123			if (prop)
    124				count++;
    125		}
    126	}
    127	of_node_put(node_vroot);
    128	return count;
    129}
    130
    131/**
     132 * vio_cmo_alloc - allocate IO memory for CMO-enabled devices
    133 *
    134 * @viodev: VIO device requesting IO memory
    135 * @size: size of allocation requested
    136 *
    137 * Allocations come from memory reserved for the devices and any excess
    138 * IO memory available to all devices.  The spare pool used to service
    139 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
    140 * made available.
    141 *
    142 * Return codes:
    143 *  0 for successful allocation and -ENOMEM for a failure
    144 */
    145static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
    146{
    147	unsigned long flags;
    148	size_t reserve_free = 0;
    149	size_t excess_free = 0;
    150	int ret = -ENOMEM;
    151
    152	spin_lock_irqsave(&vio_cmo.lock, flags);
    153
    154	/* Determine the amount of free entitlement available in reserve */
    155	if (viodev->cmo.entitled > viodev->cmo.allocated)
    156		reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
    157
    158	/* If spare is not fulfilled, the excess pool can not be used. */
    159	if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
    160		excess_free = vio_cmo.excess.free;
    161
    162	/* The request can be satisfied */
    163	if ((reserve_free + excess_free) >= size) {
    164		vio_cmo.curr += size;
    165		if (vio_cmo.curr > vio_cmo.high)
    166			vio_cmo.high = vio_cmo.curr;
    167		viodev->cmo.allocated += size;
    168		size -= min(reserve_free, size);
    169		vio_cmo.excess.free -= size;
    170		ret = 0;
    171	}
    172
    173	spin_unlock_irqrestore(&vio_cmo.lock, flags);
    174	return ret;
    175}
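
/*
 * Worked example (editorial addition): with viodev->cmo.entitled = 64K and
 * viodev->cmo.allocated = 48K, reserve_free above is 16K.  A 24K request is
 * then charged 16K against the device's remaining reserve and 8K against
 * vio_cmo.excess.free, provided the spare pool is fully funded
 * (vio_cmo.spare >= VIO_CMO_MIN_ENT) and the excess pool can cover it.
 */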
    176
    177/**
     178 * vio_cmo_dealloc - deallocate IO memory from CMO-enabled devices
    179 * @viodev: VIO device freeing IO memory
    180 * @size: size of deallocation
    181 *
    182 * IO memory is freed by the device back to the correct memory pools.
    183 * The spare pool is replenished first from either memory pool, then
    184 * the reserve pool is used to reduce device entitlement, the excess
    185 * pool is used to increase the reserve pool toward the desired entitlement
    186 * target, and then the remaining memory is returned to the pools.
    187 *
    188 */
    189static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
    190{
    191	unsigned long flags;
    192	size_t spare_needed = 0;
    193	size_t excess_freed = 0;
    194	size_t reserve_freed = size;
    195	size_t tmp;
    196	int balance = 0;
    197
    198	spin_lock_irqsave(&vio_cmo.lock, flags);
    199	vio_cmo.curr -= size;
    200
    201	/* Amount of memory freed from the excess pool */
    202	if (viodev->cmo.allocated > viodev->cmo.entitled) {
    203		excess_freed = min(reserve_freed, (viodev->cmo.allocated -
    204		                                   viodev->cmo.entitled));
    205		reserve_freed -= excess_freed;
    206	}
    207
    208	/* Remove allocation from device */
    209	viodev->cmo.allocated -= (reserve_freed + excess_freed);
    210
    211	/* Spare is a subset of the reserve pool, replenish it first. */
    212	spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
    213
    214	/*
    215	 * Replenish the spare in the reserve pool from the excess pool.
    216	 * This moves entitlement into the reserve pool.
    217	 */
    218	if (spare_needed && excess_freed) {
    219		tmp = min(excess_freed, spare_needed);
    220		vio_cmo.excess.size -= tmp;
    221		vio_cmo.reserve.size += tmp;
    222		vio_cmo.spare += tmp;
    223		excess_freed -= tmp;
    224		spare_needed -= tmp;
    225		balance = 1;
    226	}
    227
    228	/*
    229	 * Replenish the spare in the reserve pool from the reserve pool.
    230	 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
    231	 * if needed, and gives it to the spare pool. The amount of used
    232	 * memory in this pool does not change.
    233	 */
    234	if (spare_needed && reserve_freed) {
    235		tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
    236
    237		vio_cmo.spare += tmp;
    238		viodev->cmo.entitled -= tmp;
    239		reserve_freed -= tmp;
    240		spare_needed -= tmp;
    241		balance = 1;
    242	}
    243
    244	/*
    245	 * Increase the reserve pool until the desired allocation is met.
    246	 * Move an allocation freed from the excess pool into the reserve
    247	 * pool and schedule a balance operation.
    248	 */
    249	if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
    250		tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
    251
    252		vio_cmo.excess.size -= tmp;
    253		vio_cmo.reserve.size += tmp;
    254		excess_freed -= tmp;
    255		balance = 1;
    256	}
    257
     258	/* Return any remaining freed excess entitlement to the excess pool */
    259	if (excess_freed)
    260		vio_cmo.excess.free += excess_freed;
    261
    262	if (balance)
    263		schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
    264	spin_unlock_irqrestore(&vio_cmo.lock, flags);
    265}
    266
    267/**
    268 * vio_cmo_entitlement_update - Manage system entitlement changes
    269 *
    270 * @new_entitlement: new system entitlement to attempt to accommodate
    271 *
    272 * Increases in entitlement will be used to fulfill the spare entitlement
    273 * and the rest is given to the excess pool.  Decreases, if they are
    274 * possible, come from the excess pool and from unused device entitlement
    275 *
     276 * Returns: 0 on success, -ENOMEM when the change cannot be made
    277 */
    278int vio_cmo_entitlement_update(size_t new_entitlement)
    279{
    280	struct vio_dev *viodev;
    281	struct vio_cmo_dev_entry *dev_ent;
    282	unsigned long flags;
    283	size_t avail, delta, tmp;
    284
    285	spin_lock_irqsave(&vio_cmo.lock, flags);
    286
    287	/* Entitlement increases */
    288	if (new_entitlement > vio_cmo.entitled) {
    289		delta = new_entitlement - vio_cmo.entitled;
    290
    291		/* Fulfill spare allocation */
    292		if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
    293			tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
    294			vio_cmo.spare += tmp;
    295			vio_cmo.reserve.size += tmp;
    296			delta -= tmp;
    297		}
    298
    299		/* Remaining new allocation goes to the excess pool */
    300		vio_cmo.entitled += delta;
    301		vio_cmo.excess.size += delta;
    302		vio_cmo.excess.free += delta;
    303
    304		goto out;
    305	}
    306
    307	/* Entitlement decreases */
    308	delta = vio_cmo.entitled - new_entitlement;
    309	avail = vio_cmo.excess.free;
    310
    311	/*
    312	 * Need to check how much unused entitlement each device can
    313	 * sacrifice to fulfill entitlement change.
    314	 */
    315	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
    316		if (avail >= delta)
    317			break;
    318
    319		viodev = dev_ent->viodev;
    320		if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
    321		    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
    322				avail += viodev->cmo.entitled -
    323				         max_t(size_t, viodev->cmo.allocated,
    324				               VIO_CMO_MIN_ENT);
    325	}
    326
    327	if (delta <= avail) {
    328		vio_cmo.entitled -= delta;
    329
    330		/* Take entitlement from the excess pool first */
    331		tmp = min(vio_cmo.excess.free, delta);
    332		vio_cmo.excess.size -= tmp;
    333		vio_cmo.excess.free -= tmp;
    334		delta -= tmp;
    335
    336		/*
    337		 * Remove all but VIO_CMO_MIN_ENT bytes from devices
    338		 * until entitlement change is served
    339		 */
    340		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
    341			if (!delta)
    342				break;
    343
    344			viodev = dev_ent->viodev;
    345			tmp = 0;
    346			if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
    347			    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
    348				tmp = viodev->cmo.entitled -
    349				      max_t(size_t, viodev->cmo.allocated,
    350				            VIO_CMO_MIN_ENT);
    351			viodev->cmo.entitled -= min(tmp, delta);
    352			delta -= min(tmp, delta);
    353		}
    354	} else {
    355		spin_unlock_irqrestore(&vio_cmo.lock, flags);
    356		return -ENOMEM;
    357	}
    358
    359out:
    360	schedule_delayed_work(&vio_cmo.balance_q, 0);
    361	spin_unlock_irqrestore(&vio_cmo.lock, flags);
    362	return 0;
    363}
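
/*
 * Illustrative sketch (editorial addition, not part of this file): how a
 * hypothetical caller might apply a new memory entitlement value reported by
 * the hypervisor.  The function name is an assumption for illustration only.
 */
static int example_apply_entitlement(size_t new_entitlement)
{
	int rc;

	rc = vio_cmo_entitlement_update(new_entitlement);
	if (rc == -ENOMEM)
		pr_warn("vio: cannot shrink IO entitlement to %zu bytes\n",
			new_entitlement);
	return rc;
}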
    364
    365/**
    366 * vio_cmo_balance - Balance entitlement among devices
    367 *
    368 * @work: work queue structure for this operation
    369 *
    370 * Any system entitlement above the minimum needed for devices, or
    371 * already allocated to devices, can be distributed to the devices.
    372 * The list of devices is iterated through to recalculate the desired
    373 * entitlement level and to determine how much entitlement above the
    374 * minimum entitlement is allocated to devices.
    375 *
    376 * Small chunks of the available entitlement are given to devices until
    377 * their requirements are fulfilled or there is no entitlement left to give.
     378 * Upon completion, the sizes of the reserve and excess pools are recalculated.
    379 *
    380 * The system minimum entitlement level is also recalculated here.
    381 * Entitlement will be reserved for devices even after vio_bus_remove to
    382 * accommodate reloading the driver.  The OF tree is walked to count the
    383 * number of devices present and this will remove entitlement for devices
    384 * that have actually left the system after having vio_bus_remove called.
    385 */
    386static void vio_cmo_balance(struct work_struct *work)
    387{
    388	struct vio_cmo *cmo;
    389	struct vio_dev *viodev;
    390	struct vio_cmo_dev_entry *dev_ent;
    391	unsigned long flags;
    392	size_t avail = 0, level, chunk, need;
    393	int devcount = 0, fulfilled;
    394
    395	cmo = container_of(work, struct vio_cmo, balance_q.work);
    396
    397	spin_lock_irqsave(&vio_cmo.lock, flags);
    398
    399	/* Calculate minimum entitlement and fulfill spare */
    400	cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
    401	BUG_ON(cmo->min > cmo->entitled);
    402	cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
    403	cmo->min += cmo->spare;
    404	cmo->desired = cmo->min;
    405
    406	/*
    407	 * Determine how much entitlement is available and reset device
    408	 * entitlements
    409	 */
    410	avail = cmo->entitled - cmo->spare;
    411	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
    412		viodev = dev_ent->viodev;
    413		devcount++;
    414		viodev->cmo.entitled = VIO_CMO_MIN_ENT;
    415		cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
    416		avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
    417	}
    418
    419	/*
    420	 * Having provided each device with the minimum entitlement, loop
    421	 * over the devices portioning out the remaining entitlement
    422	 * until there is nothing left.
    423	 */
    424	level = VIO_CMO_MIN_ENT;
    425	while (avail) {
    426		fulfilled = 0;
    427		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
    428			viodev = dev_ent->viodev;
    429
    430			if (viodev->cmo.desired <= level) {
    431				fulfilled++;
    432				continue;
    433			}
    434
    435			/*
    436			 * Give the device up to VIO_CMO_BALANCE_CHUNK
    437			 * bytes of entitlement, but do not exceed the
    438			 * desired level of entitlement for the device.
    439			 */
    440			chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
    441			chunk = min(chunk, (viodev->cmo.desired -
    442			                    viodev->cmo.entitled));
    443			viodev->cmo.entitled += chunk;
    444
    445			/*
    446			 * If the memory for this entitlement increase was
    447			 * already allocated to the device it does not come
    448			 * from the available pool being portioned out.
    449			 */
    450			need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
    451			       max(viodev->cmo.allocated, level);
    452			avail -= need;
    453
    454		}
    455		if (fulfilled == devcount)
    456			break;
    457		level += VIO_CMO_BALANCE_CHUNK;
    458	}
    459
    460	/* Calculate new reserve and excess pool sizes */
    461	cmo->reserve.size = cmo->min;
    462	cmo->excess.free = 0;
    463	cmo->excess.size = 0;
    464	need = 0;
    465	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
    466		viodev = dev_ent->viodev;
    467		/* Calculated reserve size above the minimum entitlement */
    468		if (viodev->cmo.entitled)
    469			cmo->reserve.size += (viodev->cmo.entitled -
    470			                      VIO_CMO_MIN_ENT);
    471		/* Calculated used excess entitlement */
    472		if (viodev->cmo.allocated > viodev->cmo.entitled)
    473			need += viodev->cmo.allocated - viodev->cmo.entitled;
    474	}
    475	cmo->excess.size = cmo->entitled - cmo->reserve.size;
    476	cmo->excess.free = cmo->excess.size - need;
    477
    478	cancel_delayed_work(to_delayed_work(work));
    479	spin_unlock_irqrestore(&vio_cmo.lock, flags);
    480}
    481
    482static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
    483					  dma_addr_t *dma_handle, gfp_t flag,
    484					  unsigned long attrs)
    485{
    486	struct vio_dev *viodev = to_vio_dev(dev);
    487	void *ret;
    488
    489	if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) {
    490		atomic_inc(&viodev->cmo.allocs_failed);
    491		return NULL;
    492	}
    493
    494	ret = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
    495				    dma_handle, dev->coherent_dma_mask, flag,
    496				    dev_to_node(dev));
    497	if (unlikely(ret == NULL)) {
    498		vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
    499		atomic_inc(&viodev->cmo.allocs_failed);
    500	}
    501
    502	return ret;
    503}
    504
    505static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
    506					void *vaddr, dma_addr_t dma_handle,
    507					unsigned long attrs)
    508{
    509	struct vio_dev *viodev = to_vio_dev(dev);
    510
    511	iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
    512	vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
    513}
    514
    515static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
    516                                         unsigned long offset, size_t size,
    517                                         enum dma_data_direction direction,
    518                                         unsigned long attrs)
    519{
    520	struct vio_dev *viodev = to_vio_dev(dev);
    521	struct iommu_table *tbl = get_iommu_table_base(dev);
    522	dma_addr_t ret = DMA_MAPPING_ERROR;
    523
    524	if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))))
    525		goto out_fail;
    526	ret = iommu_map_page(dev, tbl, page, offset, size, dma_get_mask(dev),
    527			direction, attrs);
    528	if (unlikely(ret == DMA_MAPPING_ERROR))
    529		goto out_deallocate;
    530	return ret;
    531
    532out_deallocate:
    533	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
    534out_fail:
    535	atomic_inc(&viodev->cmo.allocs_failed);
    536	return DMA_MAPPING_ERROR;
    537}
    538
    539static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
    540				     size_t size,
    541				     enum dma_data_direction direction,
    542				     unsigned long attrs)
    543{
    544	struct vio_dev *viodev = to_vio_dev(dev);
    545	struct iommu_table *tbl = get_iommu_table_base(dev);
    546
    547	iommu_unmap_page(tbl, dma_handle, size, direction, attrs);
    548	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
    549}
    550
    551static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
    552                                int nelems, enum dma_data_direction direction,
    553                                unsigned long attrs)
    554{
    555	struct vio_dev *viodev = to_vio_dev(dev);
    556	struct iommu_table *tbl = get_iommu_table_base(dev);
    557	struct scatterlist *sgl;
    558	int ret, count;
    559	size_t alloc_size = 0;
    560
    561	for_each_sg(sglist, sgl, nelems, count)
    562		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
    563
    564	ret = vio_cmo_alloc(viodev, alloc_size);
    565	if (ret)
    566		goto out_fail;
    567	ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev),
    568			direction, attrs);
    569	if (unlikely(!ret))
    570		goto out_deallocate;
    571
    572	for_each_sg(sglist, sgl, ret, count)
    573		alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
    574	if (alloc_size)
    575		vio_cmo_dealloc(viodev, alloc_size);
    576	return ret;
    577
    578out_deallocate:
    579	vio_cmo_dealloc(viodev, alloc_size);
    580out_fail:
    581	atomic_inc(&viodev->cmo.allocs_failed);
    582	return ret;
    583}
    584
    585static void vio_dma_iommu_unmap_sg(struct device *dev,
    586		struct scatterlist *sglist, int nelems,
    587		enum dma_data_direction direction,
    588		unsigned long attrs)
    589{
    590	struct vio_dev *viodev = to_vio_dev(dev);
    591	struct iommu_table *tbl = get_iommu_table_base(dev);
    592	struct scatterlist *sgl;
    593	size_t alloc_size = 0;
    594	int count;
    595
    596	for_each_sg(sglist, sgl, nelems, count)
    597		alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
    598
    599	ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs);
    600	vio_cmo_dealloc(viodev, alloc_size);
    601}
    602
    603static const struct dma_map_ops vio_dma_mapping_ops = {
    604	.alloc             = vio_dma_iommu_alloc_coherent,
    605	.free              = vio_dma_iommu_free_coherent,
    606	.map_sg            = vio_dma_iommu_map_sg,
    607	.unmap_sg          = vio_dma_iommu_unmap_sg,
    608	.map_page          = vio_dma_iommu_map_page,
    609	.unmap_page        = vio_dma_iommu_unmap_page,
    610	.dma_supported     = dma_iommu_dma_supported,
    611	.get_required_mask = dma_iommu_get_required_mask,
    612	.mmap		   = dma_common_mmap,
    613	.get_sgtable	   = dma_common_get_sgtable,
    614	.alloc_pages	   = dma_common_alloc_pages,
    615	.free_pages	   = dma_common_free_pages,
    616};
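
/*
 * Illustrative sketch (editorial addition): a CMO-aware driver does not call
 * the ops above directly; it uses the generic DMA API on &viodev->dev, and
 * the vio_dma_mapping_ops installed by vio_cmo_set_dma_ops() perform the
 * entitlement accounting underneath.  The helper name is hypothetical.
 */
static void *example_alloc_dma_buffer(struct vio_dev *viodev, size_t len,
				       dma_addr_t *handle)
{
	/* Dispatches to vio_dma_iommu_alloc_coherent() via the ops table. */
	return dma_alloc_coherent(&viodev->dev, len, handle, GFP_KERNEL);
}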
    617
    618/**
    619 * vio_cmo_set_dev_desired - Set desired entitlement for a device
    620 *
    621 * @viodev: struct vio_dev for device to alter
    622 * @desired: new desired entitlement level in bytes
    623 *
    624 * For use by devices to request a change to their entitlement at runtime or
    625 * through sysfs.  The desired entitlement level is changed and a balancing
    626 * of system resources is scheduled to run in the future.
    627 */
    628void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
    629{
    630	unsigned long flags;
    631	struct vio_cmo_dev_entry *dev_ent;
    632	int found = 0;
    633
    634	if (!firmware_has_feature(FW_FEATURE_CMO))
    635		return;
    636
    637	spin_lock_irqsave(&vio_cmo.lock, flags);
    638	if (desired < VIO_CMO_MIN_ENT)
    639		desired = VIO_CMO_MIN_ENT;
    640
    641	/*
    642	 * Changes will not be made for devices not in the device list.
    643	 * If it is not in the device list, then no driver is loaded
    644	 * for the device and it can not receive entitlement.
    645	 */
    646	list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
    647		if (viodev == dev_ent->viodev) {
    648			found = 1;
    649			break;
    650		}
    651	if (!found) {
    652		spin_unlock_irqrestore(&vio_cmo.lock, flags);
    653		return;
    654	}
    655
    656	/* Increase/decrease in desired device entitlement */
    657	if (desired >= viodev->cmo.desired) {
     658		/* Just bump the bus and device values prior to a balance */
    659		vio_cmo.desired += desired - viodev->cmo.desired;
    660		viodev->cmo.desired = desired;
    661	} else {
    662		/* Decrease bus and device values for desired entitlement */
    663		vio_cmo.desired -= viodev->cmo.desired - desired;
    664		viodev->cmo.desired = desired;
    665		/*
    666		 * If less entitlement is desired than current entitlement, move
    667		 * any reserve memory in the change region to the excess pool.
    668		 */
    669		if (viodev->cmo.entitled > desired) {
    670			vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
    671			vio_cmo.excess.size += viodev->cmo.entitled - desired;
    672			/*
    673			 * If entitlement moving from the reserve pool to the
    674			 * excess pool is currently unused, add to the excess
    675			 * free counter.
    676			 */
    677			if (viodev->cmo.allocated < viodev->cmo.entitled)
    678				vio_cmo.excess.free += viodev->cmo.entitled -
    679				                       max(viodev->cmo.allocated, desired);
    680			viodev->cmo.entitled = desired;
    681		}
    682	}
    683	schedule_delayed_work(&vio_cmo.balance_q, 0);
    684	spin_unlock_irqrestore(&vio_cmo.lock, flags);
    685}
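
/*
 * Illustrative sketch (editorial addition): a driver that expects heavier
 * DMA traffic can raise its desired entitlement at runtime; user space can
 * set the same value through the cmo_desired sysfs attribute defined below.
 * The wrapper name is hypothetical.
 */
static void example_request_more_entitlement(struct vio_dev *viodev,
					     size_t bytes)
{
	/* Values below VIO_CMO_MIN_ENT are clamped up by the callee. */
	vio_cmo_set_dev_desired(viodev, bytes);
}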
    686
    687/**
    688 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
    689 *
     690 * @viodev: Pointer to struct vio_dev for device
     691 *
     692 * Determine the device's IO memory entitlement needs, attempting
    693 * to satisfy the system minimum entitlement at first and scheduling
    694 * a balance operation to take care of the rest at a later time.
    695 *
    696 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
    697 *          -ENOMEM when entitlement is not available for device or
    698 *          device entry.
    699 *
    700 */
    701static int vio_cmo_bus_probe(struct vio_dev *viodev)
    702{
    703	struct vio_cmo_dev_entry *dev_ent;
    704	struct device *dev = &viodev->dev;
    705	struct iommu_table *tbl;
    706	struct vio_driver *viodrv = to_vio_driver(dev->driver);
    707	unsigned long flags;
    708	size_t size;
    709	bool dma_capable = false;
    710
    711	tbl = get_iommu_table_base(dev);
    712
    713	/* A device requires entitlement if it has a DMA window property */
    714	switch (viodev->family) {
    715	case VDEVICE:
    716		if (of_get_property(viodev->dev.of_node,
    717					"ibm,my-dma-window", NULL))
    718			dma_capable = true;
    719		break;
    720	case PFO:
    721		dma_capable = false;
    722		break;
    723	default:
    724		dev_warn(dev, "unknown device family: %d\n", viodev->family);
    725		BUG();
    726		break;
    727	}
    728
    729	/* Configure entitlement for the device. */
    730	if (dma_capable) {
    731		/* Check that the driver is CMO enabled and get desired DMA */
    732		if (!viodrv->get_desired_dma) {
    733			dev_err(dev, "%s: device driver does not support CMO\n",
    734			        __func__);
    735			return -EINVAL;
    736		}
    737
    738		viodev->cmo.desired =
    739			IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
    740		if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
    741			viodev->cmo.desired = VIO_CMO_MIN_ENT;
    742		size = VIO_CMO_MIN_ENT;
    743
    744		dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
    745		                  GFP_KERNEL);
    746		if (!dev_ent)
    747			return -ENOMEM;
    748
    749		dev_ent->viodev = viodev;
    750		spin_lock_irqsave(&vio_cmo.lock, flags);
    751		list_add(&dev_ent->list, &vio_cmo.device_list);
    752	} else {
    753		viodev->cmo.desired = 0;
    754		size = 0;
    755		spin_lock_irqsave(&vio_cmo.lock, flags);
    756	}
    757
    758	/*
    759	 * If the needs for vio_cmo.min have not changed since they
    760	 * were last set, the number of devices in the OF tree has
    761	 * been constant and the IO memory for this is already in
    762	 * the reserve pool.
    763	 */
    764	if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
    765	                    VIO_CMO_MIN_ENT)) {
     766		/* Update desired entitlement if device requires it */
    767		if (size)
    768			vio_cmo.desired += (viodev->cmo.desired -
    769		                        VIO_CMO_MIN_ENT);
    770	} else {
    771		size_t tmp;
    772
    773		tmp = vio_cmo.spare + vio_cmo.excess.free;
    774		if (tmp < size) {
    775			dev_err(dev, "%s: insufficient free "
    776			        "entitlement to add device. "
    777			        "Need %lu, have %lu\n", __func__,
     778				size, tmp);
    779			spin_unlock_irqrestore(&vio_cmo.lock, flags);
    780			return -ENOMEM;
    781		}
    782
    783		/* Use excess pool first to fulfill request */
    784		tmp = min(size, vio_cmo.excess.free);
    785		vio_cmo.excess.free -= tmp;
    786		vio_cmo.excess.size -= tmp;
    787		vio_cmo.reserve.size += tmp;
    788
    789		/* Use spare if excess pool was insufficient */
    790		vio_cmo.spare -= size - tmp;
    791
    792		/* Update bus accounting */
    793		vio_cmo.min += size;
    794		vio_cmo.desired += viodev->cmo.desired;
    795	}
    796	spin_unlock_irqrestore(&vio_cmo.lock, flags);
    797	return 0;
    798}
    799
    800/**
    801 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
    802 *
     803 * @viodev: Pointer to struct vio_dev for device
    804 *
    805 * Remove the device from the cmo device list.  The minimum entitlement
    806 * will be reserved for the device as long as it is in the system.  The
    807 * rest of the entitlement the device had been allocated will be returned
    808 * to the system.
    809 */
    810static void vio_cmo_bus_remove(struct vio_dev *viodev)
    811{
    812	struct vio_cmo_dev_entry *dev_ent;
    813	unsigned long flags;
    814	size_t tmp;
    815
    816	spin_lock_irqsave(&vio_cmo.lock, flags);
    817	if (viodev->cmo.allocated) {
    818		dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
    819		        "allocated after remove operation.\n",
    820		        __func__, viodev->cmo.allocated);
    821		BUG();
    822	}
    823
    824	/*
    825	 * Remove the device from the device list being maintained for
    826	 * CMO enabled devices.
    827	 */
    828	list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
    829		if (viodev == dev_ent->viodev) {
    830			list_del(&dev_ent->list);
    831			kfree(dev_ent);
    832			break;
    833		}
    834
    835	/*
    836	 * Devices may not require any entitlement and they do not need
    837	 * to be processed.  Otherwise, return the device's entitlement
    838	 * back to the pools.
    839	 */
    840	if (viodev->cmo.entitled) {
    841		/*
     842		 * This device has not yet left the OF tree, so its
    843		 * minimum entitlement remains in vio_cmo.min and
    844		 * vio_cmo.desired
    845		 */
    846		vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
    847
    848		/*
    849		 * Save min allocation for device in reserve as long
    850		 * as it exists in OF tree as determined by later
    851		 * balance operation
    852		 */
    853		viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
    854
    855		/* Replenish spare from freed reserve pool */
    856		if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
    857			tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
    858			                                 vio_cmo.spare));
    859			vio_cmo.spare += tmp;
    860			viodev->cmo.entitled -= tmp;
    861		}
    862
    863		/* Remaining reserve goes to excess pool */
    864		vio_cmo.excess.size += viodev->cmo.entitled;
    865		vio_cmo.excess.free += viodev->cmo.entitled;
    866		vio_cmo.reserve.size -= viodev->cmo.entitled;
    867
    868		/*
    869		 * Until the device is removed it will keep a
    870		 * minimum entitlement; this will guarantee that
     871		 * a module unload/load will succeed.
    872		 */
    873		viodev->cmo.entitled = VIO_CMO_MIN_ENT;
    874		viodev->cmo.desired = VIO_CMO_MIN_ENT;
    875		atomic_set(&viodev->cmo.allocs_failed, 0);
    876	}
    877
    878	spin_unlock_irqrestore(&vio_cmo.lock, flags);
    879}
    880
    881static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
    882{
    883	set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
    884}
    885
    886/**
    887 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
    888 *
    889 * Set up the reserve and excess entitlement pools based on available
    890 * system entitlement and the number of devices in the OF tree that
    891 * require entitlement in the reserve pool.
    892 */
    893static void vio_cmo_bus_init(void)
    894{
    895	struct hvcall_mpp_data mpp_data;
    896	int err;
    897
    898	memset(&vio_cmo, 0, sizeof(struct vio_cmo));
    899	spin_lock_init(&vio_cmo.lock);
    900	INIT_LIST_HEAD(&vio_cmo.device_list);
    901	INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
    902
    903	/* Get current system entitlement */
    904	err = h_get_mpp(&mpp_data);
    905
    906	/*
     907	 * On failure, continue with entitlement set to 0; we will panic()
     908	 * later when the spare is reserved.
    909	 */
    910	if (err != H_SUCCESS) {
    911		printk(KERN_ERR "%s: unable to determine system IO "\
    912		       "entitlement. (%d)\n", __func__, err);
    913		vio_cmo.entitled = 0;
    914	} else {
    915		vio_cmo.entitled = mpp_data.entitled_mem;
    916	}
    917
    918	/* Set reservation and check against entitlement */
    919	vio_cmo.spare = VIO_CMO_MIN_ENT;
    920	vio_cmo.reserve.size = vio_cmo.spare;
    921	vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
    922	                         VIO_CMO_MIN_ENT);
    923	if (vio_cmo.reserve.size > vio_cmo.entitled) {
    924		printk(KERN_ERR "%s: insufficient system entitlement\n",
    925		       __func__);
    926		panic("%s: Insufficient system entitlement", __func__);
    927	}
    928
    929	/* Set the remaining accounting variables */
    930	vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
    931	vio_cmo.excess.free = vio_cmo.excess.size;
    932	vio_cmo.min = vio_cmo.reserve.size;
    933	vio_cmo.desired = vio_cmo.reserve.size;
    934}
    935
    936/* sysfs device functions and data structures for CMO */
    937
    938#define viodev_cmo_rd_attr(name)                                        \
    939static ssize_t cmo_##name##_show(struct device *dev,                    \
    940                                        struct device_attribute *attr,  \
    941                                         char *buf)                     \
    942{                                                                       \
    943	return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name);        \
    944}
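
/*
 * Editorial note: viodev_cmo_rd_attr(desired), for example, expands to a
 * cmo_desired_show() that prints to_vio_dev(dev)->cmo.desired; together with
 * cmo_desired_store() below it backs the DEVICE_ATTR_RW(cmo_desired) entry.
 */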
    945
    946static ssize_t cmo_allocs_failed_show(struct device *dev,
    947		struct device_attribute *attr, char *buf)
    948{
    949	struct vio_dev *viodev = to_vio_dev(dev);
    950	return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
    951}
    952
    953static ssize_t cmo_allocs_failed_store(struct device *dev,
    954		struct device_attribute *attr, const char *buf, size_t count)
    955{
    956	struct vio_dev *viodev = to_vio_dev(dev);
    957	atomic_set(&viodev->cmo.allocs_failed, 0);
    958	return count;
    959}
    960
    961static ssize_t cmo_desired_store(struct device *dev,
    962		struct device_attribute *attr, const char *buf, size_t count)
    963{
    964	struct vio_dev *viodev = to_vio_dev(dev);
    965	size_t new_desired;
    966	int ret;
    967
    968	ret = kstrtoul(buf, 10, &new_desired);
    969	if (ret)
    970		return ret;
    971
    972	vio_cmo_set_dev_desired(viodev, new_desired);
    973	return count;
    974}
    975
    976viodev_cmo_rd_attr(desired);
    977viodev_cmo_rd_attr(entitled);
    978viodev_cmo_rd_attr(allocated);
    979
    980static ssize_t name_show(struct device *, struct device_attribute *, char *);
    981static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
    982static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
    983			     char *buf);
    984
    985static struct device_attribute dev_attr_name;
    986static struct device_attribute dev_attr_devspec;
    987static struct device_attribute dev_attr_modalias;
    988
    989static DEVICE_ATTR_RO(cmo_entitled);
    990static DEVICE_ATTR_RO(cmo_allocated);
    991static DEVICE_ATTR_RW(cmo_desired);
    992static DEVICE_ATTR_RW(cmo_allocs_failed);
    993
    994static struct attribute *vio_cmo_dev_attrs[] = {
    995	&dev_attr_name.attr,
    996	&dev_attr_devspec.attr,
    997	&dev_attr_modalias.attr,
    998	&dev_attr_cmo_entitled.attr,
    999	&dev_attr_cmo_allocated.attr,
   1000	&dev_attr_cmo_desired.attr,
   1001	&dev_attr_cmo_allocs_failed.attr,
   1002	NULL,
   1003};
   1004ATTRIBUTE_GROUPS(vio_cmo_dev);
   1005
   1006/* sysfs bus functions and data structures for CMO */
   1007
   1008#define viobus_cmo_rd_attr(name)                                        \
   1009static ssize_t cmo_bus_##name##_show(struct bus_type *bt, char *buf)    \
   1010{                                                                       \
   1011	return sprintf(buf, "%lu\n", vio_cmo.name);                     \
   1012}                                                                       \
   1013static struct bus_attribute bus_attr_cmo_bus_##name =			\
   1014	__ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL)
   1015
   1016#define viobus_cmo_pool_rd_attr(name, var)                              \
   1017static ssize_t                                                          \
   1018cmo_##name##_##var##_show(struct bus_type *bt, char *buf)               \
   1019{                                                                       \
   1020	return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
   1021}                                                                       \
   1022static BUS_ATTR_RO(cmo_##name##_##var)
   1023
   1024viobus_cmo_rd_attr(entitled);
   1025viobus_cmo_rd_attr(spare);
   1026viobus_cmo_rd_attr(min);
   1027viobus_cmo_rd_attr(desired);
   1028viobus_cmo_rd_attr(curr);
   1029viobus_cmo_pool_rd_attr(reserve, size);
   1030viobus_cmo_pool_rd_attr(excess, size);
   1031viobus_cmo_pool_rd_attr(excess, free);
   1032
   1033static ssize_t cmo_high_show(struct bus_type *bt, char *buf)
   1034{
   1035	return sprintf(buf, "%lu\n", vio_cmo.high);
   1036}
   1037
   1038static ssize_t cmo_high_store(struct bus_type *bt, const char *buf,
   1039			      size_t count)
   1040{
   1041	unsigned long flags;
   1042
   1043	spin_lock_irqsave(&vio_cmo.lock, flags);
   1044	vio_cmo.high = vio_cmo.curr;
   1045	spin_unlock_irqrestore(&vio_cmo.lock, flags);
   1046
   1047	return count;
   1048}
   1049static BUS_ATTR_RW(cmo_high);
   1050
   1051static struct attribute *vio_bus_attrs[] = {
   1052	&bus_attr_cmo_bus_entitled.attr,
   1053	&bus_attr_cmo_bus_spare.attr,
   1054	&bus_attr_cmo_bus_min.attr,
   1055	&bus_attr_cmo_bus_desired.attr,
   1056	&bus_attr_cmo_bus_curr.attr,
   1057	&bus_attr_cmo_high.attr,
   1058	&bus_attr_cmo_reserve_size.attr,
   1059	&bus_attr_cmo_excess_size.attr,
   1060	&bus_attr_cmo_excess_free.attr,
   1061	NULL,
   1062};
   1063ATTRIBUTE_GROUPS(vio_bus);
   1064
   1065static void __init vio_cmo_sysfs_init(void)
   1066{
   1067	vio_bus_type.dev_groups = vio_cmo_dev_groups;
   1068	vio_bus_type.bus_groups = vio_bus_groups;
   1069}
   1070#else /* CONFIG_PPC_SMLPAR */
   1071int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
   1072void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
   1073static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
   1074static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
   1075static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
   1076static void vio_cmo_bus_init(void) {}
   1077static void __init vio_cmo_sysfs_init(void) { }
   1078#endif /* CONFIG_PPC_SMLPAR */
   1079EXPORT_SYMBOL(vio_cmo_entitlement_update);
   1080EXPORT_SYMBOL(vio_cmo_set_dev_desired);
   1081
   1082
   1083/*
   1084 * Platform Facilities Option (PFO) support
   1085 */
   1086
   1087/**
   1088 * vio_h_cop_sync - Perform a synchronous PFO co-processor operation
   1089 *
    1090 * @vdev: Pointer to a struct vio_dev for device
    1091 * @op: Pointer to a struct vio_pfo_op for the operation parameters
   1092 *
   1093 * Calls the hypervisor to synchronously perform the PFO operation
   1094 * described in @op.  In the case of a busy response from the hypervisor,
   1095 * the operation will be re-submitted indefinitely unless a non-zero timeout
   1096 * is specified or an error occurs. The timeout places a limit on when to
    1097 * stop re-submitting an operation; the total time can be exceeded if an
   1098 * operation is in progress.
   1099 *
    1100 * op->hcall_err will be set to the return code from the last H_COP
    1101 * hcall, or to 0 if an error not involving the hcall was
    1102 * encountered.
   1103 *
   1104 * Returns:
   1105 *	0 on success,
   1106 *	-EINVAL if the h_call fails due to an invalid parameter,
   1107 *	-E2BIG if the h_call can not be performed synchronously,
   1108 *	-EBUSY if a timeout is specified and has elapsed,
   1109 *	-EACCES if the memory area for data/status has been rescinded, or
   1110 *	-EPERM if a hardware fault has been indicated
   1111 */
   1112int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op)
   1113{
   1114	struct device *dev = &vdev->dev;
   1115	unsigned long deadline = 0;
   1116	long hret = 0;
   1117	int ret = 0;
   1118
   1119	if (op->timeout)
   1120		deadline = jiffies + msecs_to_jiffies(op->timeout);
   1121
   1122	while (true) {
   1123		hret = plpar_hcall_norets(H_COP, op->flags,
   1124				vdev->resource_id,
   1125				op->in, op->inlen, op->out,
   1126				op->outlen, op->csbcpb);
   1127
   1128		if (hret == H_SUCCESS ||
   1129		    (hret != H_NOT_ENOUGH_RESOURCES &&
   1130		     hret != H_BUSY && hret != H_RESOURCE) ||
   1131		    (op->timeout && time_after(deadline, jiffies)))
   1132			break;
   1133
   1134		dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret);
   1135	}
   1136
   1137	switch (hret) {
   1138	case H_SUCCESS:
   1139		ret = 0;
   1140		break;
   1141	case H_OP_MODE:
   1142	case H_TOO_BIG:
   1143		ret = -E2BIG;
   1144		break;
   1145	case H_RESCINDED:
   1146		ret = -EACCES;
   1147		break;
   1148	case H_HARDWARE:
   1149		ret = -EPERM;
   1150		break;
   1151	case H_NOT_ENOUGH_RESOURCES:
   1152	case H_RESOURCE:
   1153	case H_BUSY:
   1154		ret = -EBUSY;
   1155		break;
   1156	default:
   1157		ret = -EINVAL;
   1158		break;
   1159	}
   1160
   1161	if (ret)
   1162		dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n",
   1163				__func__, ret, hret);
   1164
   1165	op->hcall_err = hret;
   1166	return ret;
   1167}
   1168EXPORT_SYMBOL(vio_h_cop_sync);
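
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a minimal synchronous PFO request.  Buffer setup is elided and every value
 * below is a placeholder; only the vio_pfo_op fields actually used by
 * vio_h_cop_sync() above are shown.
 */
static int example_pfo_request(struct vio_dev *vdev, u64 in, u64 inlen,
			       u64 out, u64 outlen, u64 csbcpb)
{
	struct vio_pfo_op op = {
		.flags	 = 0,		/* operation flags passed to H_COP */
		.in	 = in,		/* input buffer descriptor */
		.inlen	 = inlen,
		.out	 = out,		/* output buffer descriptor */
		.outlen	 = outlen,
		.csbcpb	 = csbcpb,	/* co-processor status block */
		.timeout = 100,		/* bound the retry loop (milliseconds) */
	};
	int rc;

	rc = vio_h_cop_sync(vdev, &op);
	if (rc)
		dev_err(&vdev->dev, "PFO op failed: rc=%d hcall=%ld\n",
			rc, (long)op.hcall_err);
	return rc;
}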
   1169
   1170static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
   1171{
   1172	const __be32 *dma_window;
   1173	struct iommu_table *tbl;
   1174	unsigned long offset, size;
   1175
   1176	dma_window = of_get_property(dev->dev.of_node,
   1177				  "ibm,my-dma-window", NULL);
   1178	if (!dma_window)
   1179		return NULL;
   1180
   1181	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
   1182	if (tbl == NULL)
   1183		return NULL;
   1184
   1185	kref_init(&tbl->it_kref);
   1186
   1187	of_parse_dma_window(dev->dev.of_node, dma_window,
   1188			    &tbl->it_index, &offset, &size);
   1189
   1190	/* TCE table size - measured in tce entries */
   1191	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
   1192	tbl->it_size = size >> tbl->it_page_shift;
   1193	/* offset for VIO should always be 0 */
   1194	tbl->it_offset = offset >> tbl->it_page_shift;
   1195	tbl->it_busno = 0;
   1196	tbl->it_type = TCE_VB;
   1197	tbl->it_blocksize = 16;
   1198
   1199	if (firmware_has_feature(FW_FEATURE_LPAR))
   1200		tbl->it_ops = &iommu_table_lpar_multi_ops;
   1201	else
   1202		tbl->it_ops = &iommu_table_pseries_ops;
   1203
   1204	return iommu_init_table(tbl, -1, 0, 0);
   1205}
   1206
   1207/**
   1208 * vio_match_device: - Tell if a VIO device has a matching
   1209 *			VIO device id structure.
   1210 * @ids:	array of VIO device id structures to search in
   1211 * @dev:	the VIO device structure to match against
   1212 *
   1213 * Used by a driver to check whether a VIO device present in the
   1214 * system is in its list of supported devices. Returns the matching
   1215 * vio_device_id structure or NULL if there is no match.
   1216 */
   1217static const struct vio_device_id *vio_match_device(
   1218		const struct vio_device_id *ids, const struct vio_dev *dev)
   1219{
   1220	while (ids->type[0] != '\0') {
   1221		if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
   1222		    of_device_is_compatible(dev->dev.of_node,
   1223					 ids->compat))
   1224			return ids;
   1225		ids++;
   1226	}
   1227	return NULL;
   1228}
   1229
   1230/*
   1231 * Convert from struct device to struct vio_dev and pass to driver.
   1232 * dev->driver has already been set by generic code because vio_bus_match
   1233 * succeeded.
   1234 */
   1235static int vio_bus_probe(struct device *dev)
   1236{
   1237	struct vio_dev *viodev = to_vio_dev(dev);
   1238	struct vio_driver *viodrv = to_vio_driver(dev->driver);
   1239	const struct vio_device_id *id;
   1240	int error = -ENODEV;
   1241
   1242	if (!viodrv->probe)
   1243		return error;
   1244
   1245	id = vio_match_device(viodrv->id_table, viodev);
   1246	if (id) {
   1247		memset(&viodev->cmo, 0, sizeof(viodev->cmo));
   1248		if (firmware_has_feature(FW_FEATURE_CMO)) {
   1249			error = vio_cmo_bus_probe(viodev);
   1250			if (error)
   1251				return error;
   1252		}
   1253		error = viodrv->probe(viodev, id);
   1254		if (error && firmware_has_feature(FW_FEATURE_CMO))
   1255			vio_cmo_bus_remove(viodev);
   1256	}
   1257
   1258	return error;
   1259}
   1260
   1261/* convert from struct device to struct vio_dev and pass to driver. */
   1262static void vio_bus_remove(struct device *dev)
   1263{
   1264	struct vio_dev *viodev = to_vio_dev(dev);
   1265	struct vio_driver *viodrv = to_vio_driver(dev->driver);
   1266	struct device *devptr;
   1267
   1268	/*
   1269	 * Hold a reference to the device after the remove function is called
   1270	 * to allow for CMO accounting cleanup for the device.
   1271	 */
   1272	devptr = get_device(dev);
   1273
   1274	if (viodrv->remove)
   1275		viodrv->remove(viodev);
   1276
   1277	if (firmware_has_feature(FW_FEATURE_CMO))
   1278		vio_cmo_bus_remove(viodev);
   1279
   1280	put_device(devptr);
   1281}
   1282
   1283static void vio_bus_shutdown(struct device *dev)
   1284{
   1285	struct vio_dev *viodev = to_vio_dev(dev);
   1286	struct vio_driver *viodrv;
   1287
   1288	if (dev->driver) {
   1289		viodrv = to_vio_driver(dev->driver);
   1290		if (viodrv->shutdown)
   1291			viodrv->shutdown(viodev);
   1292		else if (kexec_in_progress)
   1293			vio_bus_remove(dev);
   1294	}
   1295}
   1296
   1297/**
   1298 * vio_register_driver: - Register a new vio driver
   1299 * @viodrv:	The vio_driver structure to be registered.
   1300 */
   1301int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
   1302			  const char *mod_name)
   1303{
   1304	// vio_bus_type is only initialised for pseries
   1305	if (!machine_is(pseries))
   1306		return -ENODEV;
   1307
   1308	pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
   1309
   1310	/* fill in 'struct driver' fields */
   1311	viodrv->driver.name = viodrv->name;
   1312	viodrv->driver.pm = viodrv->pm;
   1313	viodrv->driver.bus = &vio_bus_type;
   1314	viodrv->driver.owner = owner;
   1315	viodrv->driver.mod_name = mod_name;
   1316
   1317	return driver_register(&viodrv->driver);
   1318}
   1319EXPORT_SYMBOL(__vio_register_driver);
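
/*
 * Illustrative sketch (editorial addition): the skeleton of a hypothetical
 * driver binding to this bus.  The device type, compatible string, and all
 * example_* names are made up; real drivers also provide .remove and, when
 * running under CMO, .get_desired_dma (see vio_cmo_bus_probe() above).
 */
static const struct vio_device_id example_vio_ids[] = {
	{ "example", "ibm,example-vdev" },	/* { device_type, compatible } */
	{ "", "" },
};

static int example_vio_probe(struct vio_dev *vdev,
			     const struct vio_device_id *id)
{
	dev_info(&vdev->dev, "bound example driver (unit address %x)\n",
		 vdev->unit_address);
	return 0;
}

static struct vio_driver example_vio_driver = {
	.name		= "example-vio",
	.id_table	= example_vio_ids,
	.probe		= example_vio_probe,
};

static int __init example_vio_init(void)
{
	/* Usually spelled vio_register_driver(), a wrapper around this call. */
	return __vio_register_driver(&example_vio_driver, THIS_MODULE,
				     KBUILD_MODNAME);
}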
   1320
   1321/**
   1322 * vio_unregister_driver - Remove registration of vio driver.
    1323 * @viodrv:	The vio_driver struct to be removed from registration
   1324 */
   1325void vio_unregister_driver(struct vio_driver *viodrv)
   1326{
   1327	driver_unregister(&viodrv->driver);
   1328}
   1329EXPORT_SYMBOL(vio_unregister_driver);
   1330
   1331/* vio_dev refcount hit 0 */
   1332static void vio_dev_release(struct device *dev)
   1333{
   1334	struct iommu_table *tbl = get_iommu_table_base(dev);
   1335
   1336	if (tbl)
   1337		iommu_tce_table_put(tbl);
   1338	of_node_put(dev->of_node);
   1339	kfree(to_vio_dev(dev));
   1340}
   1341
   1342/**
   1343 * vio_register_device_node: - Register a new vio device.
   1344 * @of_node:	The OF node for this device.
   1345 *
   1346 * Creates and initializes a vio_dev structure from the data in
   1347 * of_node and adds it to the list of virtual devices.
   1348 * Returns a pointer to the created vio_dev or NULL if node has
   1349 * NULL device_type or compatible fields.
   1350 */
   1351struct vio_dev *vio_register_device_node(struct device_node *of_node)
   1352{
   1353	struct vio_dev *viodev;
   1354	struct device_node *parent_node;
   1355	const __be32 *prop;
   1356	enum vio_dev_family family;
   1357
   1358	/*
    1359	 * Determine if this node is under the /vdevice node or under the
   1360	 * /ibm,platform-facilities node.  This decides the device's family.
   1361	 */
   1362	parent_node = of_get_parent(of_node);
   1363	if (parent_node) {
   1364		if (of_node_is_type(parent_node, "ibm,platform-facilities"))
   1365			family = PFO;
   1366		else if (of_node_is_type(parent_node, "vdevice"))
   1367			family = VDEVICE;
   1368		else {
   1369			pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n",
   1370					__func__,
   1371					parent_node,
   1372					of_node);
   1373			of_node_put(parent_node);
   1374			return NULL;
   1375		}
   1376		of_node_put(parent_node);
   1377	} else {
   1378		pr_warn("%s: could not determine the parent of node %pOFn.\n",
   1379				__func__, of_node);
   1380		return NULL;
   1381	}
   1382
   1383	if (family == PFO) {
   1384		if (of_get_property(of_node, "interrupt-controller", NULL)) {
   1385			pr_debug("%s: Skipping the interrupt controller %pOFn.\n",
   1386					__func__, of_node);
   1387			return NULL;
   1388		}
   1389	}
   1390
   1391	/* allocate a vio_dev for this node */
   1392	viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
   1393	if (viodev == NULL) {
   1394		pr_warn("%s: allocation failure for VIO device.\n", __func__);
   1395		return NULL;
   1396	}
   1397
   1398	/* we need the 'device_type' property, in order to match with drivers */
   1399	viodev->family = family;
   1400	if (viodev->family == VDEVICE) {
   1401		unsigned int unit_address;
   1402
   1403		viodev->type = of_node_get_device_type(of_node);
   1404		if (!viodev->type) {
   1405			pr_warn("%s: node %pOFn is missing the 'device_type' "
   1406					"property.\n", __func__, of_node);
   1407			goto out;
   1408		}
   1409
   1410		prop = of_get_property(of_node, "reg", NULL);
   1411		if (prop == NULL) {
   1412			pr_warn("%s: node %pOFn missing 'reg'\n",
   1413					__func__, of_node);
   1414			goto out;
   1415		}
   1416		unit_address = of_read_number(prop, 1);
   1417		dev_set_name(&viodev->dev, "%x", unit_address);
   1418		viodev->irq = irq_of_parse_and_map(of_node, 0);
   1419		viodev->unit_address = unit_address;
   1420	} else {
   1421		/* PFO devices need their resource_id for submitting COP_OPs
   1422		 * This is an optional field for devices, but is required when
   1423		 * performing synchronous ops */
   1424		prop = of_get_property(of_node, "ibm,resource-id", NULL);
   1425		if (prop != NULL)
   1426			viodev->resource_id = of_read_number(prop, 1);
   1427
   1428		dev_set_name(&viodev->dev, "%pOFn", of_node);
   1429		viodev->type = dev_name(&viodev->dev);
   1430		viodev->irq = 0;
   1431	}
   1432
   1433	viodev->name = of_node->name;
   1434	viodev->dev.of_node = of_node_get(of_node);
   1435
   1436	set_dev_node(&viodev->dev, of_node_to_nid(of_node));
   1437
   1438	/* init generic 'struct device' fields: */
   1439	viodev->dev.parent = &vio_bus_device.dev;
   1440	viodev->dev.bus = &vio_bus_type;
   1441	viodev->dev.release = vio_dev_release;
   1442
   1443	if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) {
   1444		if (firmware_has_feature(FW_FEATURE_CMO))
   1445			vio_cmo_set_dma_ops(viodev);
   1446		else
   1447			set_dma_ops(&viodev->dev, &dma_iommu_ops);
   1448
   1449		set_iommu_table_base(&viodev->dev,
   1450				     vio_build_iommu_table(viodev));
   1451
   1452		/* needed to ensure proper operation of coherent allocations
   1453		 * later, in case driver doesn't set it explicitly */
   1454		viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
   1455		viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
   1456	}
   1457
   1458	/* register with generic device framework */
   1459	if (device_register(&viodev->dev)) {
   1460		printk(KERN_ERR "%s: failed to register device %s\n",
   1461				__func__, dev_name(&viodev->dev));
   1462		put_device(&viodev->dev);
   1463		return NULL;
   1464	}
   1465
   1466	return viodev;
   1467
   1468out:	/* Use this exit point for any return prior to device_register */
   1469	kfree(viodev);
   1470
   1471	return NULL;
   1472}
   1473EXPORT_SYMBOL(vio_register_device_node);
   1474
   1475/*
    1476 * vio_bus_scan_register_devices - Scan OF and register each child device
   1477 * @root_name - OF node name for the root of the subtree to search.
   1478 *		This must be non-NULL
   1479 *
    1480 * Starting from the root node provided, register the device node for
   1481 * each child beneath the root.
   1482 */
   1483static void __init vio_bus_scan_register_devices(char *root_name)
   1484{
   1485	struct device_node *node_root, *node_child;
   1486
   1487	if (!root_name)
   1488		return;
   1489
   1490	node_root = of_find_node_by_name(NULL, root_name);
   1491	if (node_root) {
   1492
   1493		/*
   1494		 * Create struct vio_devices for each virtual device in
   1495		 * the device tree. Drivers will associate with them later.
   1496		 */
   1497		node_child = of_get_next_child(node_root, NULL);
   1498		while (node_child) {
   1499			vio_register_device_node(node_child);
   1500			node_child = of_get_next_child(node_root, node_child);
   1501		}
   1502		of_node_put(node_root);
   1503	}
   1504}
   1505
   1506/**
   1507 * vio_bus_init: - Initialize the virtual IO bus
   1508 */
   1509static int __init vio_bus_init(void)
   1510{
   1511	int err;
   1512
   1513	if (firmware_has_feature(FW_FEATURE_CMO))
   1514		vio_cmo_sysfs_init();
   1515
   1516	err = bus_register(&vio_bus_type);
   1517	if (err) {
   1518		printk(KERN_ERR "failed to register VIO bus\n");
   1519		return err;
   1520	}
   1521
   1522	/*
   1523	 * The fake parent of all vio devices, just to give us
   1524	 * a nice directory
   1525	 */
   1526	err = device_register(&vio_bus_device.dev);
   1527	if (err) {
   1528		printk(KERN_WARNING "%s: device_register returned %i\n",
   1529				__func__, err);
   1530		return err;
   1531	}
   1532
   1533	if (firmware_has_feature(FW_FEATURE_CMO))
   1534		vio_cmo_bus_init();
   1535
   1536	return 0;
   1537}
   1538machine_postcore_initcall(pseries, vio_bus_init);
   1539
   1540static int __init vio_device_init(void)
   1541{
   1542	vio_bus_scan_register_devices("vdevice");
   1543	vio_bus_scan_register_devices("ibm,platform-facilities");
   1544
   1545	return 0;
   1546}
   1547machine_device_initcall(pseries, vio_device_init);
   1548
   1549static ssize_t name_show(struct device *dev,
   1550		struct device_attribute *attr, char *buf)
   1551{
   1552	return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
   1553}
   1554static DEVICE_ATTR_RO(name);
   1555
   1556static ssize_t devspec_show(struct device *dev,
   1557		struct device_attribute *attr, char *buf)
   1558{
   1559	struct device_node *of_node = dev->of_node;
   1560
   1561	return sprintf(buf, "%pOF\n", of_node);
   1562}
   1563static DEVICE_ATTR_RO(devspec);
   1564
   1565static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
   1566			     char *buf)
   1567{
   1568	const struct vio_dev *vio_dev = to_vio_dev(dev);
   1569	struct device_node *dn;
   1570	const char *cp;
   1571
   1572	dn = dev->of_node;
   1573	if (!dn) {
   1574		strcpy(buf, "\n");
   1575		return strlen(buf);
   1576	}
   1577	cp = of_get_property(dn, "compatible", NULL);
   1578	if (!cp) {
   1579		strcpy(buf, "\n");
   1580		return strlen(buf);
   1581	}
   1582
   1583	return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
   1584}
   1585static DEVICE_ATTR_RO(modalias);
   1586
   1587static struct attribute *vio_dev_attrs[] = {
   1588	&dev_attr_name.attr,
   1589	&dev_attr_devspec.attr,
   1590	&dev_attr_modalias.attr,
   1591	NULL,
   1592};
   1593ATTRIBUTE_GROUPS(vio_dev);
   1594
   1595void vio_unregister_device(struct vio_dev *viodev)
   1596{
   1597	device_unregister(&viodev->dev);
   1598	if (viodev->family == VDEVICE)
   1599		irq_dispose_mapping(viodev->irq);
   1600}
   1601EXPORT_SYMBOL(vio_unregister_device);
   1602
   1603static int vio_bus_match(struct device *dev, struct device_driver *drv)
   1604{
   1605	const struct vio_dev *vio_dev = to_vio_dev(dev);
   1606	struct vio_driver *vio_drv = to_vio_driver(drv);
   1607	const struct vio_device_id *ids = vio_drv->id_table;
   1608
   1609	return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
   1610}
   1611
   1612static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
   1613{
   1614	const struct vio_dev *vio_dev = to_vio_dev(dev);
   1615	struct device_node *dn;
   1616	const char *cp;
   1617
   1618	dn = dev->of_node;
   1619	if (!dn)
   1620		return -ENODEV;
   1621	cp = of_get_property(dn, "compatible", NULL);
   1622	if (!cp)
   1623		return -ENODEV;
   1624
   1625	add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
   1626	return 0;
   1627}
   1628
   1629struct bus_type vio_bus_type = {
   1630	.name = "vio",
   1631	.dev_groups = vio_dev_groups,
   1632	.uevent = vio_hotplug,
   1633	.match = vio_bus_match,
   1634	.probe = vio_bus_probe,
   1635	.remove = vio_bus_remove,
   1636	.shutdown = vio_bus_shutdown,
   1637};
   1638
   1639/**
   1640 * vio_get_attribute: - get attribute for virtual device
    1641 * @vdev:	The vio device to get the property from.
   1642 * @which:	The property/attribute to be extracted.
   1643 * @length:	Pointer to length of returned data size (unused if NULL).
   1644 *
    1645 * Calls of_get_property() to return the value of the
    1646 * attribute specified by @which.
    1647 */
   1648const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
   1649{
   1650	return of_get_property(vdev->dev.of_node, which, length);
   1651}
   1652EXPORT_SYMBOL(vio_get_attribute);
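
/*
 * Illustrative sketch (editorial addition): reading a device-tree property
 * of a vio device, here the "reg" cells that also supply the unit address
 * used for the kobject name in vio_register_device_node() above.
 */
static void example_show_reg(struct vio_dev *vdev)
{
	const __be32 *reg;
	int len;

	reg = vio_get_attribute(vdev, "reg", &len);
	if (reg)
		dev_dbg(&vdev->dev, "reg = %x (%d bytes)\n",
			(u32)of_read_number(reg, 1), len);
}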
   1653
   1654/* vio_find_name() - internal because only vio.c knows how we formatted the
   1655 * kobject name
   1656 */
   1657static struct vio_dev *vio_find_name(const char *name)
   1658{
   1659	struct device *found;
   1660
   1661	found = bus_find_device_by_name(&vio_bus_type, NULL, name);
   1662	if (!found)
   1663		return NULL;
   1664
   1665	return to_vio_dev(found);
   1666}
   1667
   1668/**
   1669 * vio_find_node - find an already-registered vio_dev
   1670 * @vnode: device_node of the virtual device we're looking for
   1671 *
   1672 * Takes a reference to the embedded struct device which needs to be dropped
   1673 * after use.
   1674 */
   1675struct vio_dev *vio_find_node(struct device_node *vnode)
   1676{
   1677	char kobj_name[20];
   1678	struct device_node *vnode_parent;
   1679
   1680	vnode_parent = of_get_parent(vnode);
   1681	if (!vnode_parent)
   1682		return NULL;
   1683
   1684	/* construct the kobject name from the device node */
   1685	if (of_node_is_type(vnode_parent, "vdevice")) {
   1686		const __be32 *prop;
   1687		
   1688		prop = of_get_property(vnode, "reg", NULL);
   1689		if (!prop)
   1690			goto out;
   1691		snprintf(kobj_name, sizeof(kobj_name), "%x",
   1692			 (uint32_t)of_read_number(prop, 1));
   1693	} else if (of_node_is_type(vnode_parent, "ibm,platform-facilities"))
   1694		snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode);
   1695	else
   1696		goto out;
   1697
   1698	of_node_put(vnode_parent);
   1699	return vio_find_name(kobj_name);
   1700out:
   1701	of_node_put(vnode_parent);
   1702	return NULL;
   1703}
   1704EXPORT_SYMBOL(vio_find_node);
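
/*
 * Illustrative sketch (editorial addition): translating a device_node back
 * into its registered vio_dev.  As noted above, vio_find_node() returns with
 * a reference held on the embedded struct device, which the caller must drop
 * with put_device() when finished.
 */
static void example_lookup_and_release(struct device_node *vnode)
{
	struct vio_dev *vdev = vio_find_node(vnode);

	if (!vdev)
		return;

	pr_debug("vio: %s backs node %pOFn\n", dev_name(&vdev->dev), vnode);
	put_device(&vdev->dev);
}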
   1705
   1706int vio_enable_interrupts(struct vio_dev *dev)
   1707{
   1708	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
   1709	if (rc != H_SUCCESS)
   1710		printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
   1711	return rc;
   1712}
   1713EXPORT_SYMBOL(vio_enable_interrupts);
   1714
   1715int vio_disable_interrupts(struct vio_dev *dev)
   1716{
   1717	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
   1718	if (rc != H_SUCCESS)
   1719		printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
   1720	return rc;
   1721}
   1722EXPORT_SYMBOL(vio_disable_interrupts);
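
/*
 * Illustrative sketch (editorial addition): a common pattern for the two
 * helpers above in a hypothetical driver that masks its virtual interrupt
 * while draining queued work and then re-arms it.
 */
static void example_drain_device(struct vio_dev *vdev)
{
	vio_disable_interrupts(vdev);	/* mask further VIO signals */
	/* ... process the device's queue here ... */
	vio_enable_interrupts(vdev);	/* re-arm the virtual interrupt */
}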
   1723
   1724static int __init vio_init(void)
   1725{
   1726	dma_debug_add_bus(&vio_bus_type);
   1727	return 0;
   1728}
   1729machine_fs_initcall(pseries, vio_init);