cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ccio-dma.c (47526B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3** ccio-dma.c:
      4**	DMA management routines for first generation cache-coherent machines.
      5**	Program U2/Uturn in "Virtual Mode" and use the I/O MMU.
      6**
      7**	(c) Copyright 2000 Grant Grundler
      8**	(c) Copyright 2000 Ryan Bradetich
      9**	(c) Copyright 2000 Hewlett-Packard Company
     10**
     11**
     12**
     13**  "Real Mode" operation refers to U2/Uturn chip operation.
     14**  U2/Uturn were designed to perform coherency checks w/o using
     15**  the I/O MMU - basically what x86 does.
     16**
     17**  Philipp Rumpf has a "Real Mode" driver for PCX-W machines at:
     18**      CVSROOT=:pserver:anonymous@198.186.203.37:/cvsroot/linux-parisc
     19**      cvs -z3 co linux/arch/parisc/kernel/dma-rm.c
     20**
     21**  I've rewritten his code to work under TPG's tree. See ccio-rm-dma.c.
     22**
     23**  Drawbacks of using Real Mode are:
     24**	o outbound DMA is slower - U2 won't prefetch data (GSC+ XQL signal).
     25**      o Inbound DMA is less efficient - U2 can't use DMA_FAST attribute.
     26**	o Ability to do scatter/gather in HW is lost.
     27**	o Doesn't work under PCX-U/U+ machines since they didn't follow
     28**        the coherency design originally worked out. Only PCX-W does.
     29*/
     30
     31#include <linux/types.h>
     32#include <linux/kernel.h>
     33#include <linux/init.h>
     34#include <linux/mm.h>
     35#include <linux/spinlock.h>
     36#include <linux/slab.h>
     37#include <linux/string.h>
     38#include <linux/pci.h>
     39#include <linux/reboot.h>
     40#include <linux/proc_fs.h>
     41#include <linux/seq_file.h>
     42#include <linux/dma-map-ops.h>
     43#include <linux/scatterlist.h>
     44#include <linux/iommu-helper.h>
     45#include <linux/export.h>
     46
     47#include <asm/byteorder.h>
     48#include <asm/cache.h>		/* for L1_CACHE_BYTES */
     49#include <linux/uaccess.h>
     50#include <asm/page.h>
     51#include <asm/dma.h>
     52#include <asm/io.h>
     53#include <asm/hardware.h>       /* for register_module() */
     54#include <asm/parisc-device.h>
     55
     56#include "iommu.h"
     57
     58/* 
     59** Choose "ccio" since that's what HP-UX calls it.
     60** Make it easier for folks to migrate from one to the other :^)
     61*/
     62#define MODULE_NAME "ccio"
     63
     64#undef DEBUG_CCIO_RES
     65#undef DEBUG_CCIO_RUN
     66#undef DEBUG_CCIO_INIT
     67#undef DEBUG_CCIO_RUN_SG
     68
     69#ifdef CONFIG_PROC_FS
     70/* depends on proc fs support. But costs CPU performance. */
     71#undef CCIO_COLLECT_STATS
     72#endif
     73
     74#include <asm/runway.h>		/* for proc_runway_root */
     75
     76#ifdef DEBUG_CCIO_INIT
     77#define DBG_INIT(x...)  printk(x)
     78#else
     79#define DBG_INIT(x...)
     80#endif
     81
     82#ifdef DEBUG_CCIO_RUN
     83#define DBG_RUN(x...)   printk(x)
     84#else
     85#define DBG_RUN(x...)
     86#endif
     87
     88#ifdef DEBUG_CCIO_RES
     89#define DBG_RES(x...)   printk(x)
     90#else
     91#define DBG_RES(x...)
     92#endif
     93
     94#ifdef DEBUG_CCIO_RUN_SG
     95#define DBG_RUN_SG(x...) printk(x)
     96#else
     97#define DBG_RUN_SG(x...)
     98#endif
     99
    100#define CCIO_INLINE	inline
    101#define WRITE_U32(value, addr) __raw_writel(value, addr)
    102#define READ_U32(addr) __raw_readl(addr)
    103
    104#define U2_IOA_RUNWAY 0x580
    105#define U2_BC_GSC     0x501
    106#define UTURN_IOA_RUNWAY 0x581
    107#define UTURN_BC_GSC     0x502
    108
    109#define IOA_NORMAL_MODE      0x00020080 /* IO_CONTROL to turn on CCIO        */
    110#define CMD_TLB_DIRECT_WRITE 35         /* IO_COMMAND for I/O TLB Writes     */
    111#define CMD_TLB_PURGE        33         /* IO_COMMAND to Purge I/O TLB entry */
    112
    113struct ioa_registers {
    114	/* Runway Supervisory Set */
    115	int32_t    unused1[12];
    116	uint32_t   io_command;             /* Offset 12 */
    117	uint32_t   io_status;              /* Offset 13 */
    118	uint32_t   io_control;             /* Offset 14 */
    119	int32_t    unused2[1];
    120
    121	/* Runway Auxiliary Register Set */
    122	uint32_t   io_err_resp;            /* Offset  0 */
    123	uint32_t   io_err_info;            /* Offset  1 */
    124	uint32_t   io_err_req;             /* Offset  2 */
    125	uint32_t   io_err_resp_hi;         /* Offset  3 */
    126	uint32_t   io_tlb_entry_m;         /* Offset  4 */
    127	uint32_t   io_tlb_entry_l;         /* Offset  5 */
    128	uint32_t   unused3[1];
    129	uint32_t   io_pdir_base;           /* Offset  7 */
    130	uint32_t   io_io_low_hv;           /* Offset  8 */
    131	uint32_t   io_io_high_hv;          /* Offset  9 */
    132	uint32_t   unused4[1];
    133	uint32_t   io_chain_id_mask;       /* Offset 11 */
    134	uint32_t   unused5[2];
    135	uint32_t   io_io_low;              /* Offset 14 */
    136	uint32_t   io_io_high;             /* Offset 15 */
    137};
    138
    139/*
    140** IOA Registers
    141** -------------
    142**
    143** Runway IO_CONTROL Register (+0x38)
    144** 
    145** The Runway IO_CONTROL register controls the forwarding of transactions.
    146**
    147** | 0  ...  13  |  14 15 | 16 ... 21 | 22 | 23 24 |  25 ... 31 |
    148** |    HV       |   TLB  |  reserved | HV | mode  |  reserved  |
    149**
    150** o mode field indicates the address translation of transactions
    151**   forwarded from Runway to GSC+:
    152**       Mode Name     Value        Definition
    153**       Off (default)   0          Opaque to matching addresses.
    154**       Include         1          Transparent for matching addresses.
    155**       Peek            3          Map matching addresses.
    156**
    157**       + "Off" mode: Runway transactions which match the I/O range
    158**         specified by the IO_IO_LOW/IO_IO_HIGH registers will be ignored.
    159**       + "Include" mode: all addresses within the I/O range specified
    160**         by the IO_IO_LOW and IO_IO_HIGH registers are transparently
    161**         forwarded. This is the I/O Adapter's normal operating mode.
    162**       + "Peek" mode: used during system configuration to initialize the
    163**         GSC+ bus. Runway Write_Shorts in the address range specified by
    164**         IO_IO_LOW and IO_IO_HIGH are forwarded through the I/O Adapter
    165**         *AND* the GSC+ address is remapped to the Broadcast Physical
    166**         Address space by setting the 14 high order address bits of the
    167**         32 bit GSC+ address to ones.
    168**
    169** o TLB field affects transactions which are forwarded from GSC+ to Runway.
    170**   "Real" mode is the poweron default.
    171** 
    172**   TLB Mode  Value  Description
    173**   Real        0    No TLB translation. Address is directly mapped and the
    174**                    virtual address is composed of selected physical bits.
    175**   Error       1    Software fills the TLB manually.
    176**   Normal      2    IOA fetches IO TLB misses from IO PDIR (in host memory).
    177**
    178**
    179** IO_IO_LOW_HV	  +0x60 (HV dependent)
    180** IO_IO_HIGH_HV  +0x64 (HV dependent)
    181** IO_IO_LOW      +0x78	(Architected register)
    182** IO_IO_HIGH     +0x7c	(Architected register)
    183**
    184** IO_IO_LOW and IO_IO_HIGH set the lower and upper bounds of the
    185** I/O Adapter address space, respectively.
    186**
    187** 0  ... 7 | 8 ... 15 |  16   ...   31 |
    188** 11111111 | 11111111 |      address   |
    189**
    190** Each LOW/HIGH pair describes a disjoint address space region.
    191** (2 per GSC+ port). Each incoming Runway transaction address is compared
    192** with both sets of LOW/HIGH registers. If the address is in the range
    193** greater than or equal to IO_IO_LOW and less than IO_IO_HIGH the transaction
    194** is forwarded to the respective GSC+ bus.
    195** Specify IO_IO_LOW equal to or greater than IO_IO_HIGH to avoid specifying
    196** an address space region.
    197**
    198** In order for a Runway address to reside within GSC+ extended address space:
    199**	Runway Address [0:7]    must identically compare to 8'b11111111
    200**	Runway Address [8:11]   must be equal to IO_IO_LOW(_HV)[16:19]
    201**	Runway Address [12:23]  must be greater than or equal to
    202**	           IO_IO_LOW(_HV)[20:31] and less than IO_IO_HIGH(_HV)[20:31].
    203**	Runway Address [24:39]  is not used in the comparison.
    204**
    205** When the Runway transaction is forwarded to GSC+, the GSC+ address is
    206** as follows:
    207**	GSC+ Address[0:3]	4'b1111
    208**	GSC+ Address[4:29]	Runway Address[12:37]
    209**	GSC+ Address[30:31]	2'b00
    210**
    211** All 4 Low/High registers must be initialized (by PDC) once the lower bus
    212** is interrogated and address space is defined. The operating system will
    213** modify the architectural IO_IO_LOW and IO_IO_HIGH registers following
    214** the PDC initialization.  However, the hardware version dependent IO_IO_LOW
    215** and IO_IO_HIGH registers should not be subsequently altered by the OS.
    216** 
    217** Writes to both sets of registers will take effect immediately, bypassing
    218** the queues, which ensures that subsequent Runway transactions are checked
    219** against the updated bounds values. However, reads are queued, introducing
    220** the possibility of a read being bypassed by a subsequent write to the same
    221** register. This sequence can be avoided by having software wait for read
    222** returns before issuing subsequent writes.
    223*/
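       /*
       ** The range check and forwarding decision described above are made by
       ** the U2/Uturn hardware, not by this driver.  Purely as an illustrative
       ** sketch (the helper name and the simplification to 32-bit addresses
       ** are ours, not part of the driver), a software model of the
       ** architected IO_IO_LOW/IO_IO_HIGH comparison could look like this:
       */
       #if 0
       static int ccio_addr_would_be_forwarded(u32 io_io_low, u32 io_io_high,
       					unsigned long bus_addr)
       {
       	/* The low halfword of each register supplies the upper 16 bits of
       	** the range bound (the register's upper halfword reads as ones).
       	** The compare is half-open: IO_IO_LOW <= addr < IO_IO_HIGH.
       	** ccio_init_resource() below derives the same bounds for /proc/iomem.
       	*/
       	unsigned long low  = (unsigned long)(io_io_low  & 0xffff) << 16;
       	unsigned long high = (unsigned long)(io_io_high & 0xffff) << 16;
       
       	return bus_addr >= low && bus_addr < high;
       }
       #endif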
    224
    225struct ioc {
    226	struct ioa_registers __iomem *ioc_regs;  /* I/O MMU base address */
    227	u8  *res_map;	                /* resource map, bit == pdir entry */
    228	u64 *pdir_base;	                /* physical base address */
    229	u32 pdir_size;			/* bytes, function of IOV Space size */
    230	u32 res_hint;			/* next available IOVP -
    231					   circular search */
    232	u32 res_size;			/* size of resource map in bytes */
    233	spinlock_t res_lock;
    234
    235#ifdef CCIO_COLLECT_STATS
    236#define CCIO_SEARCH_SAMPLE 0x100
    237	unsigned long avg_search[CCIO_SEARCH_SAMPLE];
    238	unsigned long avg_idx;		  /* current index into avg_search */
    239	unsigned long used_pages;
    240	unsigned long msingle_calls;
    241	unsigned long msingle_pages;
    242	unsigned long msg_calls;
    243	unsigned long msg_pages;
    244	unsigned long usingle_calls;
    245	unsigned long usingle_pages;
    246	unsigned long usg_calls;
    247	unsigned long usg_pages;
    248#endif
    249	unsigned short cujo20_bug;
    250
    251	/* STUFF We don't need in performance path */
    252	u32 chainid_shift;		/* specify bit location of chain_id */
    253	struct ioc *next;		/* Linked list of discovered iocs */
    254	const char *name;		/* device name from firmware */
    255	unsigned int hw_path;           /* the hardware path this ioc is associated with */
    256	struct pci_dev *fake_pci_dev;   /* the fake pci_dev for non-pci devs */
    257	struct resource mmio_region[2]; /* The "routed" MMIO regions */
    258};
    259
    260static struct ioc *ioc_list;
    261static int ioc_count;
    262
    263/**************************************************************
    264*
    265*   I/O Pdir Resource Management
    266*
    267*   Bits set in the resource map are in use.
    268*   Each bit can represent a number of pages.
    269*   LSbs represent lower addresses (IOVA's).
    270*
    271*   This was copied from sba_iommu.c. Don't try to unify
    272*   the two resource managers unless a way to have different
    273*   allocation policies is also adjusted. We'd like to avoid
    274*   I/O TLB thrashing by having resource allocation policy
    275*   match the I/O TLB replacement policy.
    276*
    277***************************************************************/
    278#define IOVP_SIZE PAGE_SIZE
    279#define IOVP_SHIFT PAGE_SHIFT
    280#define IOVP_MASK PAGE_MASK
    281
    282/* Convert from IOVP to IOVA and vice versa. */
    283#define CCIO_IOVA(iovp,offset) ((iovp) | (offset))
    284#define CCIO_IOVP(iova) ((iova) & IOVP_MASK)
    285
    286#define PDIR_INDEX(iovp)    ((iovp)>>IOVP_SHIFT)
    287#define MKIOVP(pdir_idx)    ((long)(pdir_idx) << IOVP_SHIFT)
    288#define MKIOVA(iovp,offset) (dma_addr_t)((long)iovp | (long)offset)
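       /*
       ** Worked example of the conversion macros above (illustrative only;
       ** assumes the usual 4kB I/O page, ie IOVP_SHIFT == 12).  Not compiled.
       */
       #if 0
       static void ccio_iovp_macro_example(void)
       {
       	dma_addr_t iova = 0x00123456;
       	unsigned long iovp   = CCIO_IOVP(iova);		/* 0x00123000 */
       	unsigned long offset = iova & ~IOVP_MASK;	/* 0x00000456 */
       
       	BUG_ON(PDIR_INDEX(iovp) != 0x123);
       	BUG_ON(CCIO_IOVA(iovp, offset) != iova);
       	BUG_ON(MKIOVA(MKIOVP(PDIR_INDEX(iovp)), offset) != iova);
       }
       #endif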
    289
    290/*
    291** Don't worry about the 150% average search length on a miss.
    292** If the search wraps around, and passes the res_hint, it will
    293** cause the kernel to panic anyhow.
    294*/
    295#define CCIO_SEARCH_LOOP(ioc, res_idx, mask, size)  \
    296	for (; res_ptr < res_end; ++res_ptr) { \
    297		int ret;\
    298		unsigned int idx;\
    299		idx = (unsigned int)((unsigned long)res_ptr - (unsigned long)ioc->res_map); \
    300		ret = iommu_is_span_boundary(idx << 3, pages_needed, 0, boundary_size);\
    301		if ((0 == (*res_ptr & mask)) && !ret) { \
    302			*res_ptr |= mask; \
    303			res_idx = idx;\
    304			ioc->res_hint = res_idx + (size >> 3); \
    305			goto resource_found; \
    306		} \
    307	}
    308
    309#define CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, size) \
    310       u##size *res_ptr = (u##size *)&((ioc)->res_map[ioa->res_hint & ~((size >> 3) - 1)]); \
    311       u##size *res_end = (u##size *)&(ioc)->res_map[ioa->res_size]; \
    312	CCIO_SEARCH_LOOP(ioc, res_idx, mask, size); \
    313	res_ptr = (u##size *)&(ioc)->res_map[0]; \
    314	CCIO_SEARCH_LOOP(ioa, res_idx, mask, size);
    315
    316/*
    317** Find available bit in this ioa's resource map.
    318** Use a "circular" search:
    319**   o Most IOVA's are "temporary" - avg search time should be small.
    320**   o keep a history of what happened for debugging
    321**   o KISS.
    322**
    323** Perf optimizations:
    324** o search for log2(size) bits at a time.
    325** o search for available resource bits using byte/word/whatever.
    326** o use different search for "large" (eg > 4 pages) or "very large"
    327**   (eg > 16 pages) mappings.
    328*/
    329
    330/**
    331 * ccio_alloc_range - Allocate pages in the ioc's resource map.
    332 * @ioc: The I/O Controller.
    333 * @dev: The device for which the mapping is being made.
    334 * @size: The size of the request in bytes; this determines the number of I/O Pdir pages needed.
    335 *
    336 * This function searches the resource map of the ioc to locate a range
    337 * of available pages for the requested size.
    338 */
    339static int
    340ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
    341{
    342	unsigned int pages_needed = size >> IOVP_SHIFT;
    343	unsigned int res_idx;
    344	unsigned long boundary_size;
    345#ifdef CCIO_COLLECT_STATS
    346	unsigned long cr_start = mfctl(16);
    347#endif
    348	
    349	BUG_ON(pages_needed == 0);
    350	BUG_ON((pages_needed * IOVP_SIZE) > DMA_CHUNK_SIZE);
    351
    352	DBG_RES("%s() size: %d pages_needed %d\n",
    353			__func__, size, pages_needed);
    354
    355	/*
    356	** "seek and ye shall find"...praying never hurts either...
    357	** ggg sacrifices another 710 to the computer gods.
    358	*/
    359
    360	boundary_size = dma_get_seg_boundary_nr_pages(dev, IOVP_SHIFT);
    361
    362	if (pages_needed <= 8) {
    363		/*
    364		 * LAN traffic will not thrash the TLB IFF the same NIC
    365		 * uses 8 adjacent pages to map separate payload data.
    366		 * ie the same byte in the resource bit map.
    367		 */
    368#if 0
    369		/* FIXME: bit search should shift its way through
    370		 * an unsigned long - not byte at a time. As it is now,
    371		 * we effectively allocate this byte to this mapping.
    372		 */
    373		unsigned long mask = ~(~0UL >> pages_needed);
    374		CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, 8);
    375#else
    376		CCIO_FIND_FREE_MAPPING(ioc, res_idx, 0xff, 8);
    377#endif
    378	} else if (pages_needed <= 16) {
    379		CCIO_FIND_FREE_MAPPING(ioc, res_idx, 0xffff, 16);
    380	} else if (pages_needed <= 32) {
    381		CCIO_FIND_FREE_MAPPING(ioc, res_idx, ~(unsigned int)0, 32);
    382#ifdef __LP64__
    383	} else if (pages_needed <= 64) {
    384		CCIO_FIND_FREE_MAPPING(ioc, res_idx, ~0UL, 64);
    385#endif
    386	} else {
    387		panic("%s: %s() Too many pages to map. pages_needed: %u\n",
    388		       __FILE__,  __func__, pages_needed);
    389	}
    390
    391	panic("%s: %s() I/O MMU is out of mapping resources.\n", __FILE__,
    392	      __func__);
    393	
    394resource_found:
    395	
    396	DBG_RES("%s() res_idx %d res_hint: %d\n",
    397		__func__, res_idx, ioc->res_hint);
    398
    399#ifdef CCIO_COLLECT_STATS
    400	{
    401		unsigned long cr_end = mfctl(16);
    402		unsigned long tmp = cr_end - cr_start;
    403		/* check for roll over */
    404		cr_start = (cr_end < cr_start) ?  -(tmp) : (tmp);
    405	}
    406	ioc->avg_search[ioc->avg_idx++] = cr_start;
    407	ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
    408	ioc->used_pages += pages_needed;
    409#endif
    410	/* 
    411	** return the bit address.
    412	*/
    413	return res_idx << 3;
    414}
    415
    416#define CCIO_FREE_MAPPINGS(ioc, res_idx, mask, size) \
    417        u##size *res_ptr = (u##size *)&((ioc)->res_map[res_idx]); \
    418        BUG_ON((*res_ptr & mask) != mask); \
    419	*res_ptr &= ~(mask);
    420
    421/**
    422 * ccio_free_range - Free pages from the ioc's resource map.
    423 * @ioc: The I/O Controller.
    424 * @iova: The I/O Virtual Address.
    425 * @pages_mapped: The requested number of pages to be freed from the
    426 * I/O Pdir.
    427 *
    428 * This function frees the resources allocated for the iova.
    429 */
    430static void
    431ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
    432{
    433	unsigned long iovp = CCIO_IOVP(iova);
    434	unsigned int res_idx = PDIR_INDEX(iovp) >> 3;
    435
    436	BUG_ON(pages_mapped == 0);
    437	BUG_ON((pages_mapped * IOVP_SIZE) > DMA_CHUNK_SIZE);
    438	BUG_ON(pages_mapped > BITS_PER_LONG);
    439
    440	DBG_RES("%s():  res_idx: %d pages_mapped %d\n", 
    441		__func__, res_idx, pages_mapped);
    442
    443#ifdef CCIO_COLLECT_STATS
    444	ioc->used_pages -= pages_mapped;
    445#endif
    446
    447	if(pages_mapped <= 8) {
    448#if 0
    449		/* see matching comments in alloc_range */
    450		unsigned long mask = ~(~0UL >> pages_mapped);
    451		CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 8);
    452#else
    453		CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffUL, 8);
    454#endif
    455	} else if(pages_mapped <= 16) {
    456		CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffffUL, 16);
    457	} else if(pages_mapped <= 32) {
    458		CCIO_FREE_MAPPINGS(ioc, res_idx, ~(unsigned int)0, 32);
    459#ifdef __LP64__
    460	} else if(pages_mapped <= 64) {
    461		CCIO_FREE_MAPPINGS(ioc, res_idx, ~0UL, 64);
    462#endif
    463	} else {
    464		panic("%s:%s() Too many pages to unmap.\n", __FILE__,
    465		      __func__);
    466	}
    467}
    468
    469/****************************************************************
    470**
    471**          CCIO dma_ops support routines
    472**
    473*****************************************************************/
    474
    475typedef unsigned long space_t;
    476#define KERNEL_SPACE 0
    477
    478/*
    479** DMA "Page Type" and Hints 
    480** o if SAFE_DMA isn't set, mapping is for FAST_DMA. SAFE_DMA should be
    481**   set for subcacheline DMA transfers since we don't want to damage the
    482**   other part of a cacheline.
    483** o SAFE_DMA must be set for "memory" allocated via pci_alloc_consistent().
    484**   This bit tells U2 to do R/M/W for partial cachelines. "Streaming"
    485**   data can avoid this if the mapping covers full cache lines.
    486** o STOP_MOST is needed for atomicity across cachelines.
    487**   Apparently only "some EISA devices" need this.
    488**   Using CONFIG_EISA is a hack. Only the IOA with EISA under it needs
    489**   to use this hint iff the EISA device needs this feature.
    490**   According to the U2 ERS, STOP_MOST enabled pages hurt performance.
    491** o PREFETCH should *not* be set for cases like Multiple PCI devices
    492**   behind GSCtoPCI (dino) bus converter. Only one cacheline per GSC
    493**   device can be fetched and multiple DMA streams will thrash the
    494**   prefetch buffer and burn memory bandwidth. See 6.7.3 "Prefetch Rules
    495**   and Invalidation of Prefetch Entries".
    496**
    497** FIXME: the default hints need to be per GSC device - not global.
    498** 
    499** HP-UX dorks: linux device driver programming model is totally different
    500**    than HP-UX's. HP-UX always sets HINT_PREFETCH since its drivers
    501**    do special things to work on non-coherent platforms...linux has to
    502**    be much more careful with this.
    503*/
    504#define IOPDIR_VALID    0x01UL
    505#define HINT_SAFE_DMA   0x02UL	/* used for pci_alloc_consistent() pages */
    506#ifdef CONFIG_EISA
    507#define HINT_STOP_MOST  0x04UL	/* LSL support */
    508#else
    509#define HINT_STOP_MOST  0x00UL	/* only needed for "some EISA devices" */
    510#endif
    511#define HINT_UDPATE_ENB 0x08UL  /* not used/supported by U2 */
    512#define HINT_PREFETCH   0x10UL	/* for outbound pages which are not SAFE */
    513
    514
    515/*
    516** Use direction (ie PCI_DMA_TODEVICE) to pick hint.
    517** ccio_alloc_consistent() depends on this to get SAFE_DMA
    518** when it passes in BIDIRECTIONAL flag.
    519*/
    520static u32 hint_lookup[] = {
    521	[DMA_BIDIRECTIONAL]	= HINT_STOP_MOST | HINT_SAFE_DMA | IOPDIR_VALID,
    522	[DMA_TO_DEVICE]		= HINT_STOP_MOST | HINT_PREFETCH | IOPDIR_VALID,
    523	[DMA_FROM_DEVICE]	= HINT_STOP_MOST | IOPDIR_VALID,
    524};
    525
    526/**
    527 * ccio_io_pdir_entry - Initialize an I/O Pdir.
    528 * @pdir_ptr: A pointer into I/O Pdir.
    529 * @sid: The Space Identifier.
    530 * @vba: The virtual address.
    531 * @hints: The DMA Hint.
    532 *
    533 * Given a virtual address (vba, arg2) and space id (sid, arg1),
    534 * load the I/O PDIR entry pointed to by pdir_ptr (arg0). Each IO Pdir
    535 * entry consists of 8 bytes as shown below (MSB == bit 0):
    536 *
    537 *
    538 * WORD 0:
    539 * +------+----------------+-----------------------------------------------+
    540 * | Phys | Virtual Index  |               Phys                            |
    541 * | 0:3  |     0:11       |               4:19                            |
    542 * |4 bits|   12 bits      |              16 bits                          |
    543 * +------+----------------+-----------------------------------------------+
    544 * WORD 1:
    545 * +-----------------------+-----------------------------------------------+
    546 * |      Phys    |  Rsvd  | Prefetch |Update |Rsvd  |Lock  |Safe  |Valid  |
    547 * |     20:39    |        | Enable   |Enable |      |Enable|DMA   |       |
    548 * |    20 bits   | 5 bits | 1 bit    |1 bit  |2 bits|1 bit |1 bit |1 bit  |
    549 * +-----------------------+-----------------------------------------------+
    550 *
    551 * The virtual index field is filled with the results of the LCI
    552 * (Load Coherence Index) instruction.  The 8 bits used for the virtual
    553 * index are bits 12:19 of the value returned by LCI.
    554 */ 
    555static void CCIO_INLINE
    556ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
    557		   unsigned long hints)
    558{
    559	register unsigned long pa;
    560	register unsigned long ci; /* coherent index */
    561
    562	/* We currently only support kernel addresses */
    563	BUG_ON(sid != KERNEL_SPACE);
    564
    565	/*
    566	** WORD 1 - low order word
    567	** "hints" parm includes the VALID bit!
    568	** "dep" clobbers the physical address offset bits as well.
    569	*/
    570	pa = lpa(vba);
    571	asm volatile("depw  %1,31,12,%0" : "+r" (pa) : "r" (hints));
    572	((u32 *)pdir_ptr)[1] = (u32) pa;
    573
    574	/*
    575	** WORD 0 - high order word
    576	*/
    577
    578#ifdef __LP64__
    579	/*
    580	** get bits 12:15 of physical address
    581	** shift bits 16:31 of physical address
    582	** and deposit them
    583	*/
    584	asm volatile ("extrd,u %1,15,4,%0" : "=r" (ci) : "r" (pa));
    585	asm volatile ("extrd,u %1,31,16,%0" : "+r" (pa) : "r" (pa));
    586	asm volatile ("depd  %1,35,4,%0" : "+r" (pa) : "r" (ci));
    587#else
    588	pa = 0;
    589#endif
    590	/*
    591	** get CPU coherency index bits
    592	** Grab virtual index [0:11]
    593	** Deposit virt_idx bits into I/O PDIR word
    594	*/
    595	asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (vba));
    596	asm volatile ("extru %1,19,12,%0" : "+r" (ci) : "r" (ci));
    597	asm volatile ("depw  %1,15,12,%0" : "+r" (pa) : "r" (ci));
    598
    599	((u32 *)pdir_ptr)[0] = (u32) pa;
    600
    601
    602	/* FIXME: PCX_W platforms don't need FDC/SYNC. (eg C360)
    603	**        PCX-U/U+ do. (eg C200/C240)
    604	**        PCX-T'? Don't know. (eg C110 or similar K-class)
    605	**
    606	** See PDC_MODEL/option 0/SW_CAP word for "Non-coherent IO-PDIR bit".
    607	**
    608	** "Since PCX-U employs an offset hash that is incompatible with
    609	** the real mode coherence index generation of U2, the PDIR entry
    610	** must be flushed to memory to retain coherence."
    611	*/
    612	asm_io_fdc(pdir_ptr);
    613	asm_io_sync();
    614}
    615
    616/**
    617 * ccio_clear_io_tlb - Remove stale entries from the I/O TLB.
    618 * @ioc: The I/O Controller.
    619 * @iovp: The I/O Virtual Page.
    620 * @byte_cnt: The requested number of bytes to be freed from the I/O Pdir.
    621 *
    622 * Purge invalid I/O PDIR entries from the I/O TLB.
    623 *
    624 * FIXME: Can we change the byte_cnt to pages_mapped?
    625 */
    626static CCIO_INLINE void
    627ccio_clear_io_tlb(struct ioc *ioc, dma_addr_t iovp, size_t byte_cnt)
    628{
    629	u32 chain_size = 1 << ioc->chainid_shift;
    630
    631	iovp &= IOVP_MASK;	/* clear offset bits, just want pagenum */
    632	byte_cnt += chain_size;
    633
    634	while(byte_cnt > chain_size) {
    635		WRITE_U32(CMD_TLB_PURGE | iovp, &ioc->ioc_regs->io_command);
    636		iovp += chain_size;
    637		byte_cnt -= chain_size;
    638	}
    639}
    640
    641/**
    642 * ccio_mark_invalid - Mark the I/O Pdir entries invalid.
    643 * @ioc: The I/O Controller.
    644 * @iova: The I/O Virtual Address.
    645 * @byte_cnt: The requested number of bytes to be freed from the I/O Pdir.
    646 *
    647 * Mark the I/O Pdir entries invalid and blow away the corresponding I/O
    648 * TLB entries.
    649 *
    650 * FIXME: at some threshold it might be "cheaper" to just blow
    651 *        away the entire I/O TLB instead of individual entries.
    652 *
    653 * FIXME: Uturn has 256 TLB entries. We don't need to purge every
    654 *        PDIR entry - just once for each possible TLB entry.
    655 *        (We do need to mark I/O PDIR entries invalid regardless).
    656 *
    657 * FIXME: Can we change byte_cnt to pages_mapped?
    658 */ 
    659static CCIO_INLINE void
    660ccio_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
    661{
    662	u32 iovp = (u32)CCIO_IOVP(iova);
    663	size_t saved_byte_cnt;
    664
    665	/* round up to nearest page size */
    666	saved_byte_cnt = byte_cnt = ALIGN(byte_cnt, IOVP_SIZE);
    667
    668	while(byte_cnt > 0) {
    669		/* invalidate one page at a time */
    670		unsigned int idx = PDIR_INDEX(iovp);
    671		char *pdir_ptr = (char *) &(ioc->pdir_base[idx]);
    672
    673		BUG_ON(idx >= (ioc->pdir_size / sizeof(u64)));
    674		pdir_ptr[7] = 0;	/* clear only VALID bit */ 
    675		/*
    676		** FIXME: PCX_W platforms don't need FDC/SYNC. (eg C360)
    677		**   PCX-U/U+ do. (eg C200/C240)
    678		** See PDC_MODEL/option 0/SW_CAP for "Non-coherent IO-PDIR bit".
    679		*/
    680		asm_io_fdc(pdir_ptr);
    681
    682		iovp     += IOVP_SIZE;
    683		byte_cnt -= IOVP_SIZE;
    684	}
    685
    686	asm_io_sync();
    687	ccio_clear_io_tlb(ioc, CCIO_IOVP(iova), saved_byte_cnt);
    688}
    689
    690/****************************************************************
    691**
    692**          CCIO dma_ops
    693**
    694*****************************************************************/
    695
    696/**
    697 * ccio_dma_supported - Verify the IOMMU supports the DMA address range.
    698 * @dev: The PCI device.
    699 * @mask: A bit mask describing the DMA address range of the device.
    700 */
    701static int 
    702ccio_dma_supported(struct device *dev, u64 mask)
    703{
    704	if(dev == NULL) {
    705		printk(KERN_ERR MODULE_NAME ": EISA/ISA/et al not supported\n");
    706		BUG();
    707		return 0;
    708	}
    709
    710	/* only support 32-bit or better devices (ie PCI/GSC) */
    711	return (int)(mask >= 0xffffffffUL);
    712}
    713
    714/**
    715 * ccio_map_single - Map an address range into the IOMMU.
    716 * @dev: The PCI device.
    717 * @addr: The start address of the DMA region.
    718 * @size: The length of the DMA region.
    719 * @direction: The direction of the DMA transaction (to/from device).
    720 *
    721 * This function implements the pci_map_single function.
    722 */
    723static dma_addr_t 
    724ccio_map_single(struct device *dev, void *addr, size_t size,
    725		enum dma_data_direction direction)
    726{
    727	int idx;
    728	struct ioc *ioc;
    729	unsigned long flags;
    730	dma_addr_t iovp;
    731	dma_addr_t offset;
    732	u64 *pdir_start;
    733	unsigned long hint = hint_lookup[(int)direction];
    734
    735	BUG_ON(!dev);
    736	ioc = GET_IOC(dev);
    737	if (!ioc)
    738		return DMA_MAPPING_ERROR;
    739
    740	BUG_ON(size <= 0);
    741
    742	/* save offset bits */
    743	offset = ((unsigned long) addr) & ~IOVP_MASK;
    744
    745	/* round up to nearest IOVP_SIZE */
    746	size = ALIGN(size + offset, IOVP_SIZE);
    747	spin_lock_irqsave(&ioc->res_lock, flags);
    748
    749#ifdef CCIO_COLLECT_STATS
    750	ioc->msingle_calls++;
    751	ioc->msingle_pages += size >> IOVP_SHIFT;
    752#endif
    753
    754	idx = ccio_alloc_range(ioc, dev, size);
    755	iovp = (dma_addr_t)MKIOVP(idx);
    756
    757	pdir_start = &(ioc->pdir_base[idx]);
    758
    759	DBG_RUN("%s() 0x%p -> 0x%lx size: 0x%x\n",
    760		__func__, addr, (long)iovp | offset, size);
    761
    762	/* If not cacheline aligned, force SAFE_DMA on the whole mess */
    763	if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES))
    764		hint |= HINT_SAFE_DMA;
    765
    766	while(size > 0) {
    767		ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long)addr, hint);
    768
    769		DBG_RUN(" pdir %p %08x%08x\n",
    770			pdir_start,
    771			(u32) (((u32 *) pdir_start)[0]),
    772			(u32) (((u32 *) pdir_start)[1]));
    773		++pdir_start;
    774		addr += IOVP_SIZE;
    775		size -= IOVP_SIZE;
    776	}
    777
    778	spin_unlock_irqrestore(&ioc->res_lock, flags);
    779
    780	/* form complete address */
    781	return CCIO_IOVA(iovp, offset);
    782}
    783
    784
    785static dma_addr_t
    786ccio_map_page(struct device *dev, struct page *page, unsigned long offset,
    787		size_t size, enum dma_data_direction direction,
    788		unsigned long attrs)
    789{
    790	return ccio_map_single(dev, page_address(page) + offset, size,
    791			direction);
    792}
    793
    794
    795/**
    796 * ccio_unmap_page - Unmap an address range from the IOMMU.
    797 * @dev: The PCI device.
    798 * @iova: The IOVA start address of the DMA region.
    799 * @size: The length of the DMA region.
    800 * @direction: The direction of the DMA transaction (to/from device).
    801 */
    802static void 
    803ccio_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
    804		enum dma_data_direction direction, unsigned long attrs)
    805{
    806	struct ioc *ioc;
    807	unsigned long flags; 
    808	dma_addr_t offset = iova & ~IOVP_MASK;
    809	
    810	BUG_ON(!dev);
    811	ioc = GET_IOC(dev);
    812	if (!ioc) {
    813		WARN_ON(!ioc);
    814		return;
    815	}
    816
    817	DBG_RUN("%s() iovp 0x%lx/%x\n",
    818		__func__, (long)iova, size);
    819
    820	iova ^= offset;        /* clear offset bits */
    821	size += offset;
    822	size = ALIGN(size, IOVP_SIZE);
    823
    824	spin_lock_irqsave(&ioc->res_lock, flags);
    825
    826#ifdef CCIO_COLLECT_STATS
    827	ioc->usingle_calls++;
    828	ioc->usingle_pages += size >> IOVP_SHIFT;
    829#endif
    830
    831	ccio_mark_invalid(ioc, iova, size);
    832	ccio_free_range(ioc, iova, (size >> IOVP_SHIFT));
    833	spin_unlock_irqrestore(&ioc->res_lock, flags);
    834}
    835
    836/**
    837 * ccio_alloc - Allocate a consistent DMA mapping.
    838 * @dev: The PCI device.
    839 * @size: The length of the DMA region.
    840 * @dma_handle: The DMA address handed back to the device (not the cpu).
    841 *
    842 * This function implements the pci_alloc_consistent function.
    843 */
    844static void * 
    845ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag,
    846		unsigned long attrs)
    847{
    848	void *ret;
    849#if 0
    850/* GRANT Need to establish hierarchy for non-PCI devs as well
    851** and then provide matching gsc_map_xxx() functions for them as well.
    852*/
    853	if(!hwdev) {
    854		/* only support PCI */
    855		*dma_handle = 0;
    856		return 0;
    857	}
    858#endif
    859	ret = (void *) __get_free_pages(flag, get_order(size));
    860
    861	if (ret) {
    862		memset(ret, 0, size);
    863		*dma_handle = ccio_map_single(dev, ret, size, DMA_BIDIRECTIONAL);
    864	}
    865
    866	return ret;
    867}
    868
    869/**
    870 * ccio_free - Free a consistent DMA mapping.
    871 * @dev: The PCI device.
    872 * @size: The length of the DMA region.
    873 * @cpu_addr: The cpu address returned from the ccio_alloc_consistent.
    874 * @dma_handle: The device address returned from the ccio_alloc_consistent.
    875 *
    876 * This function implements the pci_free_consistent function.
    877 */
    878static void 
    879ccio_free(struct device *dev, size_t size, void *cpu_addr,
    880		dma_addr_t dma_handle, unsigned long attrs)
    881{
    882	ccio_unmap_page(dev, dma_handle, size, 0, 0);
    883	free_pages((unsigned long)cpu_addr, get_order(size));
    884}
    885
    886/*
    887** Since 0 is a valid pdir_base index value, we can't use that
    888** to determine if a value is valid or not. Use a flag to indicate
    889** the SG list entry contains a valid pdir index.
    890*/
    891#define PIDE_FLAG 0x80000000UL
    892
    893#ifdef CCIO_COLLECT_STATS
    894#define IOMMU_MAP_STATS
    895#endif
    896#include "iommu-helpers.h"
    897
    898/**
    899 * ccio_map_sg - Map the scatter/gather list into the IOMMU.
    900 * @dev: The PCI device.
    901 * @sglist: The scatter/gather list to be mapped in the IOMMU.
    902 * @nents: The number of entries in the scatter/gather list.
    903 * @direction: The direction of the DMA transaction (to/from device).
    904 *
    905 * This function implements the pci_map_sg function.
    906 */
    907static int
    908ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, 
    909	    enum dma_data_direction direction, unsigned long attrs)
    910{
    911	struct ioc *ioc;
    912	int coalesced, filled = 0;
    913	unsigned long flags;
    914	unsigned long hint = hint_lookup[(int)direction];
    915	unsigned long prev_len = 0, current_len = 0;
    916	int i;
    917	
    918	BUG_ON(!dev);
    919	ioc = GET_IOC(dev);
    920	if (!ioc)
    921		return -EINVAL;
    922	
    923	DBG_RUN_SG("%s() START %d entries\n", __func__, nents);
    924
    925	/* Fast path single entry scatterlists. */
    926	if (nents == 1) {
    927		sg_dma_address(sglist) = ccio_map_single(dev,
    928				sg_virt(sglist), sglist->length,
    929				direction);
    930		sg_dma_len(sglist) = sglist->length;
    931		return 1;
    932	}
    933
    934	for(i = 0; i < nents; i++)
    935		prev_len += sglist[i].length;
    936	
    937	spin_lock_irqsave(&ioc->res_lock, flags);
    938
    939#ifdef CCIO_COLLECT_STATS
    940	ioc->msg_calls++;
    941#endif
    942
    943	/*
    944	** First coalesce the chunks and allocate I/O pdir space
    945	**
    946	** If this is one DMA stream, we can properly map using the
    947	** correct virtual address associated with each DMA page.
    948	** w/o this association, we wouldn't have coherent DMA!
    949	** Access to the virtual address is what forces a two pass algorithm.
    950	*/
    951	coalesced = iommu_coalesce_chunks(ioc, dev, sglist, nents, ccio_alloc_range);
    952
    953	/*
    954	** Program the I/O Pdir
    955	**
    956	** map the virtual addresses to the I/O Pdir
    957	** o dma_address will contain the pdir index
    958	** o dma_len will contain the number of bytes to map 
    959	** o page/offset contain the virtual address.
    960	*/
    961	filled = iommu_fill_pdir(ioc, sglist, nents, hint, ccio_io_pdir_entry);
    962
    963	spin_unlock_irqrestore(&ioc->res_lock, flags);
    964
    965	BUG_ON(coalesced != filled);
    966
    967	DBG_RUN_SG("%s() DONE %d mappings\n", __func__, filled);
    968
    969	for (i = 0; i < filled; i++)
    970		current_len += sg_dma_len(sglist + i);
    971
    972	BUG_ON(current_len != prev_len);
    973
    974	return filled;
    975}
    976
    977/**
    978 * ccio_unmap_sg - Unmap the scatter/gather list from the IOMMU.
    979 * @dev: The PCI device.
    980 * @sglist: The scatter/gather list to be unmapped from the IOMMU.
    981 * @nents: The number of entries in the scatter/gather list.
    982 * @direction: The direction of the DMA transaction (to/from device).
    983 *
    984 * This function implements the pci_unmap_sg function.
    985 */
    986static void 
    987ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, 
    988	      enum dma_data_direction direction, unsigned long attrs)
    989{
    990	struct ioc *ioc;
    991
    992	BUG_ON(!dev);
    993	ioc = GET_IOC(dev);
    994	if (!ioc) {
    995		WARN_ON(!ioc);
    996		return;
    997	}
    998
    999	DBG_RUN_SG("%s() START %d entries, %p,%x\n",
   1000		__func__, nents, sg_virt(sglist), sglist->length);
   1001
   1002#ifdef CCIO_COLLECT_STATS
   1003	ioc->usg_calls++;
   1004#endif
   1005
   1006	while (nents && sg_dma_len(sglist)) {
   1007
   1008#ifdef CCIO_COLLECT_STATS
   1009		ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
   1010#endif
   1011		ccio_unmap_page(dev, sg_dma_address(sglist),
   1012				  sg_dma_len(sglist), direction, 0);
   1013		++sglist;
   1014		nents--;
   1015	}
   1016
   1017	DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
   1018}
   1019
   1020static const struct dma_map_ops ccio_ops = {
   1021	.dma_supported =	ccio_dma_supported,
   1022	.alloc =		ccio_alloc,
   1023	.free =			ccio_free,
   1024	.map_page =		ccio_map_page,
   1025	.unmap_page =		ccio_unmap_page,
   1026	.map_sg =		ccio_map_sg,
   1027	.unmap_sg =		ccio_unmap_sg,
   1028	.get_sgtable =		dma_common_get_sgtable,
   1029	.alloc_pages =		dma_common_alloc_pages,
   1030	.free_pages =		dma_common_free_pages,
   1031};
   1032
   1033#ifdef CONFIG_PROC_FS
   1034static int ccio_proc_info(struct seq_file *m, void *p)
   1035{
   1036	struct ioc *ioc = ioc_list;
   1037
   1038	while (ioc != NULL) {
   1039		unsigned int total_pages = ioc->res_size << 3;
   1040#ifdef CCIO_COLLECT_STATS
   1041		unsigned long avg = 0, min, max;
   1042		int j;
   1043#endif
   1044
   1045		seq_printf(m, "%s\n", ioc->name);
   1046		
   1047		seq_printf(m, "Cujo 2.0 bug    : %s\n",
   1048			   (ioc->cujo20_bug ? "yes" : "no"));
   1049		
   1050		seq_printf(m, "IO PDIR size    : %d bytes (%d entries)\n",
   1051			   total_pages * 8, total_pages);
   1052
   1053#ifdef CCIO_COLLECT_STATS
   1054		seq_printf(m, "IO PDIR entries : %ld free  %ld used (%d%%)\n",
   1055			   total_pages - ioc->used_pages, ioc->used_pages,
   1056			   (int)(ioc->used_pages * 100 / total_pages));
   1057#endif
   1058
   1059		seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n",
   1060			   ioc->res_size, total_pages);
   1061
   1062#ifdef CCIO_COLLECT_STATS
   1063		min = max = ioc->avg_search[0];
   1064		for(j = 0; j < CCIO_SEARCH_SAMPLE; ++j) {
   1065			avg += ioc->avg_search[j];
   1066			if(ioc->avg_search[j] > max) 
   1067				max = ioc->avg_search[j];
   1068			if(ioc->avg_search[j] < min) 
   1069				min = ioc->avg_search[j];
   1070		}
   1071		avg /= CCIO_SEARCH_SAMPLE;
   1072		seq_printf(m, "  Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
   1073			   min, avg, max);
   1074
   1075		seq_printf(m, "pci_map_single(): %8ld calls  %8ld pages (avg %d/1000)\n",
   1076			   ioc->msingle_calls, ioc->msingle_pages,
   1077			   (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
   1078
   1079		/* KLUGE - unmap_sg calls unmap_page for each mapped page */
   1080		min = ioc->usingle_calls - ioc->usg_calls;
   1081		max = ioc->usingle_pages - ioc->usg_pages;
   1082		seq_printf(m, "pci_unmap_single: %8ld calls  %8ld pages (avg %d/1000)\n",
   1083			   min, max, (int)((max * 1000)/min));
   1084
   1085		seq_printf(m, "pci_map_sg()    : %8ld calls  %8ld pages (avg %d/1000)\n",
   1086			   ioc->msg_calls, ioc->msg_pages,
   1087			   (int)((ioc->msg_pages * 1000)/ioc->msg_calls));
   1088
   1089		seq_printf(m, "pci_unmap_sg()  : %8ld calls  %8ld pages (avg %d/1000)\n\n\n",
   1090			   ioc->usg_calls, ioc->usg_pages,
   1091			   (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
   1092#endif	/* CCIO_COLLECT_STATS */
   1093
   1094		ioc = ioc->next;
   1095	}
   1096
   1097	return 0;
   1098}
   1099
   1100static int ccio_proc_bitmap_info(struct seq_file *m, void *p)
   1101{
   1102	struct ioc *ioc = ioc_list;
   1103
   1104	while (ioc != NULL) {
   1105		seq_hex_dump(m, "   ", DUMP_PREFIX_NONE, 32, 4, ioc->res_map,
   1106			     ioc->res_size, false);
   1107		seq_putc(m, '\n');
   1108		ioc = ioc->next;
   1109		break; /* XXX - remove me */
   1110	}
   1111
   1112	return 0;
   1113}
   1114#endif /* CONFIG_PROC_FS */
   1115
   1116/**
   1117 * ccio_find_ioc - Find the ioc in the ioc_list
   1118 * @hw_path: The hardware path of the ioc.
   1119 *
   1120 * This function searches the ioc_list for an ioc that matches
   1121 * the provided hardware path.
   1122 */
   1123static struct ioc * ccio_find_ioc(int hw_path)
   1124{
   1125	int i;
   1126	struct ioc *ioc;
   1127
   1128	ioc = ioc_list;
   1129	for (i = 0; i < ioc_count; i++) {
   1130		if (ioc->hw_path == hw_path)
   1131			return ioc;
   1132
   1133		ioc = ioc->next;
   1134	}
   1135
   1136	return NULL;
   1137}
   1138
   1139/**
   1140 * ccio_get_iommu - Find the iommu which controls this device
   1141 * @dev: The parisc device.
   1142 *
   1143 * This function searches through the registered IOMMUs and returns
   1144 * the appropriate IOMMU for the device based on its hardware path.
   1145 */
   1146void * ccio_get_iommu(const struct parisc_device *dev)
   1147{
   1148	dev = find_pa_parent_type(dev, HPHW_IOA);
   1149	if (!dev)
   1150		return NULL;
   1151
   1152	return ccio_find_ioc(dev->hw_path);
   1153}
   1154
   1155#define CUJO_20_STEP       0x10000000	/* inc upper nibble */
   1156
   1157/* Cujo 2.0 has a bug which will silently corrupt data being transferred
   1158 * to/from certain pages.  To avoid this happening, we mark these pages
   1159 * as `used', and ensure that nothing will try to allocate from them.
   1160 */
   1161void __init ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp)
   1162{
   1163	unsigned int idx;
   1164	struct parisc_device *dev = parisc_parent(cujo);
   1165	struct ioc *ioc = ccio_get_iommu(dev);
   1166	u8 *res_ptr;
   1167
   1168	ioc->cujo20_bug = 1;
   1169	res_ptr = ioc->res_map;
   1170	idx = PDIR_INDEX(iovp) >> 3;
   1171
   1172	while (idx < ioc->res_size) {
   1173		res_ptr[idx] |= 0xff;
   1174		idx += PDIR_INDEX(CUJO_20_STEP) >> 3;
   1175	}
   1176}
   1177
   1178#if 0
   1179/* GRANT -  is this needed for U2 or not? */
   1180
   1181/*
   1182** Get the size of the I/O TLB for this I/O MMU.
   1183**
   1184** If spa_shift is non-zero (ie probably U2),
   1185** then calculate the I/O TLB size using spa_shift.
   1186**
   1187** Otherwise we are supposed to get the IODC entry point ENTRY TLB
   1188** and execute it. However, both U2 and Uturn firmware supplies spa_shift.
   1189** I think only Java (K/D/R-class too?) systems don't do this.
   1190*/
   1191static int
   1192ccio_get_iotlb_size(struct parisc_device *dev)
   1193{
   1194	if (dev->spa_shift == 0) {
   1195		panic("%s() : Can't determine I/O TLB size.\n", __func__);
   1196	}
   1197	return (1 << dev->spa_shift);
   1198}
   1199#else
   1200
   1201/* Uturn supports 256 TLB entries */
   1202#define CCIO_CHAINID_SHIFT	8
   1203#define CCIO_CHAINID_MASK	0xff
   1204#endif /* 0 */
   1205
   1206/* We *can't* support JAVA (T600). Venture there at your own risk. */
   1207static const struct parisc_device_id ccio_tbl[] __initconst = {
   1208	{ HPHW_IOA, HVERSION_REV_ANY_ID, U2_IOA_RUNWAY, 0xb }, /* U2 */
   1209	{ HPHW_IOA, HVERSION_REV_ANY_ID, UTURN_IOA_RUNWAY, 0xb }, /* UTurn */
   1210	{ 0, }
   1211};
   1212
   1213static int ccio_probe(struct parisc_device *dev);
   1214
   1215static struct parisc_driver ccio_driver __refdata = {
   1216	.name =		"ccio",
   1217	.id_table =	ccio_tbl,
   1218	.probe =	ccio_probe,
   1219};
   1220
   1221/**
   1222 * ccio_ioc_init - Initialize the I/O Controller
   1223 * @ioc: The I/O Controller.
   1224 *
   1225 * Initialize the I/O Controller which includes setting up the
   1226 * I/O Page Directory, the resource map, and initializing the
   1227 * U2/Uturn chip into virtual mode.
   1228 */
   1229static void __init
   1230ccio_ioc_init(struct ioc *ioc)
   1231{
   1232	int i;
   1233	unsigned int iov_order;
   1234	u32 iova_space_size;
   1235
   1236	/*
   1237	** Determine IOVA Space size from memory size.
   1238	**
   1239	** Ideally, PCI drivers would register the maximum number
   1240	** of DMA they can have outstanding for each device they
   1241	** own.  Next best thing would be to guess how much DMA
   1242	** can be outstanding based on PCI Class/sub-class. Both
   1243	** methods still require some "extra" to support PCI
   1244	** Hot-Plug/Removal of PCI cards. (aka PCI OLARD).
   1245	*/
   1246
   1247	iova_space_size = (u32) (totalram_pages() / count_parisc_driver(&ccio_driver));
   1248
   1249	/* limit IOVA space size to 1MB-1GB */
   1250
   1251	if (iova_space_size < (1 << (20 - PAGE_SHIFT))) {
   1252		iova_space_size =  1 << (20 - PAGE_SHIFT);
   1253#ifdef __LP64__
   1254	} else if (iova_space_size > (1 << (30 - PAGE_SHIFT))) {
   1255		iova_space_size =  1 << (30 - PAGE_SHIFT);
   1256#endif
   1257	}
   1258
   1259	/*
   1260	** The iova space size must be a power of two;
   1261	** thus, the pdir/res_map sizes will also be powers of two.
   1262	*/
   1263
   1264	/* We could use larger page sizes in order to *decrease* the number
   1265	** of mappings needed.  (ie 8k pages means 1/2 the mappings).
   1266	**
   1267	** Note: Grant Grundler says "Using 8k I/O pages isn't trivial either
   1268	**   since the pages must also be physically contiguous - typically
   1269	**   this is the case under linux."
   1270	*/
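       	/* Worked example (illustrative): a 1GB IOVA space with 4kB I/O pages
       	** needs 1GB/4kB = 262144 IO PDIR entries of 8 bytes each, ie a 2MB
       	** pdir (within the 8MB limit checked below) and a 262144/8 = 32kB
       	** resource bitmap.
       	*/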
   1271
   1272	iov_order = get_order(iova_space_size << PAGE_SHIFT);
   1273
   1274	/* iova_space_size is now bytes, not pages */
   1275	iova_space_size = 1 << (iov_order + PAGE_SHIFT);
   1276
   1277	ioc->pdir_size = (iova_space_size / IOVP_SIZE) * sizeof(u64);
   1278
   1279	BUG_ON(ioc->pdir_size > 8 * 1024 * 1024);   /* max pdir size <= 8MB */
   1280
   1281	/* Verify it's a power of two */
   1282	BUG_ON((1 << get_order(ioc->pdir_size)) != (ioc->pdir_size >> PAGE_SHIFT));
   1283
   1284	DBG_INIT("%s() hpa 0x%p mem %luMB IOV %dMB (%d bits)\n",
   1285			__func__, ioc->ioc_regs,
   1286			(unsigned long) totalram_pages() >> (20 - PAGE_SHIFT),
   1287			iova_space_size>>20,
   1288			iov_order + PAGE_SHIFT);
   1289
   1290	ioc->pdir_base = (u64 *)__get_free_pages(GFP_KERNEL, 
   1291						 get_order(ioc->pdir_size));
   1292	if(NULL == ioc->pdir_base) {
   1293		panic("%s() could not allocate I/O Page Table\n", __func__);
   1294	}
   1295	memset(ioc->pdir_base, 0, ioc->pdir_size);
   1296
   1297	BUG_ON((((unsigned long)ioc->pdir_base) & PAGE_MASK) != (unsigned long)ioc->pdir_base);
   1298	DBG_INIT(" base %p\n", ioc->pdir_base);
   1299
   1300	/* resource map size dictated by pdir_size */
   1301	ioc->res_size = (ioc->pdir_size / sizeof(u64)) >> 3;
   1302	DBG_INIT("%s() res_size 0x%x\n", __func__, ioc->res_size);
   1303	
   1304	ioc->res_map = (u8 *)__get_free_pages(GFP_KERNEL, 
   1305					      get_order(ioc->res_size));
   1306	if(NULL == ioc->res_map) {
   1307		panic("%s() could not allocate resource map\n", __func__);
   1308	}
   1309	memset(ioc->res_map, 0, ioc->res_size);
   1310
   1311	/* Initialize the res_hint to 16 */
   1312	ioc->res_hint = 16;
   1313
   1314	/* Initialize the spinlock */
   1315	spin_lock_init(&ioc->res_lock);
   1316
   1317	/*
   1318	** Chainid is the upper most bits of an IOVP used to determine
   1319	** which TLB entry an IOVP will use.
   1320	*/
   1321	ioc->chainid_shift = get_order(iova_space_size) + PAGE_SHIFT - CCIO_CHAINID_SHIFT;
   1322	DBG_INIT(" chainid_shift 0x%x\n", ioc->chainid_shift);
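       	/* Illustrative example: a 256MB IOVA space gives chainid_shift =
       	** 28 - CCIO_CHAINID_SHIFT = 20, so the chain id mask programmed
       	** below is 0xff << 20, ie the top eight bits of the 28-bit IOVP
       	** select one of the 256 I/O TLB chains.
       	*/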
   1323
   1324	/*
   1325	** Initialize IOA hardware
   1326	*/
   1327	WRITE_U32(CCIO_CHAINID_MASK << ioc->chainid_shift, 
   1328		  &ioc->ioc_regs->io_chain_id_mask);
   1329
   1330	WRITE_U32(virt_to_phys(ioc->pdir_base), 
   1331		  &ioc->ioc_regs->io_pdir_base);
   1332
   1333	/*
   1334	** Go to "Virtual Mode"
   1335	*/
   1336	WRITE_U32(IOA_NORMAL_MODE, &ioc->ioc_regs->io_control);
   1337
   1338	/*
   1339	** Initialize all I/O TLB entries to 0 (Valid bit off).
   1340	*/
   1341	WRITE_U32(0, &ioc->ioc_regs->io_tlb_entry_m);
   1342	WRITE_U32(0, &ioc->ioc_regs->io_tlb_entry_l);
   1343
   1344	for(i = 1 << CCIO_CHAINID_SHIFT; i ; i--) {
   1345		WRITE_U32((CMD_TLB_DIRECT_WRITE | (i << ioc->chainid_shift)),
   1346			  &ioc->ioc_regs->io_command);
   1347	}
   1348}
   1349
   1350static void __init
   1351ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr)
   1352{
   1353	int result;
   1354
   1355	res->parent = NULL;
   1356	res->flags = IORESOURCE_MEM;
   1357	/*
   1358	 * the ((signed) ...) casts are required for 64bit kernels because
   1359	 * we only want to sign extend the lower 16 bits of the register.
   1360	 * The upper 16-bits of range registers are hardcoded to 0xffff.
   1361	 */
   1362	res->start = (unsigned long)((signed) READ_U32(ioaddr) << 16);
   1363	res->end = (unsigned long)((signed) (READ_U32(ioaddr + 4) << 16) - 1);
   1364	res->name = name;
   1365	/*
   1366	 * Check if this MMIO range is disabled
   1367	 */
   1368	if (res->end + 1 == res->start)
   1369		return;
   1370
   1371	/* On some platforms (e.g. K-Class), we have already registered
   1372	 * resources for devices reported by firmware. Some are children
   1373	 * of ccio.
   1374	 * "insert" ccio ranges in the mmio hierarchy (/proc/iomem).
   1375	 */
   1376	result = insert_resource(&iomem_resource, res);
   1377	if (result < 0) {
   1378		printk(KERN_ERR "%s() failed to claim CCIO bus address space (%08lx,%08lx)\n", 
   1379			__func__, (unsigned long)res->start, (unsigned long)res->end);
   1380	}
   1381}
   1382
   1383static void __init ccio_init_resources(struct ioc *ioc)
   1384{
   1385	struct resource *res = ioc->mmio_region;
   1386	char *name = kmalloc(14, GFP_KERNEL);
   1387
   1388	snprintf(name, 14, "GSC Bus [%d/]", ioc->hw_path);
   1389
   1390	ccio_init_resource(res, name, &ioc->ioc_regs->io_io_low);
   1391	ccio_init_resource(res + 1, name, &ioc->ioc_regs->io_io_low_hv);
   1392}
   1393
   1394static int new_ioc_area(struct resource *res, unsigned long size,
   1395		unsigned long min, unsigned long max, unsigned long align)
   1396{
   1397	if (max <= min)
   1398		return -EBUSY;
   1399
   1400	res->start = (max - size + 1) &~ (align - 1);
   1401	res->end = res->start + size;
   1402	
   1403	/* We might be trying to expand the MMIO range to include
   1404	 * a child device that has already registered its MMIO space.
   1405	 * Use "insert" instead of request_resource().
   1406	 */
   1407	if (!insert_resource(&iomem_resource, res))
   1408		return 0;
   1409
   1410	return new_ioc_area(res, size, min, max - size, align);
   1411}
   1412
   1413static int expand_ioc_area(struct resource *res, unsigned long size,
   1414		unsigned long min, unsigned long max, unsigned long align)
   1415{
   1416	unsigned long start, len;
   1417
   1418	if (!res->parent)
   1419		return new_ioc_area(res, size, min, max, align);
   1420
   1421	start = (res->start - size) &~ (align - 1);
   1422	len = res->end - start + 1;
   1423	if (start >= min) {
   1424		if (!adjust_resource(res, start, len))
   1425			return 0;
   1426	}
   1427
   1428	start = res->start;
   1429	len = ((size + res->end + align) &~ (align - 1)) - start;
   1430	if (start + len <= max) {
   1431		if (!adjust_resource(res, start, len))
   1432			return 0;
   1433	}
   1434
   1435	return -EBUSY;
   1436}
   1437
   1438/*
   1439 * Dino calls this function.  Beware that we may get called on systems
   1440 * which have no IOC (725, B180, C160L, etc) but do have a Dino.
   1441 * So it's legal to find no parent IOC.
   1442 *
   1443 * Some other issues: one of the resources in the ioc may be unassigned.
   1444 */
   1445int ccio_allocate_resource(const struct parisc_device *dev,
   1446		struct resource *res, unsigned long size,
   1447		unsigned long min, unsigned long max, unsigned long align)
   1448{
   1449	struct resource *parent = &iomem_resource;
   1450	struct ioc *ioc = ccio_get_iommu(dev);
   1451	if (!ioc)
   1452		goto out;
   1453
   1454	parent = ioc->mmio_region;
   1455	if (parent->parent &&
   1456	    !allocate_resource(parent, res, size, min, max, align, NULL, NULL))
   1457		return 0;
   1458
   1459	if ((parent + 1)->parent &&
   1460	    !allocate_resource(parent + 1, res, size, min, max, align,
   1461				NULL, NULL))
   1462		return 0;
   1463
   1464	if (!expand_ioc_area(parent, size, min, max, align)) {
   1465		__raw_writel(((parent->start)>>16) | 0xffff0000,
   1466			     &ioc->ioc_regs->io_io_low);
   1467		__raw_writel(((parent->end)>>16) | 0xffff0000,
   1468			     &ioc->ioc_regs->io_io_high);
   1469	} else if (!expand_ioc_area(parent + 1, size, min, max, align)) {
   1470		parent++;
   1471		__raw_writel(((parent->start)>>16) | 0xffff0000,
   1472			     &ioc->ioc_regs->io_io_low_hv);
   1473		__raw_writel(((parent->end)>>16) | 0xffff0000,
   1474			     &ioc->ioc_regs->io_io_high_hv);
   1475	} else {
   1476		return -EBUSY;
   1477	}
   1478
   1479 out:
   1480	return allocate_resource(parent, res, size, min, max, align, NULL,NULL);
   1481}
   1482
   1483int ccio_request_resource(const struct parisc_device *dev,
   1484		struct resource *res)
   1485{
   1486	struct resource *parent;
   1487	struct ioc *ioc = ccio_get_iommu(dev);
   1488
   1489	if (!ioc) {
   1490		parent = &iomem_resource;
   1491	} else if ((ioc->mmio_region->start <= res->start) &&
   1492			(res->end <= ioc->mmio_region->end)) {
   1493		parent = ioc->mmio_region;
   1494	} else if (((ioc->mmio_region + 1)->start <= res->start) &&
   1495			(res->end <= (ioc->mmio_region + 1)->end)) {
   1496		parent = ioc->mmio_region + 1;
   1497	} else {
   1498		return -EBUSY;
   1499	}
   1500
   1501	/* "transparent" bus bridges need to register MMIO resources
   1502	 * that firmware assigned to them. E.g. children of hppb.c (e.g. K-class)
   1503	 * registered their resources in the PDC "bus walk" (See
   1504	 * arch/parisc/kernel/inventory.c).
   1505	 */
   1506	return insert_resource(parent, res);
   1507}
   1508
   1509/**
   1510 * ccio_probe - Determine if ccio should claim this device.
   1511 * @dev: The device which has been found
   1512 *
   1513 * Determine if ccio should claim this chip (return 0) or not (return nonzero).
   1514 * If so, initialize the chip and tell other partners in crime they
   1515 * have work to do.
   1516 */
   1517static int __init ccio_probe(struct parisc_device *dev)
   1518{
   1519	int i;
   1520	struct ioc *ioc, **ioc_p = &ioc_list;
   1521	struct pci_hba_data *hba;
   1522
   1523	ioc = kzalloc(sizeof(struct ioc), GFP_KERNEL);
   1524	if (ioc == NULL) {
   1525		printk(KERN_ERR MODULE_NAME ": memory allocation failure\n");
   1526		return -ENOMEM;
   1527	}
   1528
   1529	ioc->name = dev->id.hversion == U2_IOA_RUNWAY ? "U2" : "UTurn";
   1530
   1531	printk(KERN_INFO "Found %s at 0x%lx\n", ioc->name,
   1532		(unsigned long)dev->hpa.start);
   1533
   1534	for (i = 0; i < ioc_count; i++) {
   1535		ioc_p = &(*ioc_p)->next;
   1536	}
   1537	*ioc_p = ioc;
   1538
   1539	ioc->hw_path = dev->hw_path;
   1540	ioc->ioc_regs = ioremap(dev->hpa.start, 4096);
   1541	if (!ioc->ioc_regs) {
   1542		kfree(ioc);
   1543		return -ENOMEM;
   1544	}
   1545	ccio_ioc_init(ioc);
   1546	ccio_init_resources(ioc);
   1547	hppa_dma_ops = &ccio_ops;
   1548
   1549	hba = kzalloc(sizeof(*hba), GFP_KERNEL);
   1550	/* if this fails, no I/O cards will work, so may as well bug */
   1551	BUG_ON(hba == NULL);
   1552
   1553	hba->iommu = ioc;
   1554	dev->dev.platform_data = hba;
   1555
   1556#ifdef CONFIG_PROC_FS
   1557	if (ioc_count == 0) {
   1558		proc_create_single(MODULE_NAME, 0, proc_runway_root,
   1559				ccio_proc_info);
   1560		proc_create_single(MODULE_NAME"-bitmap", 0, proc_runway_root,
   1561				ccio_proc_bitmap_info);
   1562	}
   1563#endif
   1564	ioc_count++;
   1565	return 0;
   1566}
   1567
   1568/**
   1569 * ccio_init - ccio initialization procedure.
   1570 *
   1571 * Register this driver.
   1572 */
   1573void __init ccio_init(void)
   1574{
   1575	register_parisc_driver(&ccio_driver);
   1576}
   1577