cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cppi_dma.c (44809B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (C) 2005-2006 by Texas Instruments
      4 *
      5 * This file implements a DMA interface using TI's CPPI DMA.
      6 * For now it's DaVinci-only, but CPPI isn't specific to DaVinci or USB.
      7 * The TUSB6020, using VLYNQ, has CPPI that looks much like DaVinci.
      8 */
      9
     10#include <linux/module.h>
     11#include <linux/platform_device.h>
     12#include <linux/slab.h>
     13#include <linux/usb.h>
     14
     15#include "musb_core.h"
     16#include "musb_debug.h"
     17#include "cppi_dma.h"
     18#include "davinci.h"
     19
     20
     21/* CPPI DMA status 7-mar-2006:
     22 *
     23 * - See musb_{host,gadget}.c for more info
     24 *
     25 * - Correct RX DMA generally forces the engine into irq-per-packet mode,
     26 *   which can easily saturate the CPU under non-mass-storage loads.
     27 *
     28 * NOTES 24-aug-2006 (2.6.18-rc4):
     29 *
     30 * - peripheral RXDMA wedged in a test with packets of length 512/512/1.
     31 *   evidently after the 1 byte packet was received and acked, the queue
     32 *   of BDs got garbaged so it wouldn't empty the fifo.  (rxcsr 0x2003,
     33 *   and RX DMA0: 4 left, 80000000 8feff880, 8feff860 8feff860; 8f321401
     34 *   004001ff 00000001 .. 8feff860)  Host was just getting NAKed on tx
     35 *   of its next (512 byte) packet.  IRQ issues?
     36 *
     37 * REVISIT:  the "transfer DMA" glue between CPPI and USB fifos will
     38 * evidently also directly update the RX and TX CSRs ... so audit all
     39 * host and peripheral side DMA code to avoid CSR access after DMA has
     40 * been started.
     41 */
     42
     43/* REVISIT now we can avoid preallocating these descriptors; or
     44 * more simply, switch to a global freelist not per-channel ones.
     45 * Note: at full speed, 64 descriptors == 4K bulk data.
     46 */
     47#define NUM_TXCHAN_BD       64
     48#define NUM_RXCHAN_BD       64
     49
     50static inline void cpu_drain_writebuffer(void)
     51{
     52	wmb();
     53#ifdef	CONFIG_CPU_ARM926T
     54	/* REVISIT this "should not be needed",
     55	 * but lack of it sure seemed to hurt ...
     56	 */
     57	asm("mcr p15, 0, r0, c7, c10, 4 @ drain write buffer\n");
     58#endif
     59}
     60
     61static inline struct cppi_descriptor *cppi_bd_alloc(struct cppi_channel *c)
     62{
     63	struct cppi_descriptor	*bd = c->freelist;
     64
     65	if (bd)
     66		c->freelist = bd->next;
     67	return bd;
     68}
     69
     70static inline void
     71cppi_bd_free(struct cppi_channel *c, struct cppi_descriptor *bd)
     72{
     73	if (!bd)
     74		return;
     75	bd->next = c->freelist;
     76	c->freelist = bd;
     77}
     78
     79/*
     80 *  Start DMA controller
     81 *
     82 *  Initialize the DMA controller as necessary.
     83 */
     84
     85/* zero out entire rx state RAM entry for the channel */
     86static void cppi_reset_rx(struct cppi_rx_stateram __iomem *rx)
     87{
     88	musb_writel(&rx->rx_skipbytes, 0, 0);
     89	musb_writel(&rx->rx_head, 0, 0);
     90	musb_writel(&rx->rx_sop, 0, 0);
     91	musb_writel(&rx->rx_current, 0, 0);
     92	musb_writel(&rx->rx_buf_current, 0, 0);
     93	musb_writel(&rx->rx_len_len, 0, 0);
     94	musb_writel(&rx->rx_cnt_cnt, 0, 0);
     95}
     96
     97/* zero out entire tx state RAM entry for the channel */
     98static void cppi_reset_tx(struct cppi_tx_stateram __iomem *tx, u32 ptr)
     99{
    100	musb_writel(&tx->tx_head, 0, 0);
    101	musb_writel(&tx->tx_buf, 0, 0);
    102	musb_writel(&tx->tx_current, 0, 0);
    103	musb_writel(&tx->tx_buf_current, 0, 0);
    104	musb_writel(&tx->tx_info, 0, 0);
    105	musb_writel(&tx->tx_rem_len, 0, 0);
    106	/* musb_writel(&tx->tx_dummy, 0, 0); */
    107	musb_writel(&tx->tx_complete, 0, ptr);
    108}
    109
    110static void cppi_pool_init(struct cppi *cppi, struct cppi_channel *c)
    111{
    112	int	j;
    113
    114	/* initialize channel fields */
    115	c->head = NULL;
    116	c->tail = NULL;
    117	c->last_processed = NULL;
    118	c->channel.status = MUSB_DMA_STATUS_UNKNOWN;
    119	c->controller = cppi;
    120	c->is_rndis = 0;
    121	c->freelist = NULL;
    122
    123	/* build the BD Free list for the channel */
    124	for (j = 0; j < NUM_TXCHAN_BD + 1; j++) {
    125		struct cppi_descriptor	*bd;
    126		dma_addr_t		dma;
    127
    128		bd = dma_pool_alloc(cppi->pool, GFP_KERNEL, &dma);
    129		bd->dma = dma;
    130		cppi_bd_free(c, bd);
    131	}
    132}
    133
    134static int cppi_channel_abort(struct dma_channel *);
    135
    136static void cppi_pool_free(struct cppi_channel *c)
    137{
    138	struct cppi		*cppi = c->controller;
    139	struct cppi_descriptor	*bd;
    140
    141	(void) cppi_channel_abort(&c->channel);
    142	c->channel.status = MUSB_DMA_STATUS_UNKNOWN;
    143	c->controller = NULL;
    144
    145	/* free all its bds */
    146	bd = c->last_processed;
    147	do {
    148		if (bd)
    149			dma_pool_free(cppi->pool, bd, bd->dma);
    150		bd = cppi_bd_alloc(c);
    151	} while (bd);
    152	c->last_processed = NULL;
    153}
    154
    155static void cppi_controller_start(struct cppi *controller)
    156{
    157	void __iomem	*tibase;
    158	int		i;
    159
    160	/* do whatever is necessary to start controller */
    161	for (i = 0; i < ARRAY_SIZE(controller->tx); i++) {
    162		controller->tx[i].transmit = true;
    163		controller->tx[i].index = i;
    164	}
    165	for (i = 0; i < ARRAY_SIZE(controller->rx); i++) {
    166		controller->rx[i].transmit = false;
    167		controller->rx[i].index = i;
    168	}
    169
    170	/* setup BD list on a per channel basis */
    171	for (i = 0; i < ARRAY_SIZE(controller->tx); i++)
    172		cppi_pool_init(controller, controller->tx + i);
    173	for (i = 0; i < ARRAY_SIZE(controller->rx); i++)
    174		cppi_pool_init(controller, controller->rx + i);
    175
    176	tibase =  controller->tibase;
    177	INIT_LIST_HEAD(&controller->tx_complete);
    178
    179	/* initialise tx/rx channel head pointers to zero */
    180	for (i = 0; i < ARRAY_SIZE(controller->tx); i++) {
    181		struct cppi_channel	*tx_ch = controller->tx + i;
    182		struct cppi_tx_stateram __iomem *tx;
    183
    184		INIT_LIST_HEAD(&tx_ch->tx_complete);
    185
    186		tx = tibase + DAVINCI_TXCPPI_STATERAM_OFFSET(i);
    187		tx_ch->state_ram = tx;
    188		cppi_reset_tx(tx, 0);
    189	}
    190	for (i = 0; i < ARRAY_SIZE(controller->rx); i++) {
    191		struct cppi_channel	*rx_ch = controller->rx + i;
    192		struct cppi_rx_stateram __iomem *rx;
    193
    194		INIT_LIST_HEAD(&rx_ch->tx_complete);
    195
    196		rx = tibase + DAVINCI_RXCPPI_STATERAM_OFFSET(i);
    197		rx_ch->state_ram = rx;
    198		cppi_reset_rx(rx);
    199	}
    200
    201	/* enable individual cppi channels */
    202	musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG,
    203			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
    204	musb_writel(tibase, DAVINCI_RXCPPI_INTENAB_REG,
    205			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
    206
    207	/* enable tx/rx CPPI control */
    208	musb_writel(tibase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);
    209	musb_writel(tibase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);
    210
    211	/* disable RNDIS mode, also host rx RNDIS autorequest */
    212	musb_writel(tibase, DAVINCI_RNDIS_REG, 0);
    213	musb_writel(tibase, DAVINCI_AUTOREQ_REG, 0);
    214}
    215
    216/*
    217 *  Stop DMA controller
    218 *
    219 *  De-initialize the DMA controller as necessary.
    220 */
    221
    222static void cppi_controller_stop(struct cppi *controller)
    223{
    224	void __iomem		*tibase;
    225	int			i;
    226	struct musb		*musb;
    227
    228	musb = controller->controller.musb;
    229
    230	tibase = controller->tibase;
    231	/* disable individual channel interrupts */
    232	musb_writel(tibase, DAVINCI_TXCPPI_INTCLR_REG,
    233			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
    234	musb_writel(tibase, DAVINCI_RXCPPI_INTCLR_REG,
    235			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
    236
    237	musb_dbg(musb, "Tearing down RX and TX Channels");
    238	for (i = 0; i < ARRAY_SIZE(controller->tx); i++) {
    239		/* FIXME restructure of txdma to use bds like rxdma */
    240		controller->tx[i].last_processed = NULL;
    241		cppi_pool_free(controller->tx + i);
    242	}
    243	for (i = 0; i < ARRAY_SIZE(controller->rx); i++)
    244		cppi_pool_free(controller->rx + i);
    245
    246	/* In the TX case proper teardown is supported, so we disable
    247	 * TX/RX CPPI only after the TX channels have been cleaned up;
    248	 * TX CPPI cannot be disabled before TX teardown is complete.
    249	 */
    250	/* disable tx/rx cppi */
    251	musb_writel(tibase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE);
    252	musb_writel(tibase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE);
    253}
    254
    255/* While dma channel is allocated, we only want the core irqs active
    256 * for fault reports, otherwise we'd get irqs that we don't care about.
    257 * Except for TX irqs, where dma done != fifo empty and reusable ...
    258 *
    259 * NOTE: docs don't say either way, but irq masking **enables** irqs.
    260 *
    261 * REVISIT same issue applies to pure PIO usage too, and non-cppi dma...
    262 */
    263static inline void core_rxirq_disable(void __iomem *tibase, unsigned epnum)
    264{
    265	musb_writel(tibase, DAVINCI_USB_INT_MASK_CLR_REG, 1 << (epnum + 8));
    266}
    267
    268static inline void core_rxirq_enable(void __iomem *tibase, unsigned epnum)
    269{
    270	musb_writel(tibase, DAVINCI_USB_INT_MASK_SET_REG, 1 << (epnum + 8));
    271}
    272
    273
    274/*
    275 * Allocate a CPPI Channel for DMA.  With CPPI, channels are bound to
    276 * each transfer direction of a non-control endpoint, so allocating
    277 * (and deallocating) is mostly a way to notice bad housekeeping on
    278 * the software side.  We assume the irqs are always active.
    279 */
    280static struct dma_channel *
    281cppi_channel_allocate(struct dma_controller *c,
    282		struct musb_hw_ep *ep, u8 transmit)
    283{
    284	struct cppi		*controller;
    285	u8			index;
    286	struct cppi_channel	*cppi_ch;
    287	void __iomem		*tibase;
    288	struct musb		*musb;
    289
    290	controller = container_of(c, struct cppi, controller);
    291	tibase = controller->tibase;
    292	musb = c->musb;
    293
    294	/* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */
    295	index = ep->epnum - 1;
    296
    297	/* return the corresponding CPPI Channel Handle, and
    298	 * probably disable the non-CPPI irq until we need it.
    299	 */
    300	if (transmit) {
    301		if (index >= ARRAY_SIZE(controller->tx)) {
    302			musb_dbg(musb, "no %cX%d CPPI channel", 'T', index);
    303			return NULL;
    304		}
    305		cppi_ch = controller->tx + index;
    306	} else {
    307		if (index >= ARRAY_SIZE(controller->rx)) {
    308			musb_dbg(musb, "no %cX%d CPPI channel", 'R', index);
    309			return NULL;
    310		}
    311		cppi_ch = controller->rx + index;
    312		core_rxirq_disable(tibase, ep->epnum);
    313	}
    314
    315	/* REVISIT make this an error later once the same driver code works
    316	 * with the other DMA engine too
    317	 */
    318	if (cppi_ch->hw_ep)
    319		musb_dbg(musb, "re-allocating DMA%d %cX channel %p",
    320				index, transmit ? 'T' : 'R', cppi_ch);
    321	cppi_ch->hw_ep = ep;
    322	cppi_ch->channel.status = MUSB_DMA_STATUS_FREE;
    323	cppi_ch->channel.max_len = 0x7fffffff;
    324
    325	musb_dbg(musb, "Allocate CPPI%d %cX", index, transmit ? 'T' : 'R');
    326	return &cppi_ch->channel;
    327}
    328
    329/* Release a CPPI Channel.  */
    330static void cppi_channel_release(struct dma_channel *channel)
    331{
    332	struct cppi_channel	*c;
    333	void __iomem		*tibase;
    334
    335	/* REVISIT:  for paranoia, check state and abort if needed... */
    336
    337	c = container_of(channel, struct cppi_channel, channel);
    338	tibase = c->controller->tibase;
    339	if (!c->hw_ep)
    340		musb_dbg(c->controller->controller.musb,
    341			"releasing idle DMA channel %p", c);
    342	else if (!c->transmit)
    343		core_rxirq_enable(tibase, c->index + 1);
    344
    345	/* for now, leave its cppi IRQ enabled (we won't trigger it) */
    346	c->hw_ep = NULL;
    347	channel->status = MUSB_DMA_STATUS_UNKNOWN;
    348}
    349
    350/* Context: controller irqlocked */
    351static void
    352cppi_dump_rx(int level, struct cppi_channel *c, const char *tag)
    353{
    354	void __iomem			*base = c->controller->mregs;
    355	struct cppi_rx_stateram __iomem	*rx = c->state_ram;
    356
    357	musb_ep_select(base, c->index + 1);
    358
    359	musb_dbg(c->controller->controller.musb,
    360		"RX DMA%d%s: %d left, csr %04x, "
    361		"%08x H%08x S%08x C%08x, "
    362		"B%08x L%08x %08x .. %08x",
    363		c->index, tag,
    364		musb_readl(c->controller->tibase,
    365			DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index),
    366		musb_readw(c->hw_ep->regs, MUSB_RXCSR),
    367
    368		musb_readl(&rx->rx_skipbytes, 0),
    369		musb_readl(&rx->rx_head, 0),
    370		musb_readl(&rx->rx_sop, 0),
    371		musb_readl(&rx->rx_current, 0),
    372
    373		musb_readl(&rx->rx_buf_current, 0),
    374		musb_readl(&rx->rx_len_len, 0),
    375		musb_readl(&rx->rx_cnt_cnt, 0),
    376		musb_readl(&rx->rx_complete, 0)
    377		);
    378}
    379
    380/* Context: controller irqlocked */
    381static void
    382cppi_dump_tx(int level, struct cppi_channel *c, const char *tag)
    383{
    384	void __iomem			*base = c->controller->mregs;
    385	struct cppi_tx_stateram __iomem	*tx = c->state_ram;
    386
    387	musb_ep_select(base, c->index + 1);
    388
    389	musb_dbg(c->controller->controller.musb,
    390		"TX DMA%d%s: csr %04x, "
    391		"H%08x S%08x C%08x %08x, "
    392		"F%08x L%08x .. %08x",
    393		c->index, tag,
    394		musb_readw(c->hw_ep->regs, MUSB_TXCSR),
    395
    396		musb_readl(&tx->tx_head, 0),
    397		musb_readl(&tx->tx_buf, 0),
    398		musb_readl(&tx->tx_current, 0),
    399		musb_readl(&tx->tx_buf_current, 0),
    400
    401		musb_readl(&tx->tx_info, 0),
    402		musb_readl(&tx->tx_rem_len, 0),
    403		/* dummy/unused word 6 */
    404		musb_readl(&tx->tx_complete, 0)
    405		);
    406}
    407
    408/* Context: controller irqlocked */
    409static inline void
    410cppi_rndis_update(struct cppi_channel *c, int is_rx,
    411		void __iomem *tibase, int is_rndis)
    412{
    413	/* we may need to change the rndis flag for this cppi channel */
    414	if (c->is_rndis != is_rndis) {
    415		u32	value = musb_readl(tibase, DAVINCI_RNDIS_REG);
    416		u32	temp = 1 << (c->index);
    417
    418		if (is_rx)
    419			temp <<= 16;
    420		if (is_rndis)
    421			value |= temp;
    422		else
    423			value &= ~temp;
    424		musb_writel(tibase, DAVINCI_RNDIS_REG, value);
    425		c->is_rndis = is_rndis;
    426	}
    427}
    428
    429static void cppi_dump_rxbd(const char *tag, struct cppi_descriptor *bd)
    430{
    431	pr_debug("RXBD/%s %08x: "
    432			"nxt %08x buf %08x off.blen %08x opt.plen %08x\n",
    433			tag, bd->dma,
    434			bd->hw_next, bd->hw_bufp, bd->hw_off_len,
    435			bd->hw_options);
    436}
    437
    438static void cppi_dump_rxq(int level, const char *tag, struct cppi_channel *rx)
    439{
    440	struct cppi_descriptor	*bd;
    441
    442	cppi_dump_rx(level, rx, tag);
    443	if (rx->last_processed)
    444		cppi_dump_rxbd("last", rx->last_processed);
    445	for (bd = rx->head; bd; bd = bd->next)
    446		cppi_dump_rxbd("active", bd);
    447}
    448
    449
    450/* NOTE:  DaVinci autoreq is ignored except for host side "RNDIS" mode RX;
    451 * so we won't ever use it (see "CPPI RX Woes" below).
    452 */
    453static inline int cppi_autoreq_update(struct cppi_channel *rx,
    454		void __iomem *tibase, int onepacket, unsigned n_bds)
    455{
    456	u32	val;
    457
    458#ifdef	RNDIS_RX_IS_USABLE
    459	u32	tmp;
    460	/* assert(is_host_active(musb)) */
    461
    462	/* start from "AutoReq never" */
    463	tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
    464	val = tmp & ~((0x3) << (rx->index * 2));
    465
    466	/* HCD arranged reqpkt for packet #1.  we arrange int
    467	 * for all but the last one, maybe in two segments.
    468	 */
    469	if (!onepacket) {
    470#if 0
    471		/* use two segments, autoreq "all" then the last "never" */
    472		val |= ((0x3) << (rx->index * 2));
    473		n_bds--;
    474#else
    475		/* one segment, autoreq "all-but-last" */
    476		val |= ((0x1) << (rx->index * 2));
    477#endif
    478	}
    479
    480	if (val != tmp) {
    481		int n = 100;
    482
    483		/* make sure that autoreq is updated before continuing */
    484		musb_writel(tibase, DAVINCI_AUTOREQ_REG, val);
    485		do {
    486			tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
    487			if (tmp == val)
    488				break;
    489			cpu_relax();
    490		} while (n-- > 0);
    491	}
    492#endif
    493
    494	/* REQPKT is turned off after each segment */
    495	if (n_bds && rx->channel.actual_len) {
    496		void __iomem	*regs = rx->hw_ep->regs;
    497
    498		val = musb_readw(regs, MUSB_RXCSR);
    499		if (!(val & MUSB_RXCSR_H_REQPKT)) {
    500			val |= MUSB_RXCSR_H_REQPKT | MUSB_RXCSR_H_WZC_BITS;
    501			musb_writew(regs, MUSB_RXCSR, val);
    502			/* flush writebuffer */
    503			val = musb_readw(regs, MUSB_RXCSR);
    504		}
    505	}
    506	return n_bds;
    507}
    508
    509
    510/* Buffer enqueuing Logic:
    511 *
    512 *  - RX builds new queues each time, to help handle routine "early
    513 *    termination" cases (faults, including errors and short reads)
    514 *    more correctly.
    515 *
    516 *  - for now, TX reuses the same queue of BDs every time
    517 *
    518 * REVISIT long term, we want a normal dynamic model.
    519 * ... the goal will be to append to the
    520 * existing queue, processing completed "dma buffers" (segments) on the fly.
    521 *
    522 * Otherwise we force an IRQ latency between requests, which slows us a lot
    523 * (especially in "transparent" dma).  Unfortunately that model seems to be
    524 * inherent in the DMA model from the Mentor code, except in the rare case
    525 * of transfers big enough (~128+ KB) that we could append "middle" segments
    526 * in the TX paths.  (RX can't do this, see below.)
    527 *
    528 * That's true even in the CPPI-friendly iso case, where most urbs have
    529 * several small segments provided in a group and where the "packet at a time"
    530 * "transparent" DMA model is always correct, even on the RX side.
    531 */
    532
    533/*
    534 * CPPI TX:
    535 * ========
    536 * TX is a lot more reasonable than RX; it doesn't need to run in
    537 * irq-per-packet mode very often.  RNDIS mode seems to behave well too
    538 * (except for how it handles the exactly-N-packets case).  Building a
    539 * txdma queue with multiple requests (urb or usb_request) looks
    540 * like it would work ... but fault handling would need much testing.
    541 *
    542 * The main issue with TX mode RNDIS relates to transfer lengths that
    543 * are an exact multiple of the packet length.  It appears that there's
    544 * a hiccup in that case (maybe the DMA completes before the ZLP gets
    545 * written?) boiling down to not being able to rely on CPPI writing any
    546 * terminating zero length packet before the next transfer is written.
    547 * So that's punted to PIO; better yet, gadget drivers can avoid it.
    548 *
    549 * Plus, there's allegedly an undocumented constraint that rndis transfer
    550 * length be a multiple of 64 bytes ... but the chip doesn't act that
    551 * way, and we really don't _want_ that behavior anyway.
    552 *
    553 * On TX, "transparent" mode works ... although experiments have shown
    554 * problems trying to use the SOP/EOP bits in different USB packets.
    555 *
    556 * REVISIT try to handle terminating zero length packets using CPPI
    557 * instead of doing it by PIO after an IRQ.  (Meanwhile, make Ethernet
    558 * links avoid that issue by forcing them to avoid zlps.)
    559 */
    560static void
    561cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx)
    562{
    563	unsigned		maxpacket = tx->maxpacket;
    564	dma_addr_t		addr = tx->buf_dma + tx->offset;
    565	size_t			length = tx->buf_len - tx->offset;
    566	struct cppi_descriptor	*bd;
    567	unsigned		n_bds;
    568	unsigned		i;
    569	struct cppi_tx_stateram	__iomem *tx_ram = tx->state_ram;
    570	int			rndis;
    571
    572	/* TX can use the CPPI "rndis" mode, where we can probably fit this
    573	 * transfer in one BD and one IRQ.  The only time we would NOT want
    574	 * to use it is when hardware constraints prevent it, or if we'd
    575	 * trigger the "send a ZLP?" confusion.
    576	 */
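       	/* The checks below require a maxpacket that is a multiple of 64,
       	 * more than one packet of data, a length that fits the 16-bit
       	 * descriptor length limit, and a length that is not an exact
       	 * multiple of maxpacket (so no terminating ZLP would be needed).
       	 */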
    577	rndis = (maxpacket & 0x3f) == 0
    578		&& length > maxpacket
    579		&& length < 0xffff
    580		&& (length % maxpacket) != 0;
    581
    582	if (rndis) {
    583		maxpacket = length;
    584		n_bds = 1;
    585	} else {
    586		if (length)
    587			n_bds = DIV_ROUND_UP(length, maxpacket);
    588		else
    589			n_bds = 1;
    590		n_bds = min(n_bds, (unsigned) NUM_TXCHAN_BD);
    591		length = min(n_bds * maxpacket, length);
    592	}
    593
    594	musb_dbg(musb, "TX DMA%d, pktSz %d %s bds %d dma 0x%llx len %u",
    595			tx->index,
    596			maxpacket,
    597			rndis ? "rndis" : "transparent",
    598			n_bds,
    599			(unsigned long long)addr, length);
    600
    601	cppi_rndis_update(tx, 0, musb->ctrl_base, rndis);
    602
    603	/* assuming here that channel_program is called during
    604	 * transfer initiation ... current code maintains state
    605	 * for one outstanding request only (no queues, not even
    606	 * the implicit ones of an iso urb).
    607	 */
    608
    609	bd = tx->freelist;
    610	tx->head = bd;
    611	tx->last_processed = NULL;
    612
    613	/* FIXME use BD pool like RX side does, and just queue
    614	 * the minimum number for this request.
    615	 */
    616
    617	/* Prepare queue of BDs first, then hand it to hardware.
    618	 * All BDs except maybe the last should be of full packet
    619	 * size; for RNDIS there _is_ only that last packet.
    620	 */
    621	for (i = 0; i < n_bds; ) {
    622		if (++i < n_bds && bd->next)
    623			bd->hw_next = bd->next->dma;
    624		else
    625			bd->hw_next = 0;
    626
    627		bd->hw_bufp = tx->buf_dma + tx->offset;
    628
    629		/* FIXME set EOP only on the last packet,
    630		 * SOP only on the first ... avoid IRQs
    631		 */
    632		if ((tx->offset + maxpacket) <= tx->buf_len) {
    633			tx->offset += maxpacket;
    634			bd->hw_off_len = maxpacket;
    635			bd->hw_options = CPPI_SOP_SET | CPPI_EOP_SET
    636				| CPPI_OWN_SET | maxpacket;
    637		} else {
    638			/* only this one may be a partial USB Packet */
    639			u32		partial_len;
    640
    641			partial_len = tx->buf_len - tx->offset;
    642			tx->offset = tx->buf_len;
    643			bd->hw_off_len = partial_len;
    644
    645			bd->hw_options = CPPI_SOP_SET | CPPI_EOP_SET
    646				| CPPI_OWN_SET | partial_len;
    647			if (partial_len == 0)
    648				bd->hw_options |= CPPI_ZERO_SET;
    649		}
    650
    651		musb_dbg(musb, "TXBD %p: nxt %08x buf %08x len %04x opt %08x",
    652				bd, bd->hw_next, bd->hw_bufp,
    653				bd->hw_off_len, bd->hw_options);
    654
    655		/* update the last BD enqueued to the list */
    656		tx->tail = bd;
    657		bd = bd->next;
    658	}
    659
    660	/* BDs live in DMA-coherent memory, but writes might be pending */
    661	cpu_drain_writebuffer();
    662
    663	/* Write to the HeadPtr in state RAM to trigger */
    664	musb_writel(&tx_ram->tx_head, 0, (u32)tx->freelist->dma);
    665
    666	cppi_dump_tx(5, tx, "/S");
    667}
    668
    669/*
    670 * CPPI RX Woes:
    671 * =============
    672 * Consider a 1KB bulk RX buffer in two scenarios:  (a) it's fed two 300 byte
    673 * packets back-to-back, and (b) it's fed two 512 byte packets back-to-back.
    674 * (Full speed transfers have similar scenarios.)
    675 *
    676 * The correct behavior for Linux is that (a) fills the buffer with 300 bytes,
    677 * and the next packet goes into a buffer that's queued later; while (b) fills
    678 * the buffer with 1024 bytes.  How to do that with CPPI?
    679 *
    680 * - RX queues in "rndis" mode -- one single BD -- handle (a) correctly, but
    681 *   (b) loses **BADLY** because nothing (!) happens when that second packet
    682 *   fills the buffer, much less when a third one arrives.  (Which makes this
    683 *   not a "true" RNDIS mode.  In the RNDIS protocol short-packet termination
    684 *   is optional, and it's fine if peripherals -- not hosts! -- pad messages
    685 *   out to end-of-buffer.  Standard PCI host controller DMA descriptors
    686 *   implement that mode by default ... which is no accident.)
    687 *
    688 * - RX queues in "transparent" mode -- two BDs with 512 bytes each -- have
    689 *   converse problems:  (b) is handled right, but (a) loses badly.  CPPI RX
    690 *   ignores SOP/EOP markings and processes both of those BDs; so both packets
    691 *   are loaded into the buffer (with a 212 byte gap between them), and the next
    692 *   buffer queued will NOT get its 300 bytes of data. (It seems like SOP/EOP
    693 *   are intended as outputs for RX queues, not inputs...)
    694 *
    695 * - A variant of "transparent" mode -- one BD at a time -- is the only way to
    696 *   reliably make both cases work, with software handling both cases correctly
    697 *   and at the significant penalty of needing an IRQ per packet.  (The lack of
    698 *   I/O overlap can be slightly ameliorated by enabling double buffering.)
    699 *
    700 * So how to get rid of IRQ-per-packet?  The transparent multi-BD case could
    701 * be used in special cases like mass storage, which sets URB_SHORT_NOT_OK
    702 * (or maybe its peripheral side counterpart) to flag (a) scenarios as errors
    703 * with guaranteed driver level fault recovery and scrubbing out what's left
    704 * of that garbaged datastream.
    705 *
    706 * But there seems to be no way to identify the cases where CPPI RNDIS mode
    707 * is appropriate -- which do NOT include RNDIS host drivers, but do include
    708 * the CDC Ethernet driver! -- and the documentation is incomplete/wrong.
    709 * So we can't _ever_ use RX RNDIS mode ... except by using a heuristic
    710 * that applies best on the peripheral side (and which could fail rudely).
    711 *
    712 * Leaving only "transparent" mode; we avoid multi-bd modes in almost all
    713 * cases other than mass storage class.  Otherwise we're correct but slow,
    714 * since CPPI penalizes our need for a "true RNDIS" default mode.
    715 */
    716
    717
    718/* Heuristic, intended to kick in for ethernet/rndis peripheral ONLY
    719 *
    720 * IFF
    721 *  (a)	peripheral mode ... since rndis peripherals could pad their
    722 *	writes to hosts, causing i/o failure; or we'd have to cope with
    723 *	a largely unknowable variety of host side protocol variants
    724 *  (b)	and short reads are NOT errors ... since full reads would
    725 *	cause those same i/o failures
    726 *  (c)	and read length is
    727 *	- less than 64KB (max per cppi descriptor)
    728 *	- not a multiple of 4096 (g_zero default, full reads typical)
    729 *	- N (>1) packets long, ditto (full reads not EXPECTED)
    730 * THEN
    731 *   try rx rndis mode
    732 *
    733 * Cost of heuristic failing:  RXDMA wedges at the end of transfers that
    734 * fill out the whole buffer.  Buggy host side usb network drivers could
    735 * trigger that, but "in the field" such bugs seem to be all but unknown.
    736 *
    737 * So this module parameter lets the heuristic be disabled.  When using
    738 * gadgetfs, the heuristic will probably need to be disabled.
    739 */
    740static bool cppi_rx_rndis = 1;
    741
    742module_param(cppi_rx_rndis, bool, 0);
    743MODULE_PARM_DESC(cppi_rx_rndis, "enable/disable RX RNDIS heuristic");
    744
    745
    746/**
    747 * cppi_next_rx_segment - dma read for the next chunk of a buffer
    748 * @musb: the controller
    749 * @rx: dma channel
    750 * @onepacket: true unless caller treats short reads as errors, and
    751 *	performs fault recovery above usbcore.
    752 * Context: controller irqlocked
    753 *
    754 * See above notes about why we can't use multi-BD RX queues except in
    755 * rare cases (mass storage class), and can never use the hardware "rndis"
    756 * mode (since it's not a "true" RNDIS mode) with complete safety.
    757 *
    758 * It's ESSENTIAL that callers specify "onepacket" mode unless they kick in
    759 * code to recover from corrupted datastreams after each short transfer.
    760 */
    761static void
    762cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket)
    763{
    764	unsigned		maxpacket = rx->maxpacket;
    765	dma_addr_t		addr = rx->buf_dma + rx->offset;
    766	size_t			length = rx->buf_len - rx->offset;
    767	struct cppi_descriptor	*bd, *tail;
    768	unsigned		n_bds;
    769	unsigned		i;
    770	void __iomem		*tibase = musb->ctrl_base;
    771	int			is_rndis = 0;
    772	struct cppi_rx_stateram	__iomem *rx_ram = rx->state_ram;
    773	struct cppi_descriptor	*d;
    774
    775	if (onepacket) {
    776		/* almost every USB driver, host or peripheral side */
    777		n_bds = 1;
    778
    779		/* maybe apply the heuristic above */
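       		/* i.e. peripheral side, more than one packet of data,
       		 * under the 64KB per-descriptor limit, not a multiple of
       		 * 4096, and an exact multiple of maxpacket -- the heuristic
       		 * above (short-reads-ok is implied by onepacket).
       		 */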
    780		if (cppi_rx_rndis
    781				&& is_peripheral_active(musb)
    782				&& length > maxpacket
    783				&& (length & ~0xffff) == 0
    784				&& (length & 0x0fff) != 0
    785				&& (length & (maxpacket - 1)) == 0) {
    786			maxpacket = length;
    787			is_rndis = 1;
    788		}
    789	} else {
    790		/* virtually nothing except mass storage class */
    791		if (length > 0xffff) {
    792			n_bds = 0xffff / maxpacket;
    793			length = n_bds * maxpacket;
    794		} else {
    795			n_bds = DIV_ROUND_UP(length, maxpacket);
    796		}
    797		if (n_bds == 1)
    798			onepacket = 1;
    799		else
    800			n_bds = min(n_bds, (unsigned) NUM_RXCHAN_BD);
    801	}
    802
    803	/* In host mode, autorequest logic can generate some IN tokens; it's
    804	 * tricky since we can't leave REQPKT set in RXCSR after the transfer
    805	 * finishes. So:  multipacket transfers involve two or more segments.
    806	 * And always at least two IRQs ... RNDIS mode is not an option.
    807	 */
    808	if (is_host_active(musb))
    809		n_bds = cppi_autoreq_update(rx, tibase, onepacket, n_bds);
    810
    811	cppi_rndis_update(rx, 1, musb->ctrl_base, is_rndis);
    812
    813	length = min(n_bds * maxpacket, length);
    814
    815	musb_dbg(musb, "RX DMA%d seg, maxp %d %s bds %d (cnt %d) "
    816			"dma 0x%llx len %u %u/%u",
    817			rx->index, maxpacket,
    818			onepacket
    819				? (is_rndis ? "rndis" : "onepacket")
    820				: "multipacket",
    821			n_bds,
    822			musb_readl(tibase,
    823				DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4))
    824					& 0xffff,
    825			(unsigned long long)addr, length,
    826			rx->channel.actual_len, rx->buf_len);
    827
    828	/* only queue one segment at a time, since the hardware prevents
    829	 * correct queue shutdown after unexpected short packets
    830	 */
    831	bd = cppi_bd_alloc(rx);
    832	rx->head = bd;
    833
    834	/* Build BDs for all packets in this segment */
    835	for (i = 0, tail = NULL; bd && i < n_bds; i++, tail = bd) {
    836		u32	bd_len;
    837
    838		if (i) {
    839			bd = cppi_bd_alloc(rx);
    840			if (!bd)
    841				break;
    842			tail->next = bd;
    843			tail->hw_next = bd->dma;
    844		}
    845		bd->hw_next = 0;
    846
    847		/* all but the last packet will be maxpacket size */
    848		if (maxpacket < length)
    849			bd_len = maxpacket;
    850		else
    851			bd_len = length;
    852
    853		bd->hw_bufp = addr;
    854		addr += bd_len;
    855		rx->offset += bd_len;
    856
    857		bd->hw_off_len = (0 /*offset*/ << 16) + bd_len;
    858		bd->buflen = bd_len;
    859
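       		/* only the first (SOP) BD carries the total segment length
       		 * in its options word; every BD is handed to the hardware
       		 * with OWN set
       		 */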
    860		bd->hw_options = CPPI_OWN_SET | (i == 0 ? length : 0);
    861		length -= bd_len;
    862	}
    863
    864	/* we always expect at least one reusable BD! */
    865	if (!tail) {
    866		WARNING("rx dma%d -- no BDs? need %d\n", rx->index, n_bds);
    867		return;
    868	} else if (i < n_bds)
    869		WARNING("rx dma%d -- only %d of %d BDs\n", rx->index, i, n_bds);
    870
    871	tail->next = NULL;
    872	tail->hw_next = 0;
    873
    874	bd = rx->head;
    875	rx->tail = tail;
    876
    877	/* short reads and other faults should terminate this entire
    878	 * dma segment.  we want one "dma packet" per dma segment, not
    879	 * one per USB packet, terminating the whole queue at once...
    880	 * NOTE that current hardware seems to ignore SOP and EOP.
    881	 */
    882	bd->hw_options |= CPPI_SOP_SET;
    883	tail->hw_options |= CPPI_EOP_SET;
    884
    885	for (d = rx->head; d; d = d->next)
    886		cppi_dump_rxbd("S", d);
    887
    888	/* in case the preceding transfer left some state... */
    889	tail = rx->last_processed;
    890	if (tail) {
    891		tail->next = bd;
    892		tail->hw_next = bd->dma;
    893	}
    894
    895	core_rxirq_enable(tibase, rx->index + 1);
    896
    897	/* BDs live in DMA-coherent memory, but writes might be pending */
    898	cpu_drain_writebuffer();
    899
    900	/* REVISIT specs say to write this AFTER the BUFCNT register
    901	 * below ... but that loses badly.
    902	 */
    903	musb_writel(&rx_ram->rx_head, 0, bd->dma);
    904
    905	/* bufferCount must be at least 3, and zeroes on completion
    906	 * unless it underflows below zero, or stops at two, or keeps
    907	 * growing ... grr.
    908	 */
    909	i = musb_readl(tibase,
    910			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4))
    911			& 0xffff;
    912
    913	if (!i)
    914		musb_writel(tibase,
    915			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4),
    916			n_bds + 2);
    917	else if (n_bds > (i - 3))
    918		musb_writel(tibase,
    919			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4),
    920			n_bds - (i - 3));
    921
    922	i = musb_readl(tibase,
    923			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4))
    924			& 0xffff;
    925	if (i < (2 + n_bds)) {
    926		musb_dbg(musb, "bufcnt%d underrun - %d (for %d)",
    927					rx->index, i, n_bds);
    928		musb_writel(tibase,
    929			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4),
    930			n_bds + 2);
    931	}
    932
    933	cppi_dump_rx(4, rx, "/S");
    934}
    935
    936/**
    937 * cppi_channel_program - program channel for data transfer
    938 * @ch: the channel
    939 * @maxpacket: max packet size
    940 * @mode: For RX, 1 unless the usb protocol driver promised to treat
    941 *	all short reads as errors and kick in high level fault recovery.
    942 *	For TX, ignored because of RNDIS mode races/glitches.
    943 * @dma_addr: dma address of buffer
    944 * @len: length of buffer
    945 * Context: controller irqlocked
    946 */
    947static int cppi_channel_program(struct dma_channel *ch,
    948		u16 maxpacket, u8 mode,
    949		dma_addr_t dma_addr, u32 len)
    950{
    951	struct cppi_channel	*cppi_ch;
    952	struct cppi		*controller;
    953	struct musb		*musb;
    954
    955	cppi_ch = container_of(ch, struct cppi_channel, channel);
    956	controller = cppi_ch->controller;
    957	musb = controller->controller.musb;
    958
    959	switch (ch->status) {
    960	case MUSB_DMA_STATUS_BUS_ABORT:
    961	case MUSB_DMA_STATUS_CORE_ABORT:
    962		/* fault irq handler should have handled cleanup */
    963		WARNING("%cX DMA%d not cleaned up after abort!\n",
    964				cppi_ch->transmit ? 'T' : 'R',
    965				cppi_ch->index);
    966		/* WARN_ON(1); */
    967		break;
    968	case MUSB_DMA_STATUS_BUSY:
    969		WARNING("program active channel?  %cX DMA%d\n",
    970				cppi_ch->transmit ? 'T' : 'R',
    971				cppi_ch->index);
    972		/* WARN_ON(1); */
    973		break;
    974	case MUSB_DMA_STATUS_UNKNOWN:
    975		musb_dbg(musb, "%cX DMA%d not allocated!",
    976				cppi_ch->transmit ? 'T' : 'R',
    977				cppi_ch->index);
    978		fallthrough;
    979	case MUSB_DMA_STATUS_FREE:
    980		break;
    981	}
    982
    983	ch->status = MUSB_DMA_STATUS_BUSY;
    984
    985	/* set transfer parameters, then queue up its first segment */
    986	cppi_ch->buf_dma = dma_addr;
    987	cppi_ch->offset = 0;
    988	cppi_ch->maxpacket = maxpacket;
    989	cppi_ch->buf_len = len;
    990	cppi_ch->channel.actual_len = 0;
    991
    992	/* TX channel? or RX? */
    993	if (cppi_ch->transmit)
    994		cppi_next_tx_segment(musb, cppi_ch);
    995	else
    996		cppi_next_rx_segment(musb, cppi_ch, mode);
    997
    998	return true;
    999}
   1000
   1001static bool cppi_rx_scan(struct cppi *cppi, unsigned ch)
   1002{
   1003	struct cppi_channel		*rx = &cppi->rx[ch];
   1004	struct cppi_rx_stateram __iomem	*state = rx->state_ram;
   1005	struct cppi_descriptor		*bd;
   1006	struct cppi_descriptor		*last = rx->last_processed;
   1007	bool				completed = false;
   1008	bool				acked = false;
   1009	int				i;
   1010	dma_addr_t			safe2ack;
   1011	void __iomem			*regs = rx->hw_ep->regs;
   1012	struct musb			*musb = cppi->controller.musb;
   1013
   1014	cppi_dump_rx(6, rx, "/K");
   1015
   1016	bd = last ? last->next : rx->head;
   1017	if (!bd)
   1018		return false;
   1019
   1020	/* run through all completed BDs */
   1021	for (i = 0, safe2ack = musb_readl(&state->rx_complete, 0);
   1022			(safe2ack || completed) && bd && i < NUM_RXCHAN_BD;
   1023			i++, bd = bd->next) {
   1024		u16	len;
   1025
   1026		/* catch latest BD writes from CPPI */
   1027		rmb();
   1028		if (!completed && (bd->hw_options & CPPI_OWN_SET))
   1029			break;
   1030
   1031		musb_dbg(musb, "C/RXBD %llx: nxt %08x buf %08x "
   1032			"off.len %08x opt.len %08x (%d)",
   1033			(unsigned long long)bd->dma, bd->hw_next, bd->hw_bufp,
   1034			bd->hw_off_len, bd->hw_options,
   1035			rx->channel.actual_len);
   1036
   1037		/* actual packet received length */
   1038		if ((bd->hw_options & CPPI_SOP_SET) && !completed)
   1039			len = bd->hw_off_len & CPPI_RECV_PKTLEN_MASK;
   1040		else
   1041			len = 0;
   1042
   1043		if (bd->hw_options & CPPI_EOQ_MASK)
   1044			completed = true;
   1045
   1046		if (!completed && len < bd->buflen) {
   1047			/* NOTE:  when we get a short packet, RXCSR_H_REQPKT
   1048			 * must have been cleared, and no more DMA packets may
   1049			 * be active in the queue... TI docs didn't say, but
   1050			 * CPPI ignores those BDs even though OWN is still set.
   1051			 */
   1052			completed = true;
   1053			musb_dbg(musb, "rx short %d/%d (%d)",
   1054					len, bd->buflen,
   1055					rx->channel.actual_len);
   1056		}
   1057
   1058		/* If we got here, we expect to ack at least one BD; meanwhile
   1059		 * CPPI may be completing other BDs while we scan this list...
   1060		 *
   1061		 * RACE: we can notice OWN cleared before CPPI raises the
   1062		 * matching irq by writing that BD as the completion pointer.
   1063		 * In such cases, stop scanning and wait for the irq, avoiding
   1064		 * lost acks and states where BD ownership is unclear.
   1065		 */
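       		/* Write the completion pointer to ack this BD, then re-read
       		 * it: if CPPI has since completed more BDs the read returns
       		 * a newer address, otherwise (still bd->dma) there is
       		 * nothing further to ack for now.
       		 */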
   1066		if (bd->dma == safe2ack) {
   1067			musb_writel(&state->rx_complete, 0, safe2ack);
   1068			safe2ack = musb_readl(&state->rx_complete, 0);
   1069			acked = true;
   1070			if (bd->dma == safe2ack)
   1071				safe2ack = 0;
   1072		}
   1073
   1074		rx->channel.actual_len += len;
   1075
   1076		cppi_bd_free(rx, last);
   1077		last = bd;
   1078
   1079		/* stop scanning on end-of-segment */
   1080		if (bd->hw_next == 0)
   1081			completed = true;
   1082	}
   1083	rx->last_processed = last;
   1084
   1085	/* dma abort, lost ack, or ... */
   1086	if (!acked && last) {
   1087		int	csr;
   1088
   1089		if (safe2ack == 0 || safe2ack == rx->last_processed->dma)
   1090			musb_writel(&state->rx_complete, 0, safe2ack);
   1091		if (safe2ack == 0) {
   1092			cppi_bd_free(rx, last);
   1093			rx->last_processed = NULL;
   1094
   1095			/* if we land here on the host side, H_REQPKT will
   1096			 * be clear and we need to restart the queue...
   1097			 */
   1098			WARN_ON(rx->head);
   1099		}
   1100		musb_ep_select(cppi->mregs, rx->index + 1);
   1101		csr = musb_readw(regs, MUSB_RXCSR);
   1102		if (csr & MUSB_RXCSR_DMAENAB) {
   1103			musb_dbg(musb, "list%d %p/%p, last %llx%s, csr %04x",
   1104				rx->index,
   1105				rx->head, rx->tail,
   1106				rx->last_processed
   1107					? (unsigned long long)
   1108						rx->last_processed->dma
   1109					: 0,
   1110				completed ? ", completed" : "",
   1111				csr);
   1112			cppi_dump_rxq(4, "/what?", rx);
   1113		}
   1114	}
   1115	if (!completed) {
   1116		int	csr;
   1117
   1118		rx->head = bd;
   1119
   1120		/* REVISIT seems like "autoreq all but EOP" doesn't...
   1121		 * setting it here "should" be racy, but seems to work
   1122		 */
   1123		csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR);
   1124		if (is_host_active(cppi->controller.musb)
   1125				&& bd
   1126				&& !(csr & MUSB_RXCSR_H_REQPKT)) {
   1127			csr |= MUSB_RXCSR_H_REQPKT;
   1128			musb_writew(regs, MUSB_RXCSR,
   1129					MUSB_RXCSR_H_WZC_BITS | csr);
   1130			csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR);
   1131		}
   1132	} else {
   1133		rx->head = NULL;
   1134		rx->tail = NULL;
   1135	}
   1136
   1137	cppi_dump_rx(6, rx, completed ? "/completed" : "/cleaned");
   1138	return completed;
   1139}
   1140
   1141irqreturn_t cppi_interrupt(int irq, void *dev_id)
   1142{
   1143	struct musb		*musb = dev_id;
   1144	struct cppi		*cppi;
   1145	void __iomem		*tibase;
   1146	struct musb_hw_ep	*hw_ep = NULL;
   1147	u32			rx, tx;
   1148	int			i, index;
   1149	unsigned long		flags;
   1150
   1151	cppi = container_of(musb->dma_controller, struct cppi, controller);
   1152	if (cppi->irq)
   1153		spin_lock_irqsave(&musb->lock, flags);
   1154
   1155	tibase = musb->ctrl_base;
   1156
   1157	tx = musb_readl(tibase, DAVINCI_TXCPPI_MASKED_REG);
   1158	rx = musb_readl(tibase, DAVINCI_RXCPPI_MASKED_REG);
   1159
   1160	if (!tx && !rx) {
   1161		if (cppi->irq)
   1162			spin_unlock_irqrestore(&musb->lock, flags);
   1163		return IRQ_NONE;
   1164	}
   1165
   1166	musb_dbg(musb, "CPPI IRQ Tx%x Rx%x", tx, rx);
   1167
   1168	/* process TX channels */
   1169	for (index = 0; tx; tx = tx >> 1, index++) {
   1170		struct cppi_channel		*tx_ch;
   1171		struct cppi_tx_stateram __iomem	*tx_ram;
   1172		bool				completed = false;
   1173		struct cppi_descriptor		*bd;
   1174
   1175		if (!(tx & 1))
   1176			continue;
   1177
   1178		tx_ch = cppi->tx + index;
   1179		tx_ram = tx_ch->state_ram;
   1180
   1181		/* FIXME  need a cppi_tx_scan() routine, which
   1182		 * can also be called from abort code
   1183		 */
   1184
   1185		cppi_dump_tx(5, tx_ch, "/E");
   1186
   1187		bd = tx_ch->head;
   1188
   1189		/*
   1190		 * If the head is NULL, this could mean that an abort
   1191		 * interrupt needs to be acknowledged.
   1192		 */
   1193		if (NULL == bd) {
   1194			musb_dbg(musb, "null BD");
   1195			musb_writel(&tx_ram->tx_complete, 0, 0);
   1196			continue;
   1197		}
   1198
   1199		/* run through all completed BDs */
   1200		for (i = 0; !completed && bd && i < NUM_TXCHAN_BD;
   1201				i++, bd = bd->next) {
   1202			u16	len;
   1203
   1204			/* catch latest BD writes from CPPI */
   1205			rmb();
   1206			if (bd->hw_options & CPPI_OWN_SET)
   1207				break;
   1208
   1209			musb_dbg(musb, "C/TXBD %p n %x b %x off %x opt %x",
   1210					bd, bd->hw_next, bd->hw_bufp,
   1211					bd->hw_off_len, bd->hw_options);
   1212
   1213			len = bd->hw_off_len & CPPI_BUFFER_LEN_MASK;
   1214			tx_ch->channel.actual_len += len;
   1215
   1216			tx_ch->last_processed = bd;
   1217
   1218			/* write completion register to acknowledge
   1219			 * processing of completed BDs, and possibly
   1220			 * release the IRQ; EOQ might not be set ...
   1221			 *
   1222			 * REVISIT use the same ack strategy as rx
   1223			 *
   1224			 * REVISIT have observed bit 18 set; huh??
   1225			 */
   1226			/* if ((bd->hw_options & CPPI_EOQ_MASK)) */
   1227				musb_writel(&tx_ram->tx_complete, 0, bd->dma);
   1228
   1229			/* stop scanning on end-of-segment */
   1230			if (bd->hw_next == 0)
   1231				completed = true;
   1232		}
   1233
   1234		/* on end of segment, maybe go to next one */
   1235		if (completed) {
   1236			/* cppi_dump_tx(4, tx_ch, "/complete"); */
   1237
   1238			/* transfer more, or report completion */
   1239			if (tx_ch->offset >= tx_ch->buf_len) {
   1240				tx_ch->head = NULL;
   1241				tx_ch->tail = NULL;
   1242				tx_ch->channel.status = MUSB_DMA_STATUS_FREE;
   1243
   1244				hw_ep = tx_ch->hw_ep;
   1245
   1246				musb_dma_completion(musb, index + 1, 1);
   1247
   1248			} else {
   1249				/* Bigger transfer than we could fit in
   1250				 * that first batch of descriptors...
   1251				 */
   1252				cppi_next_tx_segment(musb, tx_ch);
   1253			}
   1254		} else
   1255			tx_ch->head = bd;
   1256	}
   1257
   1258	/* Start processing the RX block */
   1259	for (index = 0; rx; rx = rx >> 1, index++) {
   1260
   1261		if (rx & 1) {
   1262			struct cppi_channel		*rx_ch;
   1263
   1264			rx_ch = cppi->rx + index;
   1265
   1266			/* let incomplete dma segments finish */
   1267			if (!cppi_rx_scan(cppi, index))
   1268				continue;
   1269
   1270			/* start another dma segment if needed */
   1271			if (rx_ch->channel.actual_len != rx_ch->buf_len
   1272					&& rx_ch->channel.actual_len
   1273						== rx_ch->offset) {
   1274				cppi_next_rx_segment(musb, rx_ch, 1);
   1275				continue;
   1276			}
   1277
   1278			/* all segments completed! */
   1279			rx_ch->channel.status = MUSB_DMA_STATUS_FREE;
   1280
   1281			hw_ep = rx_ch->hw_ep;
   1282
   1283			core_rxirq_disable(tibase, index + 1);
   1284			musb_dma_completion(musb, index + 1, 0);
   1285		}
   1286	}
   1287
   1288	/* write to CPPI EOI register to re-enable interrupts */
   1289	musb_writel(tibase, DAVINCI_CPPI_EOI_REG, 0);
   1290
   1291	if (cppi->irq)
   1292		spin_unlock_irqrestore(&musb->lock, flags);
   1293
   1294	return IRQ_HANDLED;
   1295}
   1296EXPORT_SYMBOL_GPL(cppi_interrupt);
   1297
   1298/* Instantiate a software object representing a DMA controller. */
   1299struct dma_controller *
   1300cppi_dma_controller_create(struct musb *musb, void __iomem *mregs)
   1301{
   1302	struct cppi		*controller;
   1303	struct device		*dev = musb->controller;
   1304	struct platform_device	*pdev = to_platform_device(dev);
   1305	int			irq = platform_get_irq_byname(pdev, "dma");
   1306
   1307	controller = kzalloc(sizeof *controller, GFP_KERNEL);
   1308	if (!controller)
   1309		return NULL;
   1310
   1311	controller->mregs = mregs;
   1312	controller->tibase = mregs - DAVINCI_BASE_OFFSET;
   1313
   1314	controller->controller.musb = musb;
   1315	controller->controller.channel_alloc = cppi_channel_allocate;
   1316	controller->controller.channel_release = cppi_channel_release;
   1317	controller->controller.channel_program = cppi_channel_program;
   1318	controller->controller.channel_abort = cppi_channel_abort;
   1319
   1320	/* NOTE: allocating from on-chip SRAM would give the least
   1321	 * contention for memory access, if that ever matters here.
   1322	 */
   1323
   1324	/* setup BufferPool */
   1325	controller->pool = dma_pool_create("cppi",
   1326			controller->controller.musb->controller,
   1327			sizeof(struct cppi_descriptor),
   1328			CPPI_DESCRIPTOR_ALIGN, 0);
   1329	if (!controller->pool) {
   1330		kfree(controller);
   1331		return NULL;
   1332	}
   1333
   1334	if (irq > 0) {
   1335		if (request_irq(irq, cppi_interrupt, 0, "cppi-dma", musb)) {
   1336			dev_err(dev, "request_irq %d failed!\n", irq);
   1337			musb_dma_controller_destroy(&controller->controller);
   1338			return NULL;
   1339		}
   1340		controller->irq = irq;
   1341	}
   1342
   1343	cppi_controller_start(controller);
   1344	return &controller->controller;
   1345}
   1346EXPORT_SYMBOL_GPL(cppi_dma_controller_create);
   1347
   1348/*
   1349 *  Destroy a previously-instantiated DMA controller.
   1350 */
   1351void cppi_dma_controller_destroy(struct dma_controller *c)
   1352{
   1353	struct cppi	*cppi;
   1354
   1355	cppi = container_of(c, struct cppi, controller);
   1356
   1357	cppi_controller_stop(cppi);
   1358
   1359	if (cppi->irq)
   1360		free_irq(cppi->irq, cppi->controller.musb);
   1361
   1362	/* assert:  caller stopped the controller first */
   1363	dma_pool_destroy(cppi->pool);
   1364
   1365	kfree(cppi);
   1366}
   1367EXPORT_SYMBOL_GPL(cppi_dma_controller_destroy);
   1368
   1369/*
   1370 * Context: controller irqlocked, endpoint selected
   1371 */
   1372static int cppi_channel_abort(struct dma_channel *channel)
   1373{
   1374	struct cppi_channel	*cppi_ch;
   1375	struct cppi		*controller;
   1376	void __iomem		*mbase;
   1377	void __iomem		*tibase;
   1378	void __iomem		*regs;
   1379	u32			value;
   1380	struct cppi_descriptor	*queue;
   1381
   1382	cppi_ch = container_of(channel, struct cppi_channel, channel);
   1383
   1384	controller = cppi_ch->controller;
   1385
   1386	switch (channel->status) {
   1387	case MUSB_DMA_STATUS_BUS_ABORT:
   1388	case MUSB_DMA_STATUS_CORE_ABORT:
   1389		/* from RX or TX fault irq handler */
   1390	case MUSB_DMA_STATUS_BUSY:
   1391		/* the hardware needs shutting down */
   1392		regs = cppi_ch->hw_ep->regs;
   1393		break;
   1394	case MUSB_DMA_STATUS_UNKNOWN:
   1395	case MUSB_DMA_STATUS_FREE:
   1396		return 0;
   1397	default:
   1398		return -EINVAL;
   1399	}
   1400
   1401	if (!cppi_ch->transmit && cppi_ch->head)
   1402		cppi_dump_rxq(3, "/abort", cppi_ch);
   1403
   1404	mbase = controller->mregs;
   1405	tibase = controller->tibase;
   1406
   1407	queue = cppi_ch->head;
   1408	cppi_ch->head = NULL;
   1409	cppi_ch->tail = NULL;
   1410
   1411	/* REVISIT should rely on caller having done this,
   1412	 * and caller should rely on us not changing it.
   1413	 * peripheral code is safe ... check host too.
   1414	 */
   1415	musb_ep_select(mbase, cppi_ch->index + 1);
   1416
   1417	if (cppi_ch->transmit) {
   1418		struct cppi_tx_stateram __iomem *tx_ram;
   1419		/* REVISIT put timeouts on these controller handshakes */
   1420
   1421		cppi_dump_tx(6, cppi_ch, " (teardown)");
   1422
   1423		/* teardown DMA engine then usb core */
   1424		do {
   1425			value = musb_readl(tibase, DAVINCI_TXCPPI_TEAR_REG);
   1426		} while (!(value & CPPI_TEAR_READY));
   1427		musb_writel(tibase, DAVINCI_TXCPPI_TEAR_REG, cppi_ch->index);
   1428
   1429		tx_ram = cppi_ch->state_ram;
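       		/* wait for the teardown completion marker (0xFFFFFFFC) to
       		 * appear in this channel's completion pointer
       		 */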
   1430		do {
   1431			value = musb_readl(&tx_ram->tx_complete, 0);
   1432		} while (0xFFFFFFFC != value);
   1433
   1434		/* FIXME clean up the transfer state ... here?
   1435		 * the completion routine should get called with
   1436		 * an appropriate status code.
   1437		 */
   1438
   1439		value = musb_readw(regs, MUSB_TXCSR);
   1440		value &= ~MUSB_TXCSR_DMAENAB;
   1441		value |= MUSB_TXCSR_FLUSHFIFO;
   1442		musb_writew(regs, MUSB_TXCSR, value);
   1443		musb_writew(regs, MUSB_TXCSR, value);
   1444
   1445		/*
   1446		 * 1. Write to the completion pointer the value 0x1 (bit 0 set)
   1447		 *    (write back mode)
   1448		 * 2. Wait for abort interrupt and then put the channel in
   1449		 *    compare mode by writing 1 to the tx_complete register.
   1450		 */
   1451		cppi_reset_tx(tx_ram, 1);
   1452		cppi_ch->head = NULL;
   1453		musb_writel(&tx_ram->tx_complete, 0, 1);
   1454		cppi_dump_tx(5, cppi_ch, " (done teardown)");
   1455
   1456		/* REVISIT tx side _should_ clean up the same way
   1457		 * as the RX side ... this does no cleanup at all!
   1458		 */
   1459
   1460	} else /* RX */ {
   1461		u16			csr;
   1462
   1463		/* NOTE: docs don't guarantee any of this works ...  we
   1464		 * expect that if the usb core stops telling the cppi core
   1465		 * to pull more data from it, then it'll be safe to flush
   1466		 * current RX DMA state iff any pending fifo transfer is done.
   1467		 */
   1468
   1469		core_rxirq_disable(tibase, cppi_ch->index + 1);
   1470
   1471		/* for host, ensure ReqPkt is never set again */
   1472		if (is_host_active(cppi_ch->controller->controller.musb)) {
   1473			value = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
   1474			value &= ~((0x3) << (cppi_ch->index * 2));
   1475			musb_writel(tibase, DAVINCI_AUTOREQ_REG, value);
   1476		}
   1477
   1478		csr = musb_readw(regs, MUSB_RXCSR);
   1479
   1480		/* for host, clear (just) ReqPkt at end of current packet(s) */
   1481		if (is_host_active(cppi_ch->controller->controller.musb)) {
   1482			csr |= MUSB_RXCSR_H_WZC_BITS;
   1483			csr &= ~MUSB_RXCSR_H_REQPKT;
   1484		} else
   1485			csr |= MUSB_RXCSR_P_WZC_BITS;
   1486
   1487		/* clear dma enable */
   1488		csr &= ~(MUSB_RXCSR_DMAENAB);
   1489		musb_writew(regs, MUSB_RXCSR, csr);
   1490		csr = musb_readw(regs, MUSB_RXCSR);
   1491
   1492		/* Quiesce: wait for current dma to finish (if not cleanup).
   1493		 * We can't use bit zero of stateram->rx_sop, since that
   1494		 * refers to an entire "DMA packet" not just emptying the
   1495		 * current fifo.  Most segments need multiple usb packets.
   1496		 */
   1497		if (channel->status == MUSB_DMA_STATUS_BUSY)
   1498			udelay(50);
   1499
   1500		/* scan the current list, reporting any data that was
   1501		 * transferred and acking any IRQ
   1502		 */
   1503		cppi_rx_scan(controller, cppi_ch->index);
   1504
   1505		/* clobber the existing state once it's idle
   1506		 *
   1507		 * NOTE:  arguably, we should also wait for all the other
   1508		 * RX channels to quiesce (how??) and then temporarily
   1509		 * disable RXCPPI_CTRL_REG ... but it seems that we can
   1510		 * rely on the controller restarting from state ram, with
   1511		 * only RXCPPI_BUFCNT state being bogus.  BUFCNT will
   1512		 * correct itself after the next DMA transfer though.
   1513		 *
   1514		 * REVISIT does using rndis mode change that?
   1515		 */
   1516		cppi_reset_rx(cppi_ch->state_ram);
   1517
   1518		/* next DMA request _should_ load cppi head ptr */
   1519
   1520		/* ... we don't "free" that list, only mutate it in place.  */
   1521		cppi_dump_rx(5, cppi_ch, " (done abort)");
   1522
   1523		/* clean up previously pending bds */
   1524		cppi_bd_free(cppi_ch, cppi_ch->last_processed);
   1525		cppi_ch->last_processed = NULL;
   1526
   1527		while (queue) {
   1528			struct cppi_descriptor	*tmp = queue->next;
   1529
   1530			cppi_bd_free(cppi_ch, queue);
   1531			queue = tmp;
   1532		}
   1533	}
   1534
   1535	channel->status = MUSB_DMA_STATUS_FREE;
   1536	cppi_ch->buf_dma = 0;
   1537	cppi_ch->offset = 0;
   1538	cppi_ch->buf_len = 0;
   1539	cppi_ch->maxpacket = 0;
   1540	return 0;
   1541}
   1542
   1543/* TBD Queries:
   1544 *
   1545 * Power Management ... probably turn off cppi during suspend, restart;
   1546 * check state ram?  Clocking is presumably shared with usb core.
   1547 */