cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

spi-bcm2835.c (42498B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Driver for Broadcom BCM2835 SPI Controllers
      4 *
      5 * Copyright (C) 2012 Chris Boot
      6 * Copyright (C) 2013 Stephen Warren
      7 * Copyright (C) 2015 Martin Sperl
      8 *
      9 * This driver is inspired by:
     10 * spi-ath79.c, Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
     11 * spi-atmel.c, Copyright (C) 2006 Atmel Corporation
     12 */
     13
     14#include <linux/clk.h>
     15#include <linux/completion.h>
     16#include <linux/debugfs.h>
     17#include <linux/delay.h>
     18#include <linux/dma-mapping.h>
     19#include <linux/dmaengine.h>
     20#include <linux/err.h>
     21#include <linux/interrupt.h>
     22#include <linux/io.h>
     23#include <linux/kernel.h>
     24#include <linux/module.h>
     25#include <linux/of.h>
     26#include <linux/of_address.h>
     27#include <linux/of_device.h>
     28#include <linux/gpio/consumer.h>
     29#include <linux/gpio/machine.h> /* FIXME: using chip internals */
     30#include <linux/gpio/driver.h> /* FIXME: using chip internals */
     31#include <linux/of_irq.h>
     32#include <linux/spi/spi.h>
     33
     34/* SPI register offsets */
     35#define BCM2835_SPI_CS			0x00
     36#define BCM2835_SPI_FIFO		0x04
     37#define BCM2835_SPI_CLK			0x08
     38#define BCM2835_SPI_DLEN		0x0c
     39#define BCM2835_SPI_LTOH		0x10
     40#define BCM2835_SPI_DC			0x14
     41
     42/* Bitfields in CS */
     43#define BCM2835_SPI_CS_LEN_LONG		0x02000000
     44#define BCM2835_SPI_CS_DMA_LEN		0x01000000
     45#define BCM2835_SPI_CS_CSPOL2		0x00800000
     46#define BCM2835_SPI_CS_CSPOL1		0x00400000
     47#define BCM2835_SPI_CS_CSPOL0		0x00200000
     48#define BCM2835_SPI_CS_RXF		0x00100000
     49#define BCM2835_SPI_CS_RXR		0x00080000
     50#define BCM2835_SPI_CS_TXD		0x00040000
     51#define BCM2835_SPI_CS_RXD		0x00020000
     52#define BCM2835_SPI_CS_DONE		0x00010000
     53#define BCM2835_SPI_CS_LEN		0x00002000
     54#define BCM2835_SPI_CS_REN		0x00001000
     55#define BCM2835_SPI_CS_ADCS		0x00000800
     56#define BCM2835_SPI_CS_INTR		0x00000400
     57#define BCM2835_SPI_CS_INTD		0x00000200
     58#define BCM2835_SPI_CS_DMAEN		0x00000100
     59#define BCM2835_SPI_CS_TA		0x00000080
     60#define BCM2835_SPI_CS_CSPOL		0x00000040
     61#define BCM2835_SPI_CS_CLEAR_RX		0x00000020
     62#define BCM2835_SPI_CS_CLEAR_TX		0x00000010
     63#define BCM2835_SPI_CS_CPOL		0x00000008
     64#define BCM2835_SPI_CS_CPHA		0x00000004
     65#define BCM2835_SPI_CS_CS_10		0x00000002
     66#define BCM2835_SPI_CS_CS_01		0x00000001
     67
     68#define BCM2835_SPI_FIFO_SIZE		64
     69#define BCM2835_SPI_FIFO_SIZE_3_4	48
     70#define BCM2835_SPI_DMA_MIN_LENGTH	96
     71#define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
     72				| SPI_NO_CS | SPI_3WIRE)
     73
     74#define DRV_NAME	"spi-bcm2835"
     75
     76/* define polling limits */
     77static unsigned int polling_limit_us = 30;
     78module_param(polling_limit_us, uint, 0664);
     79MODULE_PARM_DESC(polling_limit_us,
     80		 "time in us to run a transfer in polling mode\n");
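
       /*
        * Example: since the parameter is declared with mode 0664, it can
        * also be tuned at runtime through sysfs, e.g. (module name assumed
        * to be spi_bcm2835 when built as a module):
        *
        *   echo 50 > /sys/module/spi_bcm2835/parameters/polling_limit_us
        */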
     81
     82/**
     83 * struct bcm2835_spi - BCM2835 SPI controller
     84 * @regs: base address of register map
     85 * @clk: core clock, divided to calculate serial clock
     86 * @clk_hz: core clock cached speed
     87 * @irq: interrupt, signals TX FIFO empty or RX FIFO ¾ full
     88 * @tfr: SPI transfer currently processed
     89 * @ctlr: SPI controller reverse lookup
     90 * @tx_buf: pointer whence next transmitted byte is read
     91 * @rx_buf: pointer where next received byte is written
     92 * @tx_len: remaining bytes to transmit
     93 * @rx_len: remaining bytes to receive
     94 * @tx_prologue: bytes transmitted without DMA if first TX sglist entry's
     95 *	length is not a multiple of 4 (to overcome hardware limitation)
     96 * @rx_prologue: bytes received without DMA if first RX sglist entry's
     97 *	length is not a multiple of 4 (to overcome hardware limitation)
     98 * @tx_spillover: whether @tx_prologue spills over to second TX sglist entry
     99 * @debugfs_dir: the debugfs directory - needed to remove debugfs when
    100 *      unloading the module
    101 * @count_transfer_polling: count of how often polling mode is used
    102 * @count_transfer_irq: count of how often interrupt mode is used
    103 * @count_transfer_irq_after_polling: count of how often we fall back to
    104 *      interrupt mode after starting in polling mode.
    105 *      These are counted as well in @count_transfer_polling and
    106 *      @count_transfer_irq
    107 * @count_transfer_dma: count how often dma mode is used
    108 * @slv: SPI slave currently selected
    109 *	(used by bcm2835_spi_dma_tx_done() to write @clear_rx_cs)
    110 * @tx_dma_active: whether a TX DMA descriptor is in progress
    111 * @rx_dma_active: whether a RX DMA descriptor is in progress
    112 *	(used by bcm2835_spi_dma_tx_done() to handle a race)
    113 * @fill_tx_desc: preallocated TX DMA descriptor used for RX-only transfers
    114 *	(cyclically copies from zero page to TX FIFO)
    115 * @fill_tx_addr: bus address of zero page
    116 */
    117struct bcm2835_spi {
    118	void __iomem *regs;
    119	struct clk *clk;
    120	unsigned long clk_hz;
    121	int irq;
    122	struct spi_transfer *tfr;
    123	struct spi_controller *ctlr;
    124	const u8 *tx_buf;
    125	u8 *rx_buf;
    126	int tx_len;
    127	int rx_len;
    128	int tx_prologue;
    129	int rx_prologue;
    130	unsigned int tx_spillover;
    131
    132	struct dentry *debugfs_dir;
    133	u64 count_transfer_polling;
    134	u64 count_transfer_irq;
    135	u64 count_transfer_irq_after_polling;
    136	u64 count_transfer_dma;
    137
    138	struct bcm2835_spidev *slv;
    139	unsigned int tx_dma_active;
    140	unsigned int rx_dma_active;
    141	struct dma_async_tx_descriptor *fill_tx_desc;
    142	dma_addr_t fill_tx_addr;
    143};
    144
    145/**
    146 * struct bcm2835_spidev - BCM2835 SPI slave
    147 * @prepare_cs: precalculated CS register value for ->prepare_message()
    148 *	(uses slave-specific clock polarity and phase settings)
    149 * @clear_rx_desc: preallocated RX DMA descriptor used for TX-only transfers
    150 *	(cyclically clears RX FIFO by writing @clear_rx_cs to CS register)
    151 * @clear_rx_addr: bus address of @clear_rx_cs
    152 * @clear_rx_cs: precalculated CS register value to clear RX FIFO
    153 *	(uses slave-specific clock polarity and phase settings)
    154 */
    155struct bcm2835_spidev {
    156	u32 prepare_cs;
    157	struct dma_async_tx_descriptor *clear_rx_desc;
    158	dma_addr_t clear_rx_addr;
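       	/*
       	 * this member is mapped for DMA in bcm2835_spi_setup_dma(), so it
       	 * is kept cache-line aligned to avoid sharing a cache line with
       	 * the CPU-written members above (a usual requirement for
       	 * dma_map_single() buffers on non-cache-coherent systems)
       	 */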
    159	u32 clear_rx_cs ____cacheline_aligned;
    160};
    161
    162#if defined(CONFIG_DEBUG_FS)
    163static void bcm2835_debugfs_create(struct bcm2835_spi *bs,
    164				   const char *dname)
    165{
    166	char name[64];
    167	struct dentry *dir;
    168
    169	/* get full name */
    170	snprintf(name, sizeof(name), "spi-bcm2835-%s", dname);
    171
    172	/* the base directory */
    173	dir = debugfs_create_dir(name, NULL);
    174	bs->debugfs_dir = dir;
    175
    176	/* the counters */
    177	debugfs_create_u64("count_transfer_polling", 0444, dir,
    178			   &bs->count_transfer_polling);
    179	debugfs_create_u64("count_transfer_irq", 0444, dir,
    180			   &bs->count_transfer_irq);
    181	debugfs_create_u64("count_transfer_irq_after_polling", 0444, dir,
    182			   &bs->count_transfer_irq_after_polling);
    183	debugfs_create_u64("count_transfer_dma", 0444, dir,
    184			   &bs->count_transfer_dma);
    185}
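
       /*
        * The counters then show up under debugfs (commonly mounted at
        * /sys/kernel/debug), e.g. with a hypothetical device name:
        *
        *   /sys/kernel/debug/spi-bcm2835-3f204000.spi/count_transfer_dma
        */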
    186
    187static void bcm2835_debugfs_remove(struct bcm2835_spi *bs)
    188{
    189	debugfs_remove_recursive(bs->debugfs_dir);
    190	bs->debugfs_dir = NULL;
    191}
    192#else
    193static void bcm2835_debugfs_create(struct bcm2835_spi *bs,
    194				   const char *dname)
    195{
    196}
    197
    198static void bcm2835_debugfs_remove(struct bcm2835_spi *bs)
    199{
    200}
    201#endif /* CONFIG_DEBUG_FS */
    202
    203static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned int reg)
    204{
    205	return readl(bs->regs + reg);
    206}
    207
    208static inline void bcm2835_wr(struct bcm2835_spi *bs, unsigned int reg, u32 val)
    209{
    210	writel(val, bs->regs + reg);
    211}
    212
    213static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs)
    214{
    215	u8 byte;
    216
    217	while ((bs->rx_len) &&
    218	       (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_RXD)) {
    219		byte = bcm2835_rd(bs, BCM2835_SPI_FIFO);
    220		if (bs->rx_buf)
    221			*bs->rx_buf++ = byte;
    222		bs->rx_len--;
    223	}
    224}
    225
    226static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs)
    227{
    228	u8 byte;
    229
    230	while ((bs->tx_len) &&
    231	       (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_TXD)) {
    232		byte = bs->tx_buf ? *bs->tx_buf++ : 0;
    233		bcm2835_wr(bs, BCM2835_SPI_FIFO, byte);
    234		bs->tx_len--;
    235	}
    236}
    237
    238/**
    239 * bcm2835_rd_fifo_count() - blindly read exactly @count bytes from RX FIFO
    240 * @bs: BCM2835 SPI controller
    241 * @count: bytes to read from RX FIFO
    242 *
    243 * The caller must ensure that @bs->rx_len is greater than or equal to @count,
    244 * that the RX FIFO contains at least @count bytes and that the DMA Enable flag
    245 * in the CS register is set (such that a read from the FIFO register receives
    246 * 32-bit instead of just 8-bit).  Moreover @bs->rx_buf must not be %NULL.
    247 */
    248static inline void bcm2835_rd_fifo_count(struct bcm2835_spi *bs, int count)
    249{
    250	u32 val;
    251	int len;
    252
    253	bs->rx_len -= count;
    254
    255	do {
    256		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
    257		len = min(count, 4);
    258		memcpy(bs->rx_buf, &val, len);
    259		bs->rx_buf += len;
    260		count -= 4;
    261	} while (count > 0);
    262}
    263
    264/**
    265 * bcm2835_wr_fifo_count() - blindly write exactly @count bytes to TX FIFO
    266 * @bs: BCM2835 SPI controller
    267 * @count: bytes to write to TX FIFO
    268 *
    269 * The caller must ensure that @bs->tx_len is greater than or equal to @count,
    270 * that the TX FIFO can accommodate @count bytes and that the DMA Enable flag
    271 * in the CS register is set (such that a write to the FIFO register transmits
    272 * 32-bit instead of just 8-bit).
    273 */
    274static inline void bcm2835_wr_fifo_count(struct bcm2835_spi *bs, int count)
    275{
    276	u32 val;
    277	int len;
    278
    279	bs->tx_len -= count;
    280
    281	do {
    282		if (bs->tx_buf) {
    283			len = min(count, 4);
    284			memcpy(&val, bs->tx_buf, len);
    285			bs->tx_buf += len;
    286		} else {
    287			val = 0;
    288		}
    289		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
    290		count -= 4;
    291	} while (count > 0);
    292}
    293
    294/**
    295 * bcm2835_wait_tx_fifo_empty() - busy-wait for TX FIFO to empty
    296 * @bs: BCM2835 SPI controller
    297 *
    298 * The caller must ensure that the RX FIFO can accommodate as many bytes
    299 * as have been written to the TX FIFO:  Transmission is halted once the
    300 * RX FIFO is full, causing this function to spin forever.
    301 */
    302static inline void bcm2835_wait_tx_fifo_empty(struct bcm2835_spi *bs)
    303{
    304	while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
    305		cpu_relax();
    306}
    307
    308/**
    309 * bcm2835_rd_fifo_blind() - blindly read up to @count bytes from RX FIFO
    310 * @bs: BCM2835 SPI controller
    311 * @count: bytes available for reading in RX FIFO
    312 */
    313static inline void bcm2835_rd_fifo_blind(struct bcm2835_spi *bs, int count)
    314{
    315	u8 val;
    316
    317	count = min(count, bs->rx_len);
    318	bs->rx_len -= count;
    319
    320	do {
    321		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
    322		if (bs->rx_buf)
    323			*bs->rx_buf++ = val;
    324	} while (--count);
    325}
    326
    327/**
    328 * bcm2835_wr_fifo_blind() - blindly write up to @count bytes to TX FIFO
    329 * @bs: BCM2835 SPI controller
    330 * @count: bytes available for writing in TX FIFO
    331 */
    332static inline void bcm2835_wr_fifo_blind(struct bcm2835_spi *bs, int count)
    333{
    334	u8 val;
    335
    336	count = min(count, bs->tx_len);
    337	bs->tx_len -= count;
    338
    339	do {
    340		val = bs->tx_buf ? *bs->tx_buf++ : 0;
    341		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
    342	} while (--count);
    343}
    344
    345static void bcm2835_spi_reset_hw(struct bcm2835_spi *bs)
    346{
    347	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
    348
    349	/* Disable SPI interrupts and transfer */
    350	cs &= ~(BCM2835_SPI_CS_INTR |
    351		BCM2835_SPI_CS_INTD |
    352		BCM2835_SPI_CS_DMAEN |
    353		BCM2835_SPI_CS_TA);
    354	/*
    355	 * Transmission sometimes breaks unless the DONE bit is written at the
    356	 * end of every transfer.  The spec says it's a RO bit.  Either the
    357	 * spec is wrong and the bit is actually of type RW1C, or it's a
    358	 * hardware erratum.
    359	 */
    360	cs |= BCM2835_SPI_CS_DONE;
    361	/* and reset RX/TX FIFOS */
    362	cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX;
    363
    364	/* and reset the SPI_HW */
    365	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
    366	/* as well as DLEN */
    367	bcm2835_wr(bs, BCM2835_SPI_DLEN, 0);
    368}
    369
    370static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
    371{
    372	struct bcm2835_spi *bs = dev_id;
    373	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
    374
    375	/*
    376	 * An interrupt is signaled either if DONE is set (TX FIFO empty)
    377	 * or if RXR is set (RX FIFO >= ¾ full).
    378	 */
    379	if (cs & BCM2835_SPI_CS_RXF)
    380		bcm2835_rd_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
    381	else if (cs & BCM2835_SPI_CS_RXR)
    382		bcm2835_rd_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE_3_4);
    383
    384	if (bs->tx_len && cs & BCM2835_SPI_CS_DONE)
    385		bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
    386
    387	/* Read as many bytes as possible from FIFO */
    388	bcm2835_rd_fifo(bs);
    389	/* Write as many bytes as possible to FIFO */
    390	bcm2835_wr_fifo(bs);
    391
    392	if (!bs->rx_len) {
    393		/* Transfer complete - reset SPI HW */
    394		bcm2835_spi_reset_hw(bs);
    395		/* wake up the framework */
    396		spi_finalize_current_transfer(bs->ctlr);
    397	}
    398
    399	return IRQ_HANDLED;
    400}
    401
    402static int bcm2835_spi_transfer_one_irq(struct spi_controller *ctlr,
    403					struct spi_device *spi,
    404					struct spi_transfer *tfr,
    405					u32 cs, bool fifo_empty)
    406{
    407	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
    408
    409	/* update usage statistics */
    410	bs->count_transfer_irq++;
    411
    412	/*
    413	 * Enable HW block, but with interrupts still disabled.
    414	 * Otherwise the empty TX FIFO would immediately trigger an interrupt.
    415	 */
    416	bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
    417
    418	/* fill TX FIFO as much as possible */
    419	if (fifo_empty)
    420		bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
    421	bcm2835_wr_fifo(bs);
    422
    423	/* enable interrupts */
    424	cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
    425	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
    426
    427	/* signal that we need to wait for completion */
    428	return 1;
    429}
    430
    431/**
    432 * bcm2835_spi_transfer_prologue() - transfer first few bytes without DMA
    433 * @ctlr: SPI master controller
    434 * @tfr: SPI transfer
    435 * @bs: BCM2835 SPI controller
    436 * @cs: CS register
    437 *
    438 * A limitation in DMA mode is that the FIFO must be accessed in 4 byte chunks.
    439 * Only the final write access is permitted to transmit less than 4 bytes; the
    440 * SPI controller deduces its intended size from the DLEN register.
    441 *
    442 * If a TX or RX sglist contains multiple entries, one per page, and the first
    443 * entry starts in the middle of a page, that first entry's length may not be
    444 * a multiple of 4.  Subsequent entries are fine because they span an entire
    445 * page, hence do have a length that's a multiple of 4.
    446 *
    447 * This cannot happen with kmalloc'ed buffers (which is what most clients use)
    448 * because they are contiguous in physical memory and therefore not split on
    449 * page boundaries by spi_map_buf().  But it *can* happen with vmalloc'ed
    450 * buffers.
    451 *
    452 * The DMA engine is incapable of combining sglist entries into a continuous
    453 * stream of 4 byte chunks, it treats every entry separately:  A TX entry is
    454 * rounded up to a multiple of 4 bytes by transmitting surplus bytes, an RX
    455 * entry is rounded up by throwing away received bytes.
    456 *
    457 * Overcome this limitation by transferring the first few bytes without DMA:
    458 * E.g. if the first TX sglist entry's length is 23 and the first RX's is 42,
    459 * write 3 bytes to the TX FIFO but read only 2 bytes from the RX FIFO.
    460 * The residue of 1 byte in the RX FIFO is picked up by DMA.  Together with
    461 * the rest of the first RX sglist entry it makes up a multiple of 4 bytes.
    462 *
    463 * Should the RX prologue be larger, say, 3 vis-à-vis a TX prologue of 1,
    464 * write 1 + 4 = 5 bytes to the TX FIFO and read 3 bytes from the RX FIFO.
    465 * Caution, the additional 4 bytes spill over to the second TX sglist entry
    466 * if the length of the first is *exactly* 1.
    467 *
    468 * At most 6 bytes are written and at most 3 bytes read.  Do we know the
    469 * transfer has this many bytes?  Yes, see BCM2835_SPI_DMA_MIN_LENGTH.
    470 *
    471 * The FIFO is normally accessed with 8-bit width by the CPU and 32-bit width
    472 * by the DMA engine.  Toggling the DMA Enable flag in the CS register switches
    473 * the width but also garbles the FIFO's contents.  The prologue must therefore
    474 * be transmitted in 32-bit width to ensure that the following DMA transfer can
    475 * pick up the residue in the RX FIFO in ungarbled form.
    476 */
    477static void bcm2835_spi_transfer_prologue(struct spi_controller *ctlr,
    478					  struct spi_transfer *tfr,
    479					  struct bcm2835_spi *bs,
    480					  u32 cs)
    481{
    482	int tx_remaining;
    483
    484	bs->tfr		 = tfr;
    485	bs->tx_prologue  = 0;
    486	bs->rx_prologue  = 0;
    487	bs->tx_spillover = false;
    488
    489	if (bs->tx_buf && !sg_is_last(&tfr->tx_sg.sgl[0]))
    490		bs->tx_prologue = sg_dma_len(&tfr->tx_sg.sgl[0]) & 3;
    491
    492	if (bs->rx_buf && !sg_is_last(&tfr->rx_sg.sgl[0])) {
    493		bs->rx_prologue = sg_dma_len(&tfr->rx_sg.sgl[0]) & 3;
    494
    495		if (bs->rx_prologue > bs->tx_prologue) {
    496			if (!bs->tx_buf || sg_is_last(&tfr->tx_sg.sgl[0])) {
    497				bs->tx_prologue  = bs->rx_prologue;
    498			} else {
    499				bs->tx_prologue += 4;
    500				bs->tx_spillover =
    501					!(sg_dma_len(&tfr->tx_sg.sgl[0]) & ~3);
    502			}
    503		}
    504	}
    505
    506	/* rx_prologue > 0 implies tx_prologue > 0, so check only the latter */
    507	if (!bs->tx_prologue)
    508		return;
    509
    510	/* Write and read RX prologue.  Adjust first entry in RX sglist. */
    511	if (bs->rx_prologue) {
    512		bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->rx_prologue);
    513		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
    514						  | BCM2835_SPI_CS_DMAEN);
    515		bcm2835_wr_fifo_count(bs, bs->rx_prologue);
    516		bcm2835_wait_tx_fifo_empty(bs);
    517		bcm2835_rd_fifo_count(bs, bs->rx_prologue);
    518		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_CLEAR_RX
    519						  | BCM2835_SPI_CS_CLEAR_TX
    520						  | BCM2835_SPI_CS_DONE);
    521
    522		dma_sync_single_for_device(ctlr->dma_rx->device->dev,
    523					   sg_dma_address(&tfr->rx_sg.sgl[0]),
    524					   bs->rx_prologue, DMA_FROM_DEVICE);
    525
    526		sg_dma_address(&tfr->rx_sg.sgl[0]) += bs->rx_prologue;
    527		sg_dma_len(&tfr->rx_sg.sgl[0])     -= bs->rx_prologue;
    528	}
    529
    530	if (!bs->tx_buf)
    531		return;
    532
    533	/*
    534	 * Write remaining TX prologue.  Adjust first entry in TX sglist.
    535	 * Also adjust second entry if prologue spills over to it.
    536	 */
    537	tx_remaining = bs->tx_prologue - bs->rx_prologue;
    538	if (tx_remaining) {
    539		bcm2835_wr(bs, BCM2835_SPI_DLEN, tx_remaining);
    540		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
    541						  | BCM2835_SPI_CS_DMAEN);
    542		bcm2835_wr_fifo_count(bs, tx_remaining);
    543		bcm2835_wait_tx_fifo_empty(bs);
    544		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_CLEAR_TX
    545						  | BCM2835_SPI_CS_DONE);
    546	}
    547
    548	if (likely(!bs->tx_spillover)) {
    549		sg_dma_address(&tfr->tx_sg.sgl[0]) += bs->tx_prologue;
    550		sg_dma_len(&tfr->tx_sg.sgl[0])     -= bs->tx_prologue;
    551	} else {
    552		sg_dma_len(&tfr->tx_sg.sgl[0])      = 0;
    553		sg_dma_address(&tfr->tx_sg.sgl[1]) += 4;
    554		sg_dma_len(&tfr->tx_sg.sgl[1])     -= 4;
    555	}
    556}
    557
    558/**
    559 * bcm2835_spi_undo_prologue() - reconstruct original sglist state
    560 * @bs: BCM2835 SPI controller
    561 *
    562 * Undo changes which were made to an SPI transfer's sglist when transmitting
    563 * the prologue.  This is necessary to ensure the same memory ranges are
    564 * unmapped that were originally mapped.
    565 */
    566static void bcm2835_spi_undo_prologue(struct bcm2835_spi *bs)
    567{
    568	struct spi_transfer *tfr = bs->tfr;
    569
    570	if (!bs->tx_prologue)
    571		return;
    572
    573	if (bs->rx_prologue) {
    574		sg_dma_address(&tfr->rx_sg.sgl[0]) -= bs->rx_prologue;
    575		sg_dma_len(&tfr->rx_sg.sgl[0])     += bs->rx_prologue;
    576	}
    577
    578	if (!bs->tx_buf)
    579		goto out;
    580
    581	if (likely(!bs->tx_spillover)) {
    582		sg_dma_address(&tfr->tx_sg.sgl[0]) -= bs->tx_prologue;
    583		sg_dma_len(&tfr->tx_sg.sgl[0])     += bs->tx_prologue;
    584	} else {
    585		sg_dma_len(&tfr->tx_sg.sgl[0])      = bs->tx_prologue - 4;
    586		sg_dma_address(&tfr->tx_sg.sgl[1]) -= 4;
    587		sg_dma_len(&tfr->tx_sg.sgl[1])     += 4;
    588	}
    589out:
    590	bs->tx_prologue = 0;
    591}
    592
    593/**
    594 * bcm2835_spi_dma_rx_done() - callback for DMA RX channel
    595 * @data: SPI master controller
    596 *
    597 * Used for bidirectional and RX-only transfers.
    598 */
    599static void bcm2835_spi_dma_rx_done(void *data)
    600{
    601	struct spi_controller *ctlr = data;
    602	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
    603
    604	/* terminate tx-dma, for which we do not have an irq: by the
    605	 * time the rx dma terminates and this callback is called, the
    606	 * tx-dma must have finished - we can't get into this
    607	 * situation otherwise...
    608	 */
    609	dmaengine_terminate_async(ctlr->dma_tx);
    610	bs->tx_dma_active = false;
    611	bs->rx_dma_active = false;
    612	bcm2835_spi_undo_prologue(bs);
    613
    614	/* reset fifo and HW */
    615	bcm2835_spi_reset_hw(bs);
    616
    617	/* and mark as completed */
    618	spi_finalize_current_transfer(ctlr);
    619}
    620
    621/**
    622 * bcm2835_spi_dma_tx_done() - callback for DMA TX channel
    623 * @data: SPI master controller
    624 *
    625 * Used for TX-only transfers.
    626 */
    627static void bcm2835_spi_dma_tx_done(void *data)
    628{
    629	struct spi_controller *ctlr = data;
    630	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
    631
    632	/* busy-wait for TX FIFO to empty */
    633	while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
    634		bcm2835_wr(bs, BCM2835_SPI_CS, bs->slv->clear_rx_cs);
    635
    636	bs->tx_dma_active = false;
    637	smp_wmb();
    638
    639	/*
    640	 * In case of a very short transfer, RX DMA may not have been
    641	 * issued yet.  The onus is then on bcm2835_spi_transfer_one_dma()
    642	 * to terminate it immediately after issuing.
    643	 */
    644	if (cmpxchg(&bs->rx_dma_active, true, false))
    645		dmaengine_terminate_async(ctlr->dma_rx);
    646
    647	bcm2835_spi_undo_prologue(bs);
    648	bcm2835_spi_reset_hw(bs);
    649	spi_finalize_current_transfer(ctlr);
    650}
    651
    652/**
    653 * bcm2835_spi_prepare_sg() - prepare and submit DMA descriptor for sglist
    654 * @ctlr: SPI master controller
    655 * @tfr: SPI transfer
    656 * @bs: BCM2835 SPI controller
    657 * @slv: BCM2835 SPI slave
    658 * @is_tx: whether to submit DMA descriptor for TX or RX sglist
    659 *
    660 * Prepare and submit a DMA descriptor for the TX or RX sglist of @tfr.
    661 * Return 0 on success or a negative error number.
    662 */
    663static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
    664				  struct spi_transfer *tfr,
    665				  struct bcm2835_spi *bs,
    666				  struct bcm2835_spidev *slv,
    667				  bool is_tx)
    668{
    669	struct dma_chan *chan;
    670	struct scatterlist *sgl;
    671	unsigned int nents;
    672	enum dma_transfer_direction dir;
    673	unsigned long flags;
    674
    675	struct dma_async_tx_descriptor *desc;
    676	dma_cookie_t cookie;
    677
    678	if (is_tx) {
    679		dir   = DMA_MEM_TO_DEV;
    680		chan  = ctlr->dma_tx;
    681		nents = tfr->tx_sg.nents;
    682		sgl   = tfr->tx_sg.sgl;
    683		flags = tfr->rx_buf ? 0 : DMA_PREP_INTERRUPT;
    684	} else {
    685		dir   = DMA_DEV_TO_MEM;
    686		chan  = ctlr->dma_rx;
    687		nents = tfr->rx_sg.nents;
    688		sgl   = tfr->rx_sg.sgl;
    689		flags = DMA_PREP_INTERRUPT;
    690	}
    691	/* prepare the channel */
    692	desc = dmaengine_prep_slave_sg(chan, sgl, nents, dir, flags);
    693	if (!desc)
    694		return -EINVAL;
    695
    696	/*
    697	 * Completion is signaled by the RX channel for bidirectional and
    698	 * RX-only transfers; else by the TX channel for TX-only transfers.
    699	 */
    700	if (!is_tx) {
    701		desc->callback = bcm2835_spi_dma_rx_done;
    702		desc->callback_param = ctlr;
    703	} else if (!tfr->rx_buf) {
    704		desc->callback = bcm2835_spi_dma_tx_done;
    705		desc->callback_param = ctlr;
    706		bs->slv = slv;
    707	}
    708
    709	/* submit it to DMA-engine */
    710	cookie = dmaengine_submit(desc);
    711
    712	return dma_submit_error(cookie);
    713}
    714
    715/**
    716 * bcm2835_spi_transfer_one_dma() - perform SPI transfer using DMA engine
    717 * @ctlr: SPI master controller
    718 * @tfr: SPI transfer
    719 * @slv: BCM2835 SPI slave
    720 * @cs: CS register
    721 *
    722 * For *bidirectional* transfers (both tx_buf and rx_buf are non-%NULL), set up
    723 * the TX and RX DMA channel to copy between memory and FIFO register.
    724 *
    725 * For *TX-only* transfers (rx_buf is %NULL), copying the RX FIFO's contents to
    726 * memory is pointless.  However not reading the RX FIFO isn't an option either
    727 * because transmission is halted once it's full.  As a workaround, cyclically
    728 * clear the RX FIFO by setting the CLEAR_RX bit in the CS register.
    729 *
    730 * The CS register value is precalculated in bcm2835_spi_setup().  Normally
    731 * this is called only once, on slave registration.  A DMA descriptor to write
    732 * this value is preallocated in bcm2835_dma_init().  All that's left to do
    733 * when performing a TX-only transfer is to submit this descriptor to the RX
    734 * DMA channel.  Latency is thereby minimized.  The descriptor does not
    735 * generate any interrupts while running.  It must be terminated once the
    736 * TX DMA channel is done.
    737 *
    738 * Clearing the RX FIFO is paced by the DREQ signal.  The signal is asserted
    739 * when the RX FIFO becomes half full, i.e. 32 bytes.  (Tuneable with the DC
    740 * register.)  Reading 32 bytes from the RX FIFO would normally require 8 bus
    741 * accesses, whereas clearing it requires only 1 bus access.  So an 8-fold
    742 * reduction in bus traffic and thus energy consumption is achieved.
    743 *
    744 * For *RX-only* transfers (tx_buf is %NULL), fill the TX FIFO by cyclically
    745 * copying from the zero page.  The DMA descriptor to do this is preallocated
    746 * in bcm2835_dma_init().  It must be terminated once the RX DMA channel is
    747 * done and can then be reused.
    748 *
    749 * The BCM2835 DMA driver autodetects when a transaction copies from the zero
    750 * page and utilizes the DMA controller's ability to synthesize zeroes instead
    751 * of copying them from memory.  This reduces traffic on the memory bus.  The
    752 * feature is not available on so-called "lite" channels, but normally TX DMA
    753 * is backed by a full-featured channel.
    754 *
    755 * Zero-filling the TX FIFO is paced by the DREQ signal.  Unfortunately the
    756 * BCM2835 SPI controller continues to assert DREQ even after the DLEN register
    757 * has been counted down to zero (hardware erratum).  Thus, when the transfer
    758 * has finished, the DMA engine zero-fills the TX FIFO until it is half full.
    759 * (Tuneable with the DC register.)  So up to 9 gratuitous bus accesses are
    760 * performed at the end of an RX-only transfer.
    761 */
    762static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
    763					struct spi_transfer *tfr,
    764					struct bcm2835_spidev *slv,
    765					u32 cs)
    766{
    767	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
    768	dma_cookie_t cookie;
    769	int ret;
    770
    771	/* update usage statistics */
    772	bs->count_transfer_dma++;
    773
    774	/*
    775	 * Transfer first few bytes without DMA if length of first TX or RX
    776	 * sglist entry is not a multiple of 4 bytes (hardware limitation).
    777	 */
    778	bcm2835_spi_transfer_prologue(ctlr, tfr, bs, cs);
    779
    780	/* setup tx-DMA */
    781	if (bs->tx_buf) {
    782		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, slv, true);
    783	} else {
    784		cookie = dmaengine_submit(bs->fill_tx_desc);
    785		ret = dma_submit_error(cookie);
    786	}
    787	if (ret)
    788		goto err_reset_hw;
    789
    790	/* set the DMA length */
    791	bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->tx_len);
    792
    793	/* start the HW */
    794	bcm2835_wr(bs, BCM2835_SPI_CS,
    795		   cs | BCM2835_SPI_CS_TA | BCM2835_SPI_CS_DMAEN);
    796
    797	bs->tx_dma_active = true;
    798	smp_wmb();
    799
    800	/* start TX early */
    801	dma_async_issue_pending(ctlr->dma_tx);
    802
    803	/* set up rx-DMA late, so that the transfer already runs while
    804	 * the mapping of the rx buffers still takes place -
    805	 * this saves 10us or more.
    806	 */
    807	if (bs->rx_buf) {
    808		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, slv, false);
    809	} else {
    810		cookie = dmaengine_submit(slv->clear_rx_desc);
    811		ret = dma_submit_error(cookie);
    812	}
    813	if (ret) {
    814		/* need to reset on errors */
    815		dmaengine_terminate_sync(ctlr->dma_tx);
    816		bs->tx_dma_active = false;
    817		goto err_reset_hw;
    818	}
    819
    820	/* start rx dma late */
    821	dma_async_issue_pending(ctlr->dma_rx);
    822	bs->rx_dma_active = true;
    823	smp_mb();
    824
    825	/*
    826	 * In case of a very short TX-only transfer, bcm2835_spi_dma_tx_done()
    827	 * may run before RX DMA is issued.  Terminate RX DMA if so.
    828	 */
    829	if (!bs->rx_buf && !bs->tx_dma_active &&
    830	    cmpxchg(&bs->rx_dma_active, true, false)) {
    831		dmaengine_terminate_async(ctlr->dma_rx);
    832		bcm2835_spi_reset_hw(bs);
    833	}
    834
    835	/* wait for wakeup in framework */
    836	return 1;
    837
    838err_reset_hw:
    839	bcm2835_spi_reset_hw(bs);
    840	bcm2835_spi_undo_prologue(bs);
    841	return ret;
    842}
    843
    844static bool bcm2835_spi_can_dma(struct spi_controller *ctlr,
    845				struct spi_device *spi,
    846				struct spi_transfer *tfr)
    847{
    848	/* we start DMA efforts only on bigger transfers */
    849	if (tfr->len < BCM2835_SPI_DMA_MIN_LENGTH)
    850		return false;
    851
    852	/* return OK */
    853	return true;
    854}
    855
    856static void bcm2835_dma_release(struct spi_controller *ctlr,
    857				struct bcm2835_spi *bs)
    858{
    859	if (ctlr->dma_tx) {
    860		dmaengine_terminate_sync(ctlr->dma_tx);
    861
    862		if (bs->fill_tx_desc)
    863			dmaengine_desc_free(bs->fill_tx_desc);
    864
    865		if (bs->fill_tx_addr)
    866			dma_unmap_page_attrs(ctlr->dma_tx->device->dev,
    867					     bs->fill_tx_addr, sizeof(u32),
    868					     DMA_TO_DEVICE,
    869					     DMA_ATTR_SKIP_CPU_SYNC);
    870
    871		dma_release_channel(ctlr->dma_tx);
    872		ctlr->dma_tx = NULL;
    873	}
    874
    875	if (ctlr->dma_rx) {
    876		dmaengine_terminate_sync(ctlr->dma_rx);
    877		dma_release_channel(ctlr->dma_rx);
    878		ctlr->dma_rx = NULL;
    879	}
    880}
    881
    882static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
    883			    struct bcm2835_spi *bs)
    884{
    885	struct dma_slave_config slave_config;
    886	const __be32 *addr;
    887	dma_addr_t dma_reg_base;
    888	int ret;
    889
    890	/* base address in dma-space */
    891	addr = of_get_address(ctlr->dev.of_node, 0, NULL, NULL);
    892	if (!addr) {
    893		dev_err(dev, "could not get DMA-register address - not using dma mode\n");
    894		/* Fall back to interrupt mode */
    895		return 0;
    896	}
    897	dma_reg_base = be32_to_cpup(addr);
    898
    899	/* get tx/rx dma */
    900	ctlr->dma_tx = dma_request_chan(dev, "tx");
    901	if (IS_ERR(ctlr->dma_tx)) {
    902		dev_err(dev, "no tx-dma configuration found - not using dma mode\n");
    903		ret = PTR_ERR(ctlr->dma_tx);
    904		ctlr->dma_tx = NULL;
    905		goto err;
    906	}
    907	ctlr->dma_rx = dma_request_chan(dev, "rx");
    908	if (IS_ERR(ctlr->dma_rx)) {
    909		dev_err(dev, "no rx-dma configuration found - not using dma mode\n");
    910		ret = PTR_ERR(ctlr->dma_rx);
    911		ctlr->dma_rx = NULL;
    912		goto err_release;
    913	}
    914
    915	/*
    916	 * The TX DMA channel either copies a transfer's TX buffer to the FIFO
    917	 * or, in case of an RX-only transfer, cyclically copies from the zero
    918	 * page to the FIFO using a preallocated, reusable descriptor.
    919	 */
    920	slave_config.dst_addr = (u32)(dma_reg_base + BCM2835_SPI_FIFO);
    921	slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
    922
    923	ret = dmaengine_slave_config(ctlr->dma_tx, &slave_config);
    924	if (ret)
    925		goto err_config;
    926
    927	bs->fill_tx_addr = dma_map_page_attrs(ctlr->dma_tx->device->dev,
    928					      ZERO_PAGE(0), 0, sizeof(u32),
    929					      DMA_TO_DEVICE,
    930					      DMA_ATTR_SKIP_CPU_SYNC);
    931	if (dma_mapping_error(ctlr->dma_tx->device->dev, bs->fill_tx_addr)) {
    932		dev_err(dev, "cannot map zero page - not using DMA mode\n");
    933		bs->fill_tx_addr = 0;
    934		ret = -ENOMEM;
    935		goto err_release;
    936	}
    937
    938	bs->fill_tx_desc = dmaengine_prep_dma_cyclic(ctlr->dma_tx,
    939						     bs->fill_tx_addr,
    940						     sizeof(u32), 0,
    941						     DMA_MEM_TO_DEV, 0);
    942	if (!bs->fill_tx_desc) {
    943		dev_err(dev, "cannot prepare fill_tx_desc - not using DMA mode\n");
    944		ret = -ENOMEM;
    945		goto err_release;
    946	}
    947
    948	ret = dmaengine_desc_set_reuse(bs->fill_tx_desc);
    949	if (ret) {
    950		dev_err(dev, "cannot reuse fill_tx_desc - not using DMA mode\n");
    951		goto err_release;
    952	}
    953
    954	/*
    955	 * The RX DMA channel is used bidirectionally:  It either reads the
    956	 * RX FIFO or, in case of a TX-only transfer, cyclically writes a
    957	 * precalculated value to the CS register to clear the RX FIFO.
    958	 */
    959	slave_config.src_addr = (u32)(dma_reg_base + BCM2835_SPI_FIFO);
    960	slave_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
    961	slave_config.dst_addr = (u32)(dma_reg_base + BCM2835_SPI_CS);
    962	slave_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
    963
    964	ret = dmaengine_slave_config(ctlr->dma_rx, &slave_config);
    965	if (ret)
    966		goto err_config;
    967
    968	/* all went well, so set can_dma */
    969	ctlr->can_dma = bcm2835_spi_can_dma;
    970
    971	return 0;
    972
    973err_config:
    974	dev_err(dev, "issue configuring dma: %d - not using DMA mode\n",
    975		ret);
    976err_release:
    977	bcm2835_dma_release(ctlr, bs);
    978err:
    979	/*
    980	 * Only report error for deferred probing, otherwise fall back to
    981	 * interrupt mode
    982	 */
    983	if (ret != -EPROBE_DEFER)
    984		ret = 0;
    985
    986	return ret;
    987}
    988
    989static int bcm2835_spi_transfer_one_poll(struct spi_controller *ctlr,
    990					 struct spi_device *spi,
    991					 struct spi_transfer *tfr,
    992					 u32 cs)
    993{
    994	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
    995	unsigned long timeout;
    996
    997	/* update usage statistics */
    998	bs->count_transfer_polling++;
    999
   1000	/* enable HW block without interrupts */
   1001	bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
   1002
   1003	/* fill in the fifo before the timeout calculation:
   1004	 * if we are interrupted here, the data is already being
   1005	 * transferred by the HW while we are interrupted
   1006	 */
   1007	bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
   1008
   1009	/* set the timeout to at least 2 jiffies */
   1010	timeout = jiffies + 2 + HZ * polling_limit_us / 1000000;
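       	/*
       	 * Worked example: with HZ = 100 and the default polling_limit_us
       	 * of 30, HZ * 30 / 1000000 truncates to 0 in integer arithmetic,
       	 * so the "+ 2" above provides the 2-jiffy minimum.
       	 */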
   1011
   1012	/* loop until finished the transfer */
   1013	while (bs->rx_len) {
   1014		/* fill in tx fifo with remaining data */
   1015		bcm2835_wr_fifo(bs);
   1016
   1017		/* read from fifo as much as possible */
   1018		bcm2835_rd_fifo(bs);
   1019
   1020		/* if there is still data pending to read
   1021		 * then check the timeout
   1022		 */
   1023		if (bs->rx_len && time_after(jiffies, timeout)) {
   1024			dev_dbg_ratelimited(&spi->dev,
   1025					    "timeout period reached: jiffies: %lu remaining tx/rx: %d/%d - falling back to interrupt mode\n",
   1026					    jiffies - timeout,
   1027					    bs->tx_len, bs->rx_len);
   1028			/* fall back to interrupt mode */
   1029
   1030			/* update usage statistics */
   1031			bs->count_transfer_irq_after_polling++;
   1032
   1033			return bcm2835_spi_transfer_one_irq(ctlr, spi,
   1034							    tfr, cs, false);
   1035		}
   1036	}
   1037
   1038	/* Transfer complete - reset SPI HW */
   1039	bcm2835_spi_reset_hw(bs);
   1040	/* and return without waiting for completion */
   1041	return 0;
   1042}
   1043
   1044static int bcm2835_spi_transfer_one(struct spi_controller *ctlr,
   1045				    struct spi_device *spi,
   1046				    struct spi_transfer *tfr)
   1047{
   1048	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
   1049	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
   1050	unsigned long spi_hz, cdiv;
   1051	unsigned long hz_per_byte, byte_limit;
   1052	u32 cs = slv->prepare_cs;
   1053
   1054	/* set clock */
   1055	spi_hz = tfr->speed_hz;
   1056
   1057	if (spi_hz >= bs->clk_hz / 2) {
   1058		cdiv = 2; /* clk_hz/2 is the fastest we can go */
   1059	} else if (spi_hz) {
   1060		/* CDIV must be a multiple of two */
   1061		cdiv = DIV_ROUND_UP(bs->clk_hz, spi_hz);
   1062		cdiv += (cdiv % 2);
   1063
   1064		if (cdiv >= 65536)
   1065			cdiv = 0; /* 0 is the slowest we can go */
   1066	} else {
   1067		cdiv = 0; /* 0 is the slowest we can go */
   1068	}
   1069	tfr->effective_speed_hz = cdiv ? (bs->clk_hz / cdiv) : (bs->clk_hz / 65536);
   1070	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
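       	/*
       	 * Worked example (assuming the usual 250 MHz core clock): for a
       	 * requested 10 MHz, cdiv = DIV_ROUND_UP(250000000, 10000000) = 25,
       	 * rounded up to the even value 26, so the effective speed becomes
       	 * 250 MHz / 26 ~= 9.6 MHz.
       	 */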
   1071
   1072	/* handle all the 3-wire mode */
   1073	if (spi->mode & SPI_3WIRE && tfr->rx_buf)
   1074		cs |= BCM2835_SPI_CS_REN;
   1075
   1076	/* set transmit buffers and length */
   1077	bs->tx_buf = tfr->tx_buf;
   1078	bs->rx_buf = tfr->rx_buf;
   1079	bs->tx_len = tfr->len;
   1080	bs->rx_len = tfr->len;
   1081
   1082	/* Calculate the estimated time in us the transfer runs.  Note that
   1083	 * there is 1 idle clock cycle after each byte getting transferred,
   1084	 * so we have 9 cycles/byte.  This is used to find the number of Hz
   1085	 * per byte per polling limit.  E.g., we can transfer 1 byte in 30 us
   1086	 * per 300,000 Hz of bus clock.
   1087	 */
   1088	hz_per_byte = polling_limit_us ? (9 * 1000000) / polling_limit_us : 0;
   1089	byte_limit = hz_per_byte ? tfr->effective_speed_hz / hz_per_byte : 1;
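       	/*
       	 * Worked example: with the default polling_limit_us of 30,
       	 * hz_per_byte = 9000000 / 30 = 300000.  At a 3 MHz effective
       	 * speed, byte_limit = 3000000 / 300000 = 10, so only transfers
       	 * shorter than 10 bytes are polled.
       	 */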
   1090
   1091	/* run in polling mode for short transfers */
   1092	if (tfr->len < byte_limit)
   1093		return bcm2835_spi_transfer_one_poll(ctlr, spi, tfr, cs);
   1094
   1095	/* run in dma mode if conditions are right.
   1096	 * Note that unlike poll or interrupt mode, DMA mode does not have
   1097	 * this 1 idle clock cycle pattern but runs the spi clock without gaps
   1098	 */
   1099	if (ctlr->can_dma && bcm2835_spi_can_dma(ctlr, spi, tfr))
   1100		return bcm2835_spi_transfer_one_dma(ctlr, tfr, slv, cs);
   1101
   1102	/* run in interrupt-mode */
   1103	return bcm2835_spi_transfer_one_irq(ctlr, spi, tfr, cs, true);
   1104}
   1105
   1106static int bcm2835_spi_prepare_message(struct spi_controller *ctlr,
   1107				       struct spi_message *msg)
   1108{
   1109	struct spi_device *spi = msg->spi;
   1110	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
   1111	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
   1112	int ret;
   1113
   1114	if (ctlr->can_dma) {
   1115		/*
   1116		 * DMA transfers are limited to 16 bits (0 to 65535 bytes) by the
   1117		 * SPI HW due to DLEN.  Split them up if the limit is exceeded;
   1118		 * 65532 is the largest multiple of 4 (32-bit FIFO aligned) below it.
   1119		 */
   1120		ret = spi_split_transfers_maxsize(ctlr, msg, 65532,
   1121						  GFP_KERNEL | GFP_DMA);
   1122		if (ret)
   1123			return ret;
   1124	}
   1125
   1126	/*
   1127	 * Set up clock polarity before spi_transfer_one_message() asserts
   1128	 * chip select to avoid a gratuitous clock signal edge.
   1129	 */
   1130	bcm2835_wr(bs, BCM2835_SPI_CS, slv->prepare_cs);
   1131
   1132	return 0;
   1133}
   1134
   1135static void bcm2835_spi_handle_err(struct spi_controller *ctlr,
   1136				   struct spi_message *msg)
   1137{
   1138	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
   1139
   1140	/* if an error occurred and we have an active dma, then terminate */
   1141	dmaengine_terminate_sync(ctlr->dma_tx);
   1142	bs->tx_dma_active = false;
   1143	dmaengine_terminate_sync(ctlr->dma_rx);
   1144	bs->rx_dma_active = false;
   1145	bcm2835_spi_undo_prologue(bs);
   1146
   1147	/* and reset */
   1148	bcm2835_spi_reset_hw(bs);
   1149}
   1150
   1151static int chip_match_name(struct gpio_chip *chip, void *data)
   1152{
   1153	return !strcmp(chip->label, data);
   1154}
   1155
   1156static void bcm2835_spi_cleanup(struct spi_device *spi)
   1157{
   1158	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
   1159	struct spi_controller *ctlr = spi->controller;
   1160
   1161	if (slv->clear_rx_desc)
   1162		dmaengine_desc_free(slv->clear_rx_desc);
   1163
   1164	if (slv->clear_rx_addr)
   1165		dma_unmap_single(ctlr->dma_rx->device->dev,
   1166				 slv->clear_rx_addr,
   1167				 sizeof(u32),
   1168				 DMA_TO_DEVICE);
   1169
   1170	kfree(slv);
   1171}
   1172
   1173static int bcm2835_spi_setup_dma(struct spi_controller *ctlr,
   1174				 struct spi_device *spi,
   1175				 struct bcm2835_spi *bs,
   1176				 struct bcm2835_spidev *slv)
   1177{
   1178	int ret;
   1179
   1180	if (!ctlr->dma_rx)
   1181		return 0;
   1182
   1183	slv->clear_rx_addr = dma_map_single(ctlr->dma_rx->device->dev,
   1184					    &slv->clear_rx_cs,
   1185					    sizeof(u32),
   1186					    DMA_TO_DEVICE);
   1187	if (dma_mapping_error(ctlr->dma_rx->device->dev, slv->clear_rx_addr)) {
   1188		dev_err(&spi->dev, "cannot map clear_rx_cs\n");
   1189		slv->clear_rx_addr = 0;
   1190		return -ENOMEM;
   1191	}
   1192
   1193	slv->clear_rx_desc = dmaengine_prep_dma_cyclic(ctlr->dma_rx,
   1194						       slv->clear_rx_addr,
   1195						       sizeof(u32), 0,
   1196						       DMA_MEM_TO_DEV, 0);
   1197	if (!slv->clear_rx_desc) {
   1198		dev_err(&spi->dev, "cannot prepare clear_rx_desc\n");
   1199		return -ENOMEM;
   1200	}
   1201
   1202	ret = dmaengine_desc_set_reuse(slv->clear_rx_desc);
   1203	if (ret) {
   1204		dev_err(&spi->dev, "cannot reuse clear_rx_desc\n");
   1205		return ret;
   1206	}
   1207
   1208	return 0;
   1209}
   1210
   1211static int bcm2835_spi_setup(struct spi_device *spi)
   1212{
   1213	struct spi_controller *ctlr = spi->controller;
   1214	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
   1215	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
   1216	struct gpio_chip *chip;
   1217	int ret;
   1218	u32 cs;
   1219
   1220	if (!slv) {
   1221		slv = kzalloc(ALIGN(sizeof(*slv), dma_get_cache_alignment()),
   1222			      GFP_KERNEL);
   1223		if (!slv)
   1224			return -ENOMEM;
   1225
   1226		spi_set_ctldata(spi, slv);
   1227
   1228		ret = bcm2835_spi_setup_dma(ctlr, spi, bs, slv);
   1229		if (ret)
   1230			goto err_cleanup;
   1231	}
   1232
   1233	/*
   1234	 * Precalculate SPI slave's CS register value for ->prepare_message():
   1235	 * The driver always uses software-controlled GPIO chip select, hence
   1236	 * set the hardware-controlled native chip select to an invalid value
   1237	 * to prevent it from interfering.
   1238	 */
   1239	cs = BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
   1240	if (spi->mode & SPI_CPOL)
   1241		cs |= BCM2835_SPI_CS_CPOL;
   1242	if (spi->mode & SPI_CPHA)
   1243		cs |= BCM2835_SPI_CS_CPHA;
   1244	slv->prepare_cs = cs;
   1245
   1246	/*
   1247	 * Precalculate SPI slave's CS register value to clear RX FIFO
   1248	 * in case of a TX-only DMA transfer.
   1249	 */
   1250	if (ctlr->dma_rx) {
   1251		slv->clear_rx_cs = cs | BCM2835_SPI_CS_TA |
   1252					BCM2835_SPI_CS_DMAEN |
   1253					BCM2835_SPI_CS_CLEAR_RX;
   1254		dma_sync_single_for_device(ctlr->dma_rx->device->dev,
   1255					   slv->clear_rx_addr,
   1256					   sizeof(u32),
   1257					   DMA_TO_DEVICE);
   1258	}
   1259
   1260	/*
   1261	 * sanity-check the native chip selects
   1262	 */
   1263	if (spi->mode & SPI_NO_CS)
   1264		return 0;
   1265	/*
   1266	 * The SPI core has successfully requested the CS GPIO line from the
   1267	 * device tree, so we are done.
   1268	 */
   1269	if (spi->cs_gpiod)
   1270		return 0;
   1271	if (spi->chip_select > 1) {
   1272		/* error out if a native CS > 1 is requested:
   1273		 * officially there is a CS2, but it is not documented
   1274		 * which GPIO it is connected to...
   1275		 */
   1276		dev_err(&spi->dev,
   1277			"setup: only two native chip-selects are supported\n");
   1278		ret = -EINVAL;
   1279		goto err_cleanup;
   1280	}
   1281
   1282	/*
   1283	 * Translate native CS to GPIO
   1284	 *
   1285	 * FIXME: poking around in the gpiolib internals like this is
   1286	 * not very good practice. Find a way to locate the real problem
   1287	 * and fix it. Why is the GPIO descriptor in spi->cs_gpiod
   1288	 * sometimes not assigned correctly? Erroneous device trees?
   1289	 */
   1290
   1291	/* get the gpio chip for the base */
   1292	chip = gpiochip_find("pinctrl-bcm2835", chip_match_name);
   1293	if (!chip)
   1294		return 0;
   1295
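       	/* on the BCM2835, native CS0 is wired to GPIO 8 and CS1 to GPIO 7,
       	 * hence the "8 - spi->chip_select" translation below
       	 */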
   1296	spi->cs_gpiod = gpiochip_request_own_desc(chip, 8 - spi->chip_select,
   1297						  DRV_NAME,
   1298						  GPIO_LOOKUP_FLAGS_DEFAULT,
   1299						  GPIOD_OUT_LOW);
   1300	if (IS_ERR(spi->cs_gpiod)) {
   1301		ret = PTR_ERR(spi->cs_gpiod);
   1302		goto err_cleanup;
   1303	}
   1304
   1305	/* and log that this native CS is now driven via GPIO */
   1306	dev_info(&spi->dev, "setting up native-CS%i to use GPIO\n",
   1307		 spi->chip_select);
   1308
   1309	return 0;
   1310
   1311err_cleanup:
   1312	bcm2835_spi_cleanup(spi);
   1313	return ret;
   1314}
   1315
   1316static int bcm2835_spi_probe(struct platform_device *pdev)
   1317{
   1318	struct spi_controller *ctlr;
   1319	struct bcm2835_spi *bs;
   1320	int err;
   1321
   1322	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*bs));
   1323	if (!ctlr)
   1324		return -ENOMEM;
   1325
   1326	platform_set_drvdata(pdev, ctlr);
   1327
   1328	ctlr->use_gpio_descriptors = true;
   1329	ctlr->mode_bits = BCM2835_SPI_MODE_BITS;
   1330	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
   1331	ctlr->num_chipselect = 3;
   1332	ctlr->setup = bcm2835_spi_setup;
   1333	ctlr->cleanup = bcm2835_spi_cleanup;
   1334	ctlr->transfer_one = bcm2835_spi_transfer_one;
   1335	ctlr->handle_err = bcm2835_spi_handle_err;
   1336	ctlr->prepare_message = bcm2835_spi_prepare_message;
   1337	ctlr->dev.of_node = pdev->dev.of_node;
   1338
   1339	bs = spi_controller_get_devdata(ctlr);
   1340	bs->ctlr = ctlr;
   1341
   1342	bs->regs = devm_platform_ioremap_resource(pdev, 0);
   1343	if (IS_ERR(bs->regs))
   1344		return PTR_ERR(bs->regs);
   1345
   1346	bs->clk = devm_clk_get(&pdev->dev, NULL);
   1347	if (IS_ERR(bs->clk))
   1348		return dev_err_probe(&pdev->dev, PTR_ERR(bs->clk),
   1349				     "could not get clk\n");
   1350
   1351	ctlr->max_speed_hz = clk_get_rate(bs->clk) / 2;
   1352
   1353	bs->irq = platform_get_irq(pdev, 0);
   1354	if (bs->irq <= 0)
   1355		return bs->irq ? bs->irq : -ENODEV;
   1356
   1357	clk_prepare_enable(bs->clk);
   1358	bs->clk_hz = clk_get_rate(bs->clk);
   1359
   1360	err = bcm2835_dma_init(ctlr, &pdev->dev, bs);
   1361	if (err)
   1362		goto out_clk_disable;
   1363
   1364	/* initialise the hardware with the default polarities */
   1365	bcm2835_wr(bs, BCM2835_SPI_CS,
   1366		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
   1367
   1368	err = devm_request_irq(&pdev->dev, bs->irq, bcm2835_spi_interrupt, 0,
   1369			       dev_name(&pdev->dev), bs);
   1370	if (err) {
   1371		dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
   1372		goto out_dma_release;
   1373	}
   1374
   1375	err = spi_register_controller(ctlr);
   1376	if (err) {
   1377		dev_err(&pdev->dev, "could not register SPI controller: %d\n",
   1378			err);
   1379		goto out_dma_release;
   1380	}
   1381
   1382	bcm2835_debugfs_create(bs, dev_name(&pdev->dev));
   1383
   1384	return 0;
   1385
   1386out_dma_release:
   1387	bcm2835_dma_release(ctlr, bs);
   1388out_clk_disable:
   1389	clk_disable_unprepare(bs->clk);
   1390	return err;
   1391}
   1392
   1393static int bcm2835_spi_remove(struct platform_device *pdev)
   1394{
   1395	struct spi_controller *ctlr = platform_get_drvdata(pdev);
   1396	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
   1397
   1398	bcm2835_debugfs_remove(bs);
   1399
   1400	spi_unregister_controller(ctlr);
   1401
   1402	bcm2835_dma_release(ctlr, bs);
   1403
   1404	/* Clear FIFOs, and disable the HW block */
   1405	bcm2835_wr(bs, BCM2835_SPI_CS,
   1406		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
   1407
   1408	clk_disable_unprepare(bs->clk);
   1409
   1410	return 0;
   1411}
   1412
   1413static void bcm2835_spi_shutdown(struct platform_device *pdev)
   1414{
   1415	int ret;
   1416
   1417	ret = bcm2835_spi_remove(pdev);
   1418	if (ret)
   1419		dev_err(&pdev->dev, "failed to shutdown\n");
   1420}
   1421
   1422static const struct of_device_id bcm2835_spi_match[] = {
   1423	{ .compatible = "brcm,bcm2835-spi", },
   1424	{}
   1425};
   1426MODULE_DEVICE_TABLE(of, bcm2835_spi_match);
   1427
   1428static struct platform_driver bcm2835_spi_driver = {
   1429	.driver		= {
   1430		.name		= DRV_NAME,
   1431		.of_match_table	= bcm2835_spi_match,
   1432	},
   1433	.probe		= bcm2835_spi_probe,
   1434	.remove		= bcm2835_spi_remove,
   1435	.shutdown	= bcm2835_spi_shutdown,
   1436};
   1437module_platform_driver(bcm2835_spi_driver);
   1438
   1439MODULE_DESCRIPTION("SPI controller driver for Broadcom BCM2835");
   1440MODULE_AUTHOR("Chris Boot <bootc@bootc.net>");
   1441MODULE_LICENSE("GPL");