cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

bcm-pdc-mailbox.c (48882B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright 2016 Broadcom
      4 */
      5
      6/*
      7 * Broadcom PDC Mailbox Driver
      8 * The PDC provides a ring-based programming interface to one or more hardware
      9 * offload engines. For example, the PDC driver works with both SPU-M and SPU2
     10 * cryptographic offload hardware. In some chips the PDC is referred to as MDE,
     11 * and in others the FA2/FA+ hardware is used with this PDC driver.
     12 *
     13 * The PDC driver registers with the Linux mailbox framework as a mailbox
     14 * controller, once for each PDC instance. Ring 0 for each PDC is registered as
     15 * a mailbox channel. The PDC driver uses interrupts to determine when data
     16 * transfers to and from an offload engine are complete. The PDC driver uses
     17 * a tasklet so that response messages are handled outside of hard interrupt
     18 * context.
     19 *
     20 * The PDC driver allows multiple messages to be pending in the descriptor
     21 * rings. The tx_msg_start descriptor index indicates where the last message
     22 * starts. The txin_numd value at this index indicates how many descriptor
     23 * indexes make up the message. Similar state is kept on the receive side. When
     24 * an rx interrupt indicates a response is ready, the PDC driver processes numd
     25 * descriptors from the tx and rx ring, thus processing one response at a time.
     26 */
     27
     28#include <linux/errno.h>
     29#include <linux/module.h>
     30#include <linux/init.h>
     31#include <linux/slab.h>
     32#include <linux/debugfs.h>
     33#include <linux/interrupt.h>
     34#include <linux/wait.h>
     35#include <linux/platform_device.h>
     36#include <linux/io.h>
     37#include <linux/of.h>
     38#include <linux/of_device.h>
     39#include <linux/of_address.h>
     40#include <linux/of_irq.h>
     41#include <linux/mailbox_controller.h>
     42#include <linux/mailbox/brcm-message.h>
     43#include <linux/scatterlist.h>
     44#include <linux/dma-direction.h>
     45#include <linux/dma-mapping.h>
     46#include <linux/dmapool.h>
     47
     48#define PDC_SUCCESS  0
     49
     50#define RING_ENTRY_SIZE   sizeof(struct dma64dd)
     51
     52/* # entries in PDC dma ring */
     53#define PDC_RING_ENTRIES  512
     54/*
     55 * Minimum number of ring descriptor entries that must be free to tell mailbox
     56 * framework that it can submit another request
     57 */
     58#define PDC_RING_SPACE_MIN  15
     59
     60#define PDC_RING_SIZE    (PDC_RING_ENTRIES * RING_ENTRY_SIZE)
     61/* Rings are 8k aligned */
     62#define RING_ALIGN_ORDER  13
     63#define RING_ALIGN        BIT(RING_ALIGN_ORDER)
     64
     65#define RX_BUF_ALIGN_ORDER  5
     66#define RX_BUF_ALIGN	    BIT(RX_BUF_ALIGN_ORDER)
     67
     68/* descriptor bumping macros */
     69#define XXD(x, max_mask)              ((x) & (max_mask))
     70#define TXD(x, max_mask)              XXD((x), (max_mask))
     71#define RXD(x, max_mask)              XXD((x), (max_mask))
     72#define NEXTTXD(i, max_mask)          TXD((i) + 1, (max_mask))
     73#define PREVTXD(i, max_mask)          TXD((i) - 1, (max_mask))
     74#define NEXTRXD(i, max_mask)          RXD((i) + 1, (max_mask))
     75#define PREVRXD(i, max_mask)          RXD((i) - 1, (max_mask))
     76#define NTXDACTIVE(h, t, max_mask)    TXD((t) - (h), (max_mask))
     77#define NRXDACTIVE(h, t, max_mask)    RXD((t) - (h), (max_mask))
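/*
 * These macros implement modulo-ring-size arithmetic: the max_mask argument
 * is ntxpost/nrxpost (PDC_RING_ENTRIES - 1, i.e. a power of two minus one),
 * so for example NEXTTXD(511, 511) wraps to 0 and NTXDACTIVE(510, 2, 511)
 * evaluates to 4 even though the tail index has wrapped past the head.
 */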
     78
     79/* Length of BCM header at start of SPU msg, in bytes */
     80#define BCM_HDR_LEN  8
     81
     82/*
     83 * PDC driver reserves ringset 0 on each SPU for its own use. The driver does
     84 * not currently support use of multiple ringsets on a single PDC engine.
     85 */
     86#define PDC_RINGSET  0
     87
     88/*
     89 * Interrupt mask and status definitions. Enable interrupts for tx and rx on
     90 * ring 0
     91 */
     92#define PDC_RCVINT_0         (16 + PDC_RINGSET)
     93#define PDC_RCVINTEN_0       BIT(PDC_RCVINT_0)
     94#define PDC_INTMASK	     (PDC_RCVINTEN_0)
     95#define PDC_LAZY_FRAMECOUNT  1
     96#define PDC_LAZY_TIMEOUT     10000
     97#define PDC_LAZY_INT  (PDC_LAZY_TIMEOUT | (PDC_LAZY_FRAMECOUNT << 24))
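/*
 * PDC_LAZY_INT packs the timeout into the low 24 bits and the frame count
 * into bits 31:24 of the intrcvlazy register; with a frame count of 1 the
 * hardware should raise an rx interrupt for each received frame.
 */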
     98#define PDC_INTMASK_OFFSET   0x24
     99#define PDC_INTSTATUS_OFFSET 0x20
    100#define PDC_RCVLAZY0_OFFSET  (0x30 + 4 * PDC_RINGSET)
    101#define FA_RCVLAZY0_OFFSET   0x100
    102
    103/*
    104 * For SPU2, configure MDE_CKSUM_CONTROL to write 17 bytes of metadata
    105 * before frame
    106 */
    107#define PDC_SPU2_RESP_HDR_LEN  17
    108#define PDC_CKSUM_CTRL         BIT(27)
    109#define PDC_CKSUM_CTRL_OFFSET  0x400
    110
    111#define PDC_SPUM_RESP_HDR_LEN  32
    112
    113/*
    114 * Sets the following bits for write to transmit control reg:
    115 * 11    - PtyChkDisable - parity check is disabled
    116 * 20:18 - BurstLen = 3 -> 2^7 = 128 byte data reads from memory
    117 */
    118#define PDC_TX_CTL		0x000C0800
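/* 0x000C0800 == (0x3 << 18) | BIT(11), matching the field description above */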
    119
    120/* Bit in tx control reg to enable tx channel */
    121#define PDC_TX_ENABLE		0x1
    122
    123/*
    124 * Sets the following bits for write to receive control reg:
    125 * 7:1   - RcvOffset - size in bytes of status region at start of rx frame buf
    126 * 9     - SepRxHdrDescEn - place start of new frames only in descriptors
    127 *                          that have StartOfFrame set
    128 * 10    - OflowContinue - on rx FIFO overflow, clear rx fifo, discard all
    129 *                         remaining bytes in current frame, report error
    130 *                         in rx frame status for current frame
    131 * 11    - PtyChkDisable - parity check is disabled
    132 * 20:18 - BurstLen = 3 -> 2^7 = 128 byte data reads from memory
    133 */
    134#define PDC_RX_CTL		0x000C0E00
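/*
 * 0x000C0E00 == (0x3 << 18) | BIT(11) | BIT(10) | BIT(9); the RcvOffset field
 * in bits 7:1 is left at zero here and is added in from rx_status_len (shifted
 * left by 1) when the receive control register is actually written.
 */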
    135
    136/* Bit in rx control reg to enable rx channel */
    137#define PDC_RX_ENABLE		0x1
    138
    139#define CRYPTO_D64_RS0_CD_MASK   ((PDC_RING_ENTRIES * RING_ENTRY_SIZE) - 1)
    140
    141/* descriptor flags */
    142#define D64_CTRL1_EOT   BIT(28)	/* end of descriptor table */
    143#define D64_CTRL1_IOC   BIT(29)	/* interrupt on complete */
    144#define D64_CTRL1_EOF   BIT(30)	/* end of frame */
    145#define D64_CTRL1_SOF   BIT(31)	/* start of frame */
    146
    147#define RX_STATUS_OVERFLOW       0x00800000
    148#define RX_STATUS_LEN            0x0000FFFF
    149
    150#define PDC_TXREGS_OFFSET  0x200
    151#define PDC_RXREGS_OFFSET  0x220
    152
    153/* Maximum size buffer the DMA engine can handle */
    154#define PDC_DMA_BUF_MAX 16384
    155
    156enum pdc_hw {
    157	FA_HW,		/* FA2/FA+ hardware (i.e. Northstar Plus) */
    158	PDC_HW		/* PDC/MDE hardware (i.e. Northstar 2, Pegasus) */
    159};
    160
    161struct pdc_dma_map {
    162	void *ctx;          /* opaque context associated with frame */
    163};
    164
    165/* dma descriptor */
    166struct dma64dd {
    167	u32 ctrl1;      /* misc control bits */
    168	u32 ctrl2;      /* buffer count and address extension */
    169	u32 addrlow;    /* memory address of the data buffer, bits 31:0 */
    170	u32 addrhigh;   /* memory address of the data buffer, bits 63:32 */
    171};
    172
    173/* dma registers per channel(xmt or rcv) */
    174struct dma64_regs {
    175	u32  control;   /* enable, et al */
    176	u32  ptr;       /* last descriptor posted to chip */
    177	u32  addrlow;   /* descriptor ring base address low 32-bits */
    178	u32  addrhigh;  /* descriptor ring base address bits 63:32 */
    179	u32  status0;   /* last rx descriptor written by hw */
    180	u32  status1;   /* driver does not use */
    181};
    182
    183/* cpp contortions to concatenate w/arg prescan */
    184#ifndef PAD
    185#define _PADLINE(line)  pad ## line
    186#define _XSTR(line)     _PADLINE(line)
    187#define PAD             _XSTR(__LINE__)
    188#endif  /* PAD */
    189
    190/* dma registers. matches hw layout. */
    191struct dma64 {
    192	struct dma64_regs dmaxmt;  /* dma tx */
    193	u32          PAD[2];
    194	struct dma64_regs dmarcv;  /* dma rx */
    195	u32          PAD[2];
    196};
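/*
 * With the two pad words after each register block, struct dma64 is 0x40 bytes,
 * so the dmaregs[PDC_NUM_DMA_RINGS] array in struct pdc_regs below covers
 * 0x200 - 0x2fc, one ringset per 64-byte stride.
 */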
    197
    198/* PDC registers */
    199struct pdc_regs {
    200	u32  devcontrol;             /* 0x000 */
    201	u32  devstatus;              /* 0x004 */
    202	u32  PAD;
    203	u32  biststatus;             /* 0x00c */
    204	u32  PAD[4];
    205	u32  intstatus;              /* 0x020 */
    206	u32  intmask;                /* 0x024 */
    207	u32  gptimer;                /* 0x028 */
    208
    209	u32  PAD;
    210	u32  intrcvlazy_0;           /* 0x030 (Only in PDC, not FA2) */
    211	u32  intrcvlazy_1;           /* 0x034 (Only in PDC, not FA2) */
    212	u32  intrcvlazy_2;           /* 0x038 (Only in PDC, not FA2) */
    213	u32  intrcvlazy_3;           /* 0x03c (Only in PDC, not FA2) */
    214
    215	u32  PAD[48];
    216	u32  fa_intrecvlazy;         /* 0x100 (Only in FA2, not PDC) */
    217	u32  flowctlthresh;          /* 0x104 */
    218	u32  wrrthresh;              /* 0x108 */
    219	u32  gmac_idle_cnt_thresh;   /* 0x10c */
    220
    221	u32  PAD[4];
    222	u32  ifioaccessaddr;         /* 0x120 */
    223	u32  ifioaccessbyte;         /* 0x124 */
    224	u32  ifioaccessdata;         /* 0x128 */
    225
    226	u32  PAD[21];
    227	u32  phyaccess;              /* 0x180 */
    228	u32  PAD;
    229	u32  phycontrol;             /* 0x188 */
    230	u32  txqctl;                 /* 0x18c */
    231	u32  rxqctl;                 /* 0x190 */
    232	u32  gpioselect;             /* 0x194 */
    233	u32  gpio_output_en;         /* 0x198 */
    234	u32  PAD;                    /* 0x19c */
    235	u32  txq_rxq_mem_ctl;        /* 0x1a0 */
    236	u32  memory_ecc_status;      /* 0x1a4 */
    237	u32  serdes_ctl;             /* 0x1a8 */
    238	u32  serdes_status0;         /* 0x1ac */
    239	u32  serdes_status1;         /* 0x1b0 */
    240	u32  PAD[11];                /* 0x1b4-1dc */
    241	u32  clk_ctl_st;             /* 0x1e0 */
    242	u32  hw_war;                 /* 0x1e4 (Only in PDC, not FA2) */
    243	u32  pwrctl;                 /* 0x1e8 */
    244	u32  PAD[5];
    245
    246#define PDC_NUM_DMA_RINGS   4
    247	struct dma64 dmaregs[PDC_NUM_DMA_RINGS];  /* 0x0200 - 0x2fc */
    248
    249	/* more registers follow, but we don't use them */
    250};
    251
    252/* structure for allocating/freeing DMA rings */
    253struct pdc_ring_alloc {
    254	dma_addr_t  dmabase; /* DMA address of start of ring */
    255	void	   *vbase;   /* base kernel virtual address of ring */
    256	u32	    size;    /* ring allocation size in bytes */
    257};
    258
    259/*
    260 * context associated with a receive descriptor.
    261 * @rxp_ctx: opaque context associated with frame that starts at each
    262 *           rx ring index.
    263 * @dst_sg:  Scatterlist used to form reply frames beginning at a given ring
    264 *           index. Retained in order to unmap each sg after reply is processed.
    265 * @rxin_numd: Number of rx descriptors associated with the message that starts
    266 *             at a descriptor index. Not set for every index. For example,
    267 *             if descriptor index i points to a scatterlist with 4 entries,
    268 *             then the next three descriptor indexes don't have a value set.
    269 * @resp_hdr: Virtual address of buffer used to catch DMA rx status
    270 * @resp_hdr_daddr: physical address of DMA rx status buffer
    271 */
    272struct pdc_rx_ctx {
    273	void *rxp_ctx;
    274	struct scatterlist *dst_sg;
    275	u32  rxin_numd;
    276	void *resp_hdr;
    277	dma_addr_t resp_hdr_daddr;
    278};
    279
    280/* PDC state structure */
    281struct pdc_state {
    282	/* Index of the PDC whose state is in this structure instance */
    283	u8 pdc_idx;
    284
    285	/* Platform device for this PDC instance */
    286	struct platform_device *pdev;
    287
    288	/*
    289	 * Each PDC instance has a mailbox controller. PDC receives request
    290	 * messages through mailboxes, and sends response messages through the
    291	 * mailbox framework.
    292	 */
    293	struct mbox_controller mbc;
    294
    295	unsigned int pdc_irq;
    296
    297	/* tasklet for deferred processing after DMA rx interrupt */
    298	struct tasklet_struct rx_tasklet;
    299
    300	/* Number of bytes of receive status prior to each rx frame */
    301	u32 rx_status_len;
    302	/* Whether a BCM header is prepended to each frame */
    303	bool use_bcm_hdr;
    304	/* Sum of length of BCM header and rx status header */
    305	u32 pdc_resp_hdr_len;
    306
    307	/* The base virtual address of DMA hw registers */
    308	void __iomem *pdc_reg_vbase;
    309
    310	/* Pool for allocation of DMA rings */
    311	struct dma_pool *ring_pool;
    312
    313	/* Pool for allocation of metadata buffers for response messages */
    314	struct dma_pool *rx_buf_pool;
    315
    316	/*
    317	 * The base virtual address of DMA tx/rx descriptor rings. Corresponding
    318	 * DMA address and size of ring allocation.
    319	 */
    320	struct pdc_ring_alloc tx_ring_alloc;
    321	struct pdc_ring_alloc rx_ring_alloc;
    322
    323	struct pdc_regs *regs;    /* start of PDC registers */
    324
    325	struct dma64_regs *txregs_64; /* dma tx engine registers */
    326	struct dma64_regs *rxregs_64; /* dma rx engine registers */
    327
    328	/*
    329	 * Arrays of PDC_RING_ENTRIES descriptors
    330	 * To use multiple ringsets, this needs to be extended
    331	 */
    332	struct dma64dd   *txd_64;  /* tx descriptor ring */
    333	struct dma64dd   *rxd_64;  /* rx descriptor ring */
    334
    335	/* descriptor ring sizes */
    336	u32      ntxd;       /* # tx descriptors */
    337	u32      nrxd;       /* # rx descriptors */
    338	u32      nrxpost;    /* # rx buffers to keep posted */
    339	u32      ntxpost;    /* max number of tx buffers that can be posted */
    340
    341	/*
    342	 * Index of next tx descriptor to reclaim. That is, the descriptor
    343	 * index of the oldest tx buffer for which the host has yet to process
    344	 * the corresponding response.
    345	 */
    346	u32  txin;
    347
    348	/*
    349	 * Index of the first transmit descriptor for the sequence of
    350	 * message fragments currently under construction. Used to build up
    351	 * the txin_numd count for a message. Updated to txout when the host
    352	 * starts a new sequence of tx buffers for a new message.
    353	 */
    354	u32  tx_msg_start;
    355
    356	/* Index of next tx descriptor to post. */
    357	u32  txout;
    358
    359	/*
    360	 * Number of tx descriptors associated with the message that starts
    361	 * at this tx descriptor index.
    362	 */
    363	u32      txin_numd[PDC_RING_ENTRIES];
    364
    365	/*
    366	 * Index of next rx descriptor to reclaim. This is the index of
    367	 * the next descriptor whose data has yet to be processed by the host.
    368	 */
    369	u32  rxin;
    370
    371	/*
    372	 * Index of the first receive descriptor for the sequence of
    373	 * message fragments currently under construction. Used to build up
    374	 * the rxin_numd count for a message. Updated to rxout when the host
    375	 * starts a new sequence of rx buffers for a new message.
    376	 */
    377	u32  rx_msg_start;
    378
    379	/*
    380	 * Saved value of current hardware rx descriptor index.
    381	 * The last rx buffer written by the hw is the index previous to
    382	 * this one.
    383	 */
    384	u32  last_rx_curr;
    385
    386	/* Index of next rx descriptor to post. */
    387	u32  rxout;
    388
    389	struct pdc_rx_ctx rx_ctx[PDC_RING_ENTRIES];
    390
    391	/*
    392	 * Scatterlists used to form request and reply frames beginning at a
    393	 * given ring index. Retained in order to unmap each sg after reply
    394	 * is processed
    395	 */
    396	struct scatterlist *src_sg[PDC_RING_ENTRIES];
    397
    398	/* counters */
    399	u32  pdc_requests;     /* number of request messages submitted */
    400	u32  pdc_replies;      /* number of reply messages received */
    401	u32  last_tx_not_done; /* too few tx descriptors to indicate done */
    402	u32  tx_ring_full;     /* unable to accept msg because tx ring full */
    403	u32  rx_ring_full;     /* unable to accept msg because rx ring full */
    404	u32  txnobuf;          /* unable to create tx descriptor */
    405	u32  rxnobuf;          /* unable to create rx descriptor */
    406	u32  rx_oflow;         /* count of rx overflows */
    407
    408	/* hardware type - FA2 or PDC/MDE */
    409	enum pdc_hw hw_type;
    410};
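/*
 * Ring bookkeeping sketch: indexes advance modulo PDC_RING_ENTRIES using the
 * ntxpost/nrxpost masks; NTXDACTIVE(txin, txout, ntxpost) descriptors are
 * outstanding at any time, and a ring is treated as full once that count
 * reaches ntxpost, so one slot is always left free.
 */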
    411
    412/* Global variables */
    413
    414struct pdc_globals {
    415	/* Actual number of SPUs in hardware, as reported by device tree */
    416	u32 num_spu;
    417};
    418
    419static struct pdc_globals pdcg;
    420
    421/* top level debug FS directory for PDC driver */
    422static struct dentry *debugfs_dir;
    423
    424static ssize_t pdc_debugfs_read(struct file *filp, char __user *ubuf,
    425				size_t count, loff_t *offp)
    426{
    427	struct pdc_state *pdcs;
    428	char *buf;
    429	ssize_t ret, out_offset, out_count;
    430
    431	out_count = 512;
    432
    433	buf = kmalloc(out_count, GFP_KERNEL);
    434	if (!buf)
    435		return -ENOMEM;
    436
    437	pdcs = filp->private_data;
    438	out_offset = 0;
    439	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    440			       "SPU %u stats:\n", pdcs->pdc_idx);
    441	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    442			       "PDC requests....................%u\n",
    443			       pdcs->pdc_requests);
    444	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    445			       "PDC responses...................%u\n",
    446			       pdcs->pdc_replies);
    447	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    448			       "Tx not done.....................%u\n",
    449			       pdcs->last_tx_not_done);
    450	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    451			       "Tx ring full....................%u\n",
    452			       pdcs->tx_ring_full);
    453	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    454			       "Rx ring full....................%u\n",
    455			       pdcs->rx_ring_full);
    456	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    457			       "Tx desc write fail. Ring full...%u\n",
    458			       pdcs->txnobuf);
    459	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    460			       "Rx desc write fail. Ring full...%u\n",
    461			       pdcs->rxnobuf);
    462	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    463			       "Receive overflow................%u\n",
    464			       pdcs->rx_oflow);
    465	out_offset += scnprintf(buf + out_offset, out_count - out_offset,
    466			       "Num frags in rx ring............%u\n",
    467			       NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr,
    468					  pdcs->nrxpost));
    469
    470	if (out_offset > out_count)
    471		out_offset = out_count;
    472
    473	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
    474	kfree(buf);
    475	return ret;
    476}
    477
    478static const struct file_operations pdc_debugfs_stats = {
    479	.owner = THIS_MODULE,
    480	.open = simple_open,
    481	.read = pdc_debugfs_read,
    482};
    483
    484/**
    485 * pdc_setup_debugfs() - Create the debug FS directories. If the top-level
    486 * directory has not yet been created, create it now. Create a stats file in
    487 * this directory for a SPU.
    488 * @pdcs: PDC state structure
    489 */
    490static void pdc_setup_debugfs(struct pdc_state *pdcs)
    491{
    492	char spu_stats_name[16];
    493
    494	if (!debugfs_initialized())
    495		return;
    496
    497	snprintf(spu_stats_name, 16, "pdc%d_stats", pdcs->pdc_idx);
    498	if (!debugfs_dir)
    499		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
    500
    501	/* S_IRUSR == 0400 */
    502	debugfs_create_file(spu_stats_name, 0400, debugfs_dir, pdcs,
    503			    &pdc_debugfs_stats);
    504}
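/*
 * Illustrative usage, assuming this object builds as bcm_pdc_mailbox.o so
 * that KBUILD_MODNAME is "bcm_pdc_mailbox":
 *
 *   # mount -t debugfs none /sys/kernel/debug
 *   # cat /sys/kernel/debug/bcm_pdc_mailbox/pdc0_stats
 */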
    505
    506static void pdc_free_debugfs(void)
    507{
    508	debugfs_remove_recursive(debugfs_dir);
    509	debugfs_dir = NULL;
    510}
    511
    512/**
    513 * pdc_build_rxd() - Build DMA descriptor to receive SPU result.
    514 * @pdcs:      PDC state for SPU that will generate result
    515 * @dma_addr:  DMA address of buffer that descriptor is being built for
    516 * @buf_len:   Length of the receive buffer, in bytes
    517 * @flags:     Flags to be stored in descriptor
    518 */
    519static inline void
    520pdc_build_rxd(struct pdc_state *pdcs, dma_addr_t dma_addr,
    521	      u32 buf_len, u32 flags)
    522{
    523	struct device *dev = &pdcs->pdev->dev;
    524	struct dma64dd *rxd = &pdcs->rxd_64[pdcs->rxout];
    525
    526	dev_dbg(dev,
    527		"Writing rx descriptor for PDC %u at index %u with length %u. flags %#x\n",
    528		pdcs->pdc_idx, pdcs->rxout, buf_len, flags);
    529
    530	rxd->addrlow = cpu_to_le32(lower_32_bits(dma_addr));
    531	rxd->addrhigh = cpu_to_le32(upper_32_bits(dma_addr));
    532	rxd->ctrl1 = cpu_to_le32(flags);
    533	rxd->ctrl2 = cpu_to_le32(buf_len);
    534
    535	/* bump ring index and return */
    536	pdcs->rxout = NEXTRXD(pdcs->rxout, pdcs->nrxpost);
    537}
    538
    539/**
    540 * pdc_build_txd() - Build a DMA descriptor to transmit a SPU request to
    541 * hardware.
    542 * @pdcs:        PDC state for the SPU that will process this request
    543 * @dma_addr:    DMA address of packet to be transmitted
    544 * @buf_len:     Length of tx buffer, in bytes
    545 * @flags:       Flags to be stored in descriptor
    546 */
    547static inline void
    548pdc_build_txd(struct pdc_state *pdcs, dma_addr_t dma_addr, u32 buf_len,
    549	      u32 flags)
    550{
    551	struct device *dev = &pdcs->pdev->dev;
    552	struct dma64dd *txd = &pdcs->txd_64[pdcs->txout];
    553
    554	dev_dbg(dev,
    555		"Writing tx descriptor for PDC %u at index %u with length %u, flags %#x\n",
    556		pdcs->pdc_idx, pdcs->txout, buf_len, flags);
    557
    558	txd->addrlow = cpu_to_le32(lower_32_bits(dma_addr));
    559	txd->addrhigh = cpu_to_le32(upper_32_bits(dma_addr));
    560	txd->ctrl1 = cpu_to_le32(flags);
    561	txd->ctrl2 = cpu_to_le32(buf_len);
    562
    563	/* bump ring index and return */
    564	pdcs->txout = NEXTTXD(pdcs->txout, pdcs->ntxpost);
    565}
    566
    567/**
    568 * pdc_receive_one() - Receive a response message from a given SPU.
    569 * @pdcs:    PDC state for the SPU to receive from
    570 *
    571 * When the return code indicates success, the response message is available in
    572 * the receive buffers provided prior to submission of the request.
    573 *
    574 * Return:  PDC_SUCCESS if one or more receive descriptors were processed
    575 *          -EAGAIN if no response message is available
    576 *          -EIO if an error occurred
    577 */
    578static int
    579pdc_receive_one(struct pdc_state *pdcs)
    580{
    581	struct device *dev = &pdcs->pdev->dev;
    582	struct mbox_controller *mbc;
    583	struct mbox_chan *chan;
    584	struct brcm_message mssg;
    585	u32 len, rx_status;
    586	u32 num_frags;
    587	u8 *resp_hdr;    /* virtual addr of start of resp message DMA header */
    588	u32 frags_rdy;   /* number of fragments ready to read */
    589	u32 rx_idx;      /* ring index of start of receive frame */
    590	dma_addr_t resp_hdr_daddr;
    591	struct pdc_rx_ctx *rx_ctx;
    592
    593	mbc = &pdcs->mbc;
    594	chan = &mbc->chans[0];
    595	mssg.type = BRCM_MESSAGE_SPU;
    596
    597	/*
    598	 * Return if a complete response message is not yet ready.
    599	 * rx_ctx[rxin].rxin_numd is the number of fragments in the next
    600	 * msg to read.
    601	 */
    602	frags_rdy = NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr, pdcs->nrxpost);
    603	if ((frags_rdy == 0) ||
    604	    (frags_rdy < pdcs->rx_ctx[pdcs->rxin].rxin_numd))
    605		/* No response ready */
    606		return -EAGAIN;
    607
    608	num_frags = pdcs->txin_numd[pdcs->txin];
    609	WARN_ON(num_frags == 0);
    610
    611	dma_unmap_sg(dev, pdcs->src_sg[pdcs->txin],
    612		     sg_nents(pdcs->src_sg[pdcs->txin]), DMA_TO_DEVICE);
    613
    614	pdcs->txin = (pdcs->txin + num_frags) & pdcs->ntxpost;
    615
    616	dev_dbg(dev, "PDC %u reclaimed %d tx descriptors",
    617		pdcs->pdc_idx, num_frags);
    618
    619	rx_idx = pdcs->rxin;
    620	rx_ctx = &pdcs->rx_ctx[rx_idx];
    621	num_frags = rx_ctx->rxin_numd;
    622	/* Return opaque context with result */
    623	mssg.ctx = rx_ctx->rxp_ctx;
    624	rx_ctx->rxp_ctx = NULL;
    625	resp_hdr = rx_ctx->resp_hdr;
    626	resp_hdr_daddr = rx_ctx->resp_hdr_daddr;
    627	dma_unmap_sg(dev, rx_ctx->dst_sg, sg_nents(rx_ctx->dst_sg),
    628		     DMA_FROM_DEVICE);
    629
    630	pdcs->rxin = (pdcs->rxin + num_frags) & pdcs->nrxpost;
    631
    632	dev_dbg(dev, "PDC %u reclaimed %d rx descriptors",
    633		pdcs->pdc_idx, num_frags);
    634
    635	dev_dbg(dev,
    636		"PDC %u txin %u, txout %u, rxin %u, rxout %u, last_rx_curr %u\n",
    637		pdcs->pdc_idx, pdcs->txin, pdcs->txout, pdcs->rxin,
    638		pdcs->rxout, pdcs->last_rx_curr);
    639
    640	if (pdcs->pdc_resp_hdr_len == PDC_SPUM_RESP_HDR_LEN) {
    641		/*
    642		 * For SPU-M, get length of response msg and rx overflow status.
    643		 */
    644		rx_status = *((u32 *)resp_hdr);
    645		len = rx_status & RX_STATUS_LEN;
    646		dev_dbg(dev,
    647			"SPU response length %u bytes", len);
    648		if (unlikely(((rx_status & RX_STATUS_OVERFLOW) || (!len)))) {
    649			if (rx_status & RX_STATUS_OVERFLOW) {
    650				dev_err_ratelimited(dev,
    651						    "crypto receive overflow");
    652				pdcs->rx_oflow++;
    653			} else {
    654				dev_info_ratelimited(dev, "crypto rx len = 0");
    655			}
    656			return -EIO;
    657		}
    658	}
    659
    660	dma_pool_free(pdcs->rx_buf_pool, resp_hdr, resp_hdr_daddr);
    661
    662	mbox_chan_received_data(chan, &mssg);
    663
    664	pdcs->pdc_replies++;
    665	return PDC_SUCCESS;
    666}
    667
    668/**
    669 * pdc_receive() - Process as many responses as are available in the rx ring.
    670 * @pdcs:  PDC state
    671 *
    672 * Called from the rx tasklet after a receive interrupt.
    673 * Return: 0
    674 */
    675static int
    676pdc_receive(struct pdc_state *pdcs)
    677{
    678	int rx_status;
    679
    680	/* read last_rx_curr from register once */
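	/*
	 * status0 reports the last rx descriptor written by the hw as a byte
	 * offset into the ring; masking with CRYPTO_D64_RS0_CD_MASK and
	 * dividing by the 16-byte descriptor size converts it to a ring index.
	 */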
    681	pdcs->last_rx_curr =
    682	    (ioread32((const void __iomem *)&pdcs->rxregs_64->status0) &
    683	     CRYPTO_D64_RS0_CD_MASK) / RING_ENTRY_SIZE;
    684
    685	do {
    686		/* Could be many frames ready */
    687		rx_status = pdc_receive_one(pdcs);
    688	} while (rx_status == PDC_SUCCESS);
    689
    690	return 0;
    691}
    692
    693/**
    694 * pdc_tx_list_sg_add() - Add the buffers in a scatterlist to the transmit
    695 * descriptors for a given SPU. The scatterlist buffers contain the data for a
    696 * SPU request message.
    697 * @pdcs:      PDC state for the SPU that will process this request
    698 * @sg:        Scatterlist whose buffers contain part of the SPU request
    699 *
    700 * If a scatterlist buffer is larger than PDC_DMA_BUF_MAX, multiple descriptors
    701 * are written for that buffer, each <= PDC_DMA_BUF_MAX bytes in length.
    702 *
    703 * Return: PDC_SUCCESS if successful
    704 *         < 0 otherwise
    705 */
    706static int pdc_tx_list_sg_add(struct pdc_state *pdcs, struct scatterlist *sg)
    707{
    708	u32 flags = 0;
    709	u32 eot;
    710	u32 tx_avail;
    711
    712	/*
    713	 * Num descriptors needed. Conservatively assume we need a descriptor
    714	 * for every entry in sg.
    715	 */
    716	u32 num_desc;
    717	u32 desc_w = 0;	/* Number of tx descriptors written */
    718	u32 bufcnt;	/* Number of bytes of buffer pointed to by descriptor */
    719	dma_addr_t databufptr;	/* DMA address to put in descriptor */
    720
    721	num_desc = (u32)sg_nents(sg);
    722
    723	/* check whether enough tx descriptors are available */
    724	tx_avail = pdcs->ntxpost - NTXDACTIVE(pdcs->txin, pdcs->txout,
    725					      pdcs->ntxpost);
    726	if (unlikely(num_desc > tx_avail)) {
    727		pdcs->txnobuf++;
    728		return -ENOSPC;
    729	}
    730
    731	/* build tx descriptors */
    732	if (pdcs->tx_msg_start == pdcs->txout) {
    733		/* Start of frame */
    734		pdcs->txin_numd[pdcs->tx_msg_start] = 0;
    735		pdcs->src_sg[pdcs->txout] = sg;
    736		flags = D64_CTRL1_SOF;
    737	}
    738
    739	while (sg) {
    740		if (unlikely(pdcs->txout == (pdcs->ntxd - 1)))
    741			eot = D64_CTRL1_EOT;
    742		else
    743			eot = 0;
    744
    745		/*
    746		 * If sg buffer larger than PDC limit, split across
    747		 * multiple descriptors
    748		 */
    749		bufcnt = sg_dma_len(sg);
    750		databufptr = sg_dma_address(sg);
    751		while (bufcnt > PDC_DMA_BUF_MAX) {
    752			pdc_build_txd(pdcs, databufptr, PDC_DMA_BUF_MAX,
    753				      flags | eot);
    754			desc_w++;
    755			bufcnt -= PDC_DMA_BUF_MAX;
    756			databufptr += PDC_DMA_BUF_MAX;
    757			if (unlikely(pdcs->txout == (pdcs->ntxd - 1)))
    758				eot = D64_CTRL1_EOT;
    759			else
    760				eot = 0;
    761		}
    762		sg = sg_next(sg);
    763		if (!sg)
    764			/* Writing last descriptor for frame */
    765			flags |= (D64_CTRL1_EOF | D64_CTRL1_IOC);
    766		pdc_build_txd(pdcs, databufptr, bufcnt, flags | eot);
    767		desc_w++;
    768		/* Clear start of frame after first descriptor */
    769		flags &= ~D64_CTRL1_SOF;
    770	}
    771	pdcs->txin_numd[pdcs->tx_msg_start] += desc_w;
    772
    773	return PDC_SUCCESS;
    774}
    775
    776/**
    777 * pdc_tx_list_final() - Initiate DMA transfer of last frame written to tx
    778 * ring.
    779 * @pdcs:  PDC state for SPU to process the request
    780 *
    781 * Sets the index of the last descriptor written in both the rx and tx ring.
    782 *
    783 * Return: PDC_SUCCESS
    784 */
    785static int pdc_tx_list_final(struct pdc_state *pdcs)
    786{
    787	/*
    788	 * write barrier to ensure all register writes are complete
    789	 * before chip starts to process new request
    790	 */
    791	wmb();
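	/*
	 * The ptr registers take byte offsets into the rings, so the
	 * descriptor indexes are shifted left by 4 (descriptors are 16 bytes).
	 */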
    792	iowrite32(pdcs->rxout << 4, &pdcs->rxregs_64->ptr);
    793	iowrite32(pdcs->txout << 4, &pdcs->txregs_64->ptr);
    794	pdcs->pdc_requests++;
    795
    796	return PDC_SUCCESS;
    797}
    798
    799/**
    800 * pdc_rx_list_init() - Start a new receive descriptor list for a given PDC.
    801 * @pdcs:   PDC state for SPU handling request
    802 * @dst_sg: scatterlist providing rx buffers for response to be returned to
    803 *	    mailbox client
    804 * @ctx:    Opaque context for this request
    805 *
    806 * Posts a single receive descriptor to hold the metadata that precedes a
    807 * response. For example, with SPU-M, the metadata is a 32-byte DMA header and
    808 * an 8-byte BCM header. Moves the msg_start descriptor indexes for both tx and
    809 * rx to indicate the start of a new message.
    810 *
    811 * Return:  PDC_SUCCESS if successful
    812 *          < 0 if an error (e.g., rx ring is full)
    813 */
    814static int pdc_rx_list_init(struct pdc_state *pdcs, struct scatterlist *dst_sg,
    815			    void *ctx)
    816{
    817	u32 flags = 0;
    818	u32 rx_avail;
    819	u32 rx_pkt_cnt = 1;	/* Adding a single rx buffer */
    820	dma_addr_t daddr;
    821	void *vaddr;
    822	struct pdc_rx_ctx *rx_ctx;
    823
    824	rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout,
    825					      pdcs->nrxpost);
    826	if (unlikely(rx_pkt_cnt > rx_avail)) {
    827		pdcs->rxnobuf++;
    828		return -ENOSPC;
    829	}
    830
    831	/* allocate a buffer for the dma rx status */
    832	vaddr = dma_pool_zalloc(pdcs->rx_buf_pool, GFP_ATOMIC, &daddr);
    833	if (unlikely(!vaddr))
    834		return -ENOMEM;
    835
    836	/*
    837	 * Update msg_start indexes for both tx and rx to indicate the start
    838	 * of a new sequence of descriptor indexes that contain the fragments
    839	 * of the same message.
    840	 */
    841	pdcs->rx_msg_start = pdcs->rxout;
    842	pdcs->tx_msg_start = pdcs->txout;
    843
    844	/* This is always the first descriptor in the receive sequence */
    845	flags = D64_CTRL1_SOF;
    846	pdcs->rx_ctx[pdcs->rx_msg_start].rxin_numd = 1;
    847
    848	if (unlikely(pdcs->rxout == (pdcs->nrxd - 1)))
    849		flags |= D64_CTRL1_EOT;
    850
    851	rx_ctx = &pdcs->rx_ctx[pdcs->rxout];
    852	rx_ctx->rxp_ctx = ctx;
    853	rx_ctx->dst_sg = dst_sg;
    854	rx_ctx->resp_hdr = vaddr;
    855	rx_ctx->resp_hdr_daddr = daddr;
    856	pdc_build_rxd(pdcs, daddr, pdcs->pdc_resp_hdr_len, flags);
    857	return PDC_SUCCESS;
    858}
    859
    860/**
    861 * pdc_rx_list_sg_add() - Add the buffers in a scatterlist to the receive
    862 * descriptors for a given SPU. The caller must have already DMA mapped the
    863 * scatterlist.
    864 * @pdcs:       PDC state for the SPU whose rx ring receives the buffers
    865 * @sg:         Scatterlist whose buffers are added to the receive ring
    866 *
    867 * If a receive buffer in the scatterlist is larger than PDC_DMA_BUF_MAX,
    868 * multiple receive descriptors are written, each with a buffer <=
    869 * PDC_DMA_BUF_MAX.
    870 *
    871 * Return: PDC_SUCCESS if successful
    872 *         < 0 otherwise (e.g., receive ring is full)
    873 */
    874static int pdc_rx_list_sg_add(struct pdc_state *pdcs, struct scatterlist *sg)
    875{
    876	u32 flags = 0;
    877	u32 rx_avail;
    878
    879	/*
    880	 * Num descriptors needed. Conservatively assume we need a descriptor
    881	 * for every entry from our starting point in the scatterlist.
    882	 */
    883	u32 num_desc;
    884	u32 desc_w = 0;	/* Number of rx descriptors written */
    885	u32 bufcnt;	/* Number of bytes of buffer pointed to by descriptor */
    886	dma_addr_t databufptr;	/* DMA address to put in descriptor */
    887
    888	num_desc = (u32)sg_nents(sg);
    889
    890	rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout,
    891					      pdcs->nrxpost);
    892	if (unlikely(num_desc > rx_avail)) {
    893		pdcs->rxnobuf++;
    894		return -ENOSPC;
    895	}
    896
    897	while (sg) {
    898		if (unlikely(pdcs->rxout == (pdcs->nrxd - 1)))
    899			flags = D64_CTRL1_EOT;
    900		else
    901			flags = 0;
    902
    903		/*
    904		 * If sg buffer larger than PDC limit, split across
    905		 * multiple descriptors
    906		 */
    907		bufcnt = sg_dma_len(sg);
    908		databufptr = sg_dma_address(sg);
    909		while (bufcnt > PDC_DMA_BUF_MAX) {
    910			pdc_build_rxd(pdcs, databufptr, PDC_DMA_BUF_MAX, flags);
    911			desc_w++;
    912			bufcnt -= PDC_DMA_BUF_MAX;
    913			databufptr += PDC_DMA_BUF_MAX;
    914			if (unlikely(pdcs->rxout == (pdcs->nrxd - 1)))
    915				flags = D64_CTRL1_EOT;
    916			else
    917				flags = 0;
    918		}
    919		pdc_build_rxd(pdcs, databufptr, bufcnt, flags);
    920		desc_w++;
    921		sg = sg_next(sg);
    922	}
    923	pdcs->rx_ctx[pdcs->rx_msg_start].rxin_numd += desc_w;
    924
    925	return PDC_SUCCESS;
    926}
    927
    928/**
    929 * pdc_irq_handler() - Interrupt handler called in interrupt context.
    930 * @irq:      Interrupt number that has fired
    931 * @data:     device struct for DMA engine that generated the interrupt
    932 *
    933 * Clear the device interrupt status flags, mask further interrupts until
    934 * the deferred handler has run, and schedule the rx tasklet to do the
    935 * deferred response processing outside of hard interrupt context.
    936 *
    937 * Return: IRQ_HANDLED if the interrupt is ours
    938 *         IRQ_NONE otherwise
    939 */
    940static irqreturn_t pdc_irq_handler(int irq, void *data)
    941{
    942	struct device *dev = (struct device *)data;
    943	struct pdc_state *pdcs = dev_get_drvdata(dev);
    944	u32 intstatus = ioread32(pdcs->pdc_reg_vbase + PDC_INTSTATUS_OFFSET);
    945
    946	if (unlikely(intstatus == 0))
    947		return IRQ_NONE;
    948
    949	/* Disable interrupts until soft handler runs */
    950	iowrite32(0, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
    951
    952	/* Clear interrupt flags in device */
    953	iowrite32(intstatus, pdcs->pdc_reg_vbase + PDC_INTSTATUS_OFFSET);
    954
    955	/* Wakeup IRQ thread */
    956	tasklet_schedule(&pdcs->rx_tasklet);
    957	return IRQ_HANDLED;
    958}
    959
    960/**
    961 * pdc_tasklet_cb() - Tasklet callback that runs the deferred processing after
    962 * a DMA receive interrupt. Reenables the receive interrupt.
    963 * @t: Pointer to the tasklet_struct embedded in the PDC state structure
    964 */
    965static void pdc_tasklet_cb(struct tasklet_struct *t)
    966{
    967	struct pdc_state *pdcs = from_tasklet(pdcs, t, rx_tasklet);
    968
    969	pdc_receive(pdcs);
    970
    971	/* reenable interrupts */
    972	iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
    973}
    974
    975/**
    976 * pdc_ring_init() - Allocate DMA rings and initialize constant fields of
    977 * descriptors in one ringset.
    978 * @pdcs:    PDC instance state
    979 * @ringset: index of ringset being used
    980 *
    981 * Return: PDC_SUCCESS if ring initialized
    982 *         < 0 otherwise
    983 */
    984static int pdc_ring_init(struct pdc_state *pdcs, int ringset)
    985{
    986	int i;
    987	int err = PDC_SUCCESS;
    988	struct dma64 *dma_reg;
    989	struct device *dev = &pdcs->pdev->dev;
    990	struct pdc_ring_alloc tx;
    991	struct pdc_ring_alloc rx;
    992
    993	/* Allocate tx ring */
    994	tx.vbase = dma_pool_zalloc(pdcs->ring_pool, GFP_KERNEL, &tx.dmabase);
    995	if (unlikely(!tx.vbase)) {
    996		err = -ENOMEM;
    997		goto done;
    998	}
    999
   1000	/* Allocate rx ring */
   1001	rx.vbase = dma_pool_zalloc(pdcs->ring_pool, GFP_KERNEL, &rx.dmabase);
   1002	if (unlikely(!rx.vbase)) {
   1003		err = -ENOMEM;
   1004		goto fail_dealloc;
   1005	}
   1006
   1007	dev_dbg(dev, " - base DMA addr of tx ring      %pad", &tx.dmabase);
   1008	dev_dbg(dev, " - base virtual addr of tx ring  %p", tx.vbase);
   1009	dev_dbg(dev, " - base DMA addr of rx ring      %pad", &rx.dmabase);
   1010	dev_dbg(dev, " - base virtual addr of rx ring  %p", rx.vbase);
   1011
   1012	memcpy(&pdcs->tx_ring_alloc, &tx, sizeof(tx));
   1013	memcpy(&pdcs->rx_ring_alloc, &rx, sizeof(rx));
   1014
   1015	pdcs->rxin = 0;
   1016	pdcs->rx_msg_start = 0;
   1017	pdcs->last_rx_curr = 0;
   1018	pdcs->rxout = 0;
   1019	pdcs->txin = 0;
   1020	pdcs->tx_msg_start = 0;
   1021	pdcs->txout = 0;
   1022
   1023	/* Set descriptor array base addresses */
   1024	pdcs->txd_64 = (struct dma64dd *)pdcs->tx_ring_alloc.vbase;
   1025	pdcs->rxd_64 = (struct dma64dd *)pdcs->rx_ring_alloc.vbase;
   1026
   1027	/* Tell device the base DMA address of each ring */
   1028	dma_reg = &pdcs->regs->dmaregs[ringset];
   1029
   1030	/* But first disable DMA and set curptr to 0 for both TX & RX */
   1031	iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control);
   1032	iowrite32((PDC_RX_CTL + (pdcs->rx_status_len << 1)),
   1033		  &dma_reg->dmarcv.control);
   1034	iowrite32(0, &dma_reg->dmaxmt.ptr);
   1035	iowrite32(0, &dma_reg->dmarcv.ptr);
   1036
   1037	/* Set base DMA addresses */
   1038	iowrite32(lower_32_bits(pdcs->tx_ring_alloc.dmabase),
   1039		  &dma_reg->dmaxmt.addrlow);
   1040	iowrite32(upper_32_bits(pdcs->tx_ring_alloc.dmabase),
   1041		  &dma_reg->dmaxmt.addrhigh);
   1042
   1043	iowrite32(lower_32_bits(pdcs->rx_ring_alloc.dmabase),
   1044		  &dma_reg->dmarcv.addrlow);
   1045	iowrite32(upper_32_bits(pdcs->rx_ring_alloc.dmabase),
   1046		  &dma_reg->dmarcv.addrhigh);
   1047
   1048	/* Re-enable DMA */
   1049	iowrite32(PDC_TX_CTL | PDC_TX_ENABLE, &dma_reg->dmaxmt.control);
   1050	iowrite32((PDC_RX_CTL | PDC_RX_ENABLE | (pdcs->rx_status_len << 1)),
   1051		  &dma_reg->dmarcv.control);
   1052
   1053	/* Initialize descriptors */
   1054	for (i = 0; i < PDC_RING_ENTRIES; i++) {
   1055		/* Every tx descriptor can be used for start of frame. */
   1056		if (i != pdcs->ntxpost) {
   1057			iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOF,
   1058				  &pdcs->txd_64[i].ctrl1);
   1059		} else {
   1060			/* Last descriptor in ringset. Set End of Table. */
   1061			iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOF |
   1062				  D64_CTRL1_EOT, &pdcs->txd_64[i].ctrl1);
   1063		}
   1064
   1065		/* Every rx descriptor can be used for start of frame */
   1066		if (i != pdcs->nrxpost) {
   1067			iowrite32(D64_CTRL1_SOF,
   1068				  &pdcs->rxd_64[i].ctrl1);
   1069		} else {
   1070			/* Last descriptor in ringset. Set End of Table. */
   1071			iowrite32(D64_CTRL1_SOF | D64_CTRL1_EOT,
   1072				  &pdcs->rxd_64[i].ctrl1);
   1073		}
   1074	}
   1075	return PDC_SUCCESS;
   1076
   1077fail_dealloc:
   1078	dma_pool_free(pdcs->ring_pool, tx.vbase, tx.dmabase);
   1079done:
   1080	return err;
   1081}
   1082
   1083static void pdc_ring_free(struct pdc_state *pdcs)
   1084{
   1085	if (pdcs->tx_ring_alloc.vbase) {
   1086		dma_pool_free(pdcs->ring_pool, pdcs->tx_ring_alloc.vbase,
   1087			      pdcs->tx_ring_alloc.dmabase);
   1088		pdcs->tx_ring_alloc.vbase = NULL;
   1089	}
   1090
   1091	if (pdcs->rx_ring_alloc.vbase) {
   1092		dma_pool_free(pdcs->ring_pool, pdcs->rx_ring_alloc.vbase,
   1093			      pdcs->rx_ring_alloc.dmabase);
   1094		pdcs->rx_ring_alloc.vbase = NULL;
   1095	}
   1096}
   1097
   1098/**
   1099 * pdc_desc_count() - Count the number of DMA descriptors that will be required
   1100 * for a given scatterlist. Account for the max length of a DMA buffer.
   1101 * @sg:    Scatterlist to be DMA'd
   1102 * Return: Number of descriptors required
   1103 */
   1104static u32 pdc_desc_count(struct scatterlist *sg)
   1105{
   1106	u32 cnt = 0;
   1107
   1108	while (sg) {
   1109		cnt += ((sg->length / PDC_DMA_BUF_MAX) + 1);
   1110		sg = sg_next(sg);
   1111	}
   1112	return cnt;
   1113}
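/*
 * Note that this count is deliberately pessimistic: a buffer of exactly
 * PDC_DMA_BUF_MAX bytes counts as two descriptors here, even though the
 * descriptor-writing loops only split buffers strictly larger than the max.
 */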
   1114
   1115/**
   1116 * pdc_rings_full() - Check whether the tx ring has room for tx_cnt descriptors
   1117 * and the rx ring has room for rx_cnt descriptors.
   1118 * @pdcs:  PDC state
   1119 * @tx_cnt: The number of descriptors required in the tx ring
   1120 * @rx_cnt: The number of descriptors required in the rx ring
   1121 *
   1122 * Return: true if one of the rings does not have enough space
   1123 *         false if sufficient space is available in both rings
   1124 */
   1125static bool pdc_rings_full(struct pdc_state *pdcs, int tx_cnt, int rx_cnt)
   1126{
   1127	u32 rx_avail;
   1128	u32 tx_avail;
   1129	bool full = false;
   1130
   1131	/* Check if the tx and rx rings are likely to have enough space */
   1132	rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout,
   1133					      pdcs->nrxpost);
   1134	if (unlikely(rx_cnt > rx_avail)) {
   1135		pdcs->rx_ring_full++;
   1136		full = true;
   1137	}
   1138
   1139	if (likely(!full)) {
   1140		tx_avail = pdcs->ntxpost - NTXDACTIVE(pdcs->txin, pdcs->txout,
   1141						      pdcs->ntxpost);
   1142		if (unlikely(tx_cnt > tx_avail)) {
   1143			pdcs->tx_ring_full++;
   1144			full = true;
   1145		}
   1146	}
   1147	return full;
   1148}
   1149
   1150/**
   1151 * pdc_last_tx_done() - If both the tx and rx rings have at least
   1152 * PDC_RING_SPACE_MIN descriptors available, then indicate that the mailbox
   1153 * framework can submit another message.
   1154 * @chan:  mailbox channel to check
   1155 * Return: true if PDC can accept another message on this channel
   1156 */
   1157static bool pdc_last_tx_done(struct mbox_chan *chan)
   1158{
   1159	struct pdc_state *pdcs = chan->con_priv;
   1160	bool ret;
   1161
   1162	if (unlikely(pdc_rings_full(pdcs, PDC_RING_SPACE_MIN,
   1163				    PDC_RING_SPACE_MIN))) {
   1164		pdcs->last_tx_not_done++;
   1165		ret = false;
   1166	} else {
   1167		ret = true;
   1168	}
   1169	return ret;
   1170}
   1171
   1172/**
   1173 * pdc_send_data() - mailbox send_data function
   1174 * @chan:	The mailbox channel on which the data is sent. The channel
   1175 *              corresponds to a DMA ringset.
   1176 * @data:	The mailbox message to be sent. The message must be a
   1177 *              brcm_message structure.
   1178 *
   1179 * This function is registered as the send_data function for the mailbox
   1180 * controller. From the destination scatterlist in the mailbox message, it
   1181 * creates a sequence of receive descriptors in the rx ring. From the source
   1182 * scatterlist, it creates a sequence of transmit descriptors in the tx ring.
   1183 * After creating the descriptors, it writes the rx ptr and tx ptr registers to
   1184 * initiate the DMA transfer.
   1185 *
   1186 * This function DMA-maps the src and dst scatterlists in the mailbox message;
   1187 * they are unmapped later, when the response is processed in pdc_receive_one().
   1188 *
   1189 * Return: 0 if successful
   1190 *	   -ENOTSUPP if the mailbox message is a type this driver does not
   1191 *			support
   1192 *         < 0 if an error
   1193 */
   1194static int pdc_send_data(struct mbox_chan *chan, void *data)
   1195{
   1196	struct pdc_state *pdcs = chan->con_priv;
   1197	struct device *dev = &pdcs->pdev->dev;
   1198	struct brcm_message *mssg = data;
   1199	int err = PDC_SUCCESS;
   1200	int src_nent;
   1201	int dst_nent;
   1202	int nent;
   1203	u32 tx_desc_req;
   1204	u32 rx_desc_req;
   1205
   1206	if (unlikely(mssg->type != BRCM_MESSAGE_SPU))
   1207		return -ENOTSUPP;
   1208
   1209	src_nent = sg_nents(mssg->spu.src);
   1210	if (likely(src_nent)) {
   1211		nent = dma_map_sg(dev, mssg->spu.src, src_nent, DMA_TO_DEVICE);
   1212		if (unlikely(nent == 0))
   1213			return -EIO;
   1214	}
   1215
   1216	dst_nent = sg_nents(mssg->spu.dst);
   1217	if (likely(dst_nent)) {
   1218		nent = dma_map_sg(dev, mssg->spu.dst, dst_nent,
   1219				  DMA_FROM_DEVICE);
   1220		if (unlikely(nent == 0)) {
   1221			dma_unmap_sg(dev, mssg->spu.src, src_nent,
   1222				     DMA_TO_DEVICE);
   1223			return -EIO;
   1224		}
   1225	}
   1226
   1227	/*
   1228	 * Check if the tx and rx rings have enough space. Do this prior to
   1229	 * writing any tx or rx descriptors. Need to ensure that we do not write
   1230	 * a partial set of descriptors, or write just rx descriptors but
   1231	 * corresponding tx descriptors don't fit. Note that we want this check
   1232	 * and the entire sequence of descriptor to happen without another
   1233	 * thread getting in. The channel spin lock in the mailbox framework
   1234	 * ensures this.
   1235	 */
   1236	tx_desc_req = pdc_desc_count(mssg->spu.src);
   1237	rx_desc_req = pdc_desc_count(mssg->spu.dst);
   1238	if (unlikely(pdc_rings_full(pdcs, tx_desc_req, rx_desc_req + 1)))
   1239		return -ENOSPC;
   1240
   1241	/* Create rx descriptors to catch the SPU response */
   1242	err = pdc_rx_list_init(pdcs, mssg->spu.dst, mssg->ctx);
   1243	err |= pdc_rx_list_sg_add(pdcs, mssg->spu.dst);
   1244
   1245	/* Create tx descriptors to submit SPU request */
   1246	err |= pdc_tx_list_sg_add(pdcs, mssg->spu.src);
   1247	err |= pdc_tx_list_final(pdcs);	/* initiate transfer */
   1248
   1249	if (unlikely(err))
   1250		dev_err(&pdcs->pdev->dev,
   1251			"%s failed with error %d", __func__, err);
   1252
   1253	return err;
   1254}
   1255
   1256static int pdc_startup(struct mbox_chan *chan)
   1257{
   1258	return pdc_ring_init(chan->con_priv, PDC_RINGSET);
   1259}
   1260
   1261static void pdc_shutdown(struct mbox_chan *chan)
   1262{
   1263	struct pdc_state *pdcs = chan->con_priv;
   1264
   1265	if (!pdcs)
   1266		return;
   1267
   1268	dev_dbg(&pdcs->pdev->dev,
   1269		"Shutdown mailbox channel for PDC %u", pdcs->pdc_idx);
   1270	pdc_ring_free(pdcs);
   1271}
   1272
   1273/**
   1274 * pdc_hw_init() - Use the given initialization parameters to initialize the
   1275 * state for one of the PDCs.
   1276 * @pdcs:  state of the PDC
   1277 */
   1278static
   1279void pdc_hw_init(struct pdc_state *pdcs)
   1280{
   1281	struct platform_device *pdev;
   1282	struct device *dev;
   1283	struct dma64 *dma_reg;
   1284	int ringset = PDC_RINGSET;
   1285
   1286	pdev = pdcs->pdev;
   1287	dev = &pdev->dev;
   1288
   1289	dev_dbg(dev, "PDC %u initial values:", pdcs->pdc_idx);
   1290	dev_dbg(dev, "state structure:                   %p",
   1291		pdcs);
   1292	dev_dbg(dev, " - base virtual addr of hw regs    %p",
   1293		pdcs->pdc_reg_vbase);
   1294
   1295	/* initialize data structures */
   1296	pdcs->regs = (struct pdc_regs *)pdcs->pdc_reg_vbase;
   1297	pdcs->txregs_64 = (struct dma64_regs *)
   1298	    (((u8 *)pdcs->pdc_reg_vbase) +
   1299		     PDC_TXREGS_OFFSET + (sizeof(struct dma64) * ringset));
   1300	pdcs->rxregs_64 = (struct dma64_regs *)
   1301	    (((u8 *)pdcs->pdc_reg_vbase) +
   1302		     PDC_RXREGS_OFFSET + (sizeof(struct dma64) * ringset));
   1303
   1304	pdcs->ntxd = PDC_RING_ENTRIES;
   1305	pdcs->nrxd = PDC_RING_ENTRIES;
   1306	pdcs->ntxpost = PDC_RING_ENTRIES - 1;
   1307	pdcs->nrxpost = PDC_RING_ENTRIES - 1;
   1308	iowrite32(0, &pdcs->regs->intmask);
   1309
   1310	dma_reg = &pdcs->regs->dmaregs[ringset];
   1311
   1312	/* Configure DMA but will enable later in pdc_ring_init() */
   1313	iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control);
   1314
   1315	iowrite32(PDC_RX_CTL + (pdcs->rx_status_len << 1),
   1316		  &dma_reg->dmarcv.control);
   1317
   1318	/* Reset current index pointers after making sure DMA is disabled */
   1319	iowrite32(0, &dma_reg->dmaxmt.ptr);
   1320	iowrite32(0, &dma_reg->dmarcv.ptr);
   1321
   1322	if (pdcs->pdc_resp_hdr_len == PDC_SPU2_RESP_HDR_LEN)
   1323		iowrite32(PDC_CKSUM_CTRL,
   1324			  pdcs->pdc_reg_vbase + PDC_CKSUM_CTRL_OFFSET);
   1325}
   1326
   1327/**
   1328 * pdc_hw_disable() - Disable the tx and rx control in the hw.
   1329 * @pdcs: PDC state structure
   1330 *
   1331 */
   1332static void pdc_hw_disable(struct pdc_state *pdcs)
   1333{
   1334	struct dma64 *dma_reg;
   1335
   1336	dma_reg = &pdcs->regs->dmaregs[PDC_RINGSET];
   1337	iowrite32(PDC_TX_CTL, &dma_reg->dmaxmt.control);
   1338	iowrite32(PDC_RX_CTL + (pdcs->rx_status_len << 1),
   1339		  &dma_reg->dmarcv.control);
   1340}
   1341
   1342/**
   1343 * pdc_rx_buf_pool_create() - Create a pool of receive buffers used to catch
   1344 * the metadata header returned with each response message.
   1345 * @pdcs: PDC state structure
   1346 *
   1347 * The metadata is not returned to the mailbox client. So the PDC driver
   1348 * manages these buffers.
   1349 *
   1350 * Return: PDC_SUCCESS
   1351 *         -ENOMEM if pool creation fails
   1352 */
   1353static int pdc_rx_buf_pool_create(struct pdc_state *pdcs)
   1354{
   1355	struct platform_device *pdev;
   1356	struct device *dev;
   1357
   1358	pdev = pdcs->pdev;
   1359	dev = &pdev->dev;
   1360
   1361	pdcs->pdc_resp_hdr_len = pdcs->rx_status_len;
   1362	if (pdcs->use_bcm_hdr)
   1363		pdcs->pdc_resp_hdr_len += BCM_HDR_LEN;
   1364
   1365	pdcs->rx_buf_pool = dma_pool_create("pdc rx bufs", dev,
   1366					    pdcs->pdc_resp_hdr_len,
   1367					    RX_BUF_ALIGN, 0);
   1368	if (!pdcs->rx_buf_pool)
   1369		return -ENOMEM;
   1370
   1371	return PDC_SUCCESS;
   1372}
   1373
   1374/**
   1375 * pdc_interrupts_init() - Initialize the interrupt configuration for a PDC and
   1376 * register the IRQ handler that defers receive processing to a tasklet
   1377 * outside of hard interrupt context.
   1378 * @pdcs:   PDC state
   1379 *
   1380 * Set the interrupt mask to enable the receive-done interrupt on ring 0.
   1381 * Set the lazy interrupt frame count to generate an interrupt for just one pkt.
   1382 *
   1383 * Return:  PDC_SUCCESS
   1384 *          <0 if the IRQ request fails
   1385 */
   1386static int pdc_interrupts_init(struct pdc_state *pdcs)
   1387{
   1388	struct platform_device *pdev = pdcs->pdev;
   1389	struct device *dev = &pdev->dev;
   1390	struct device_node *dn = pdev->dev.of_node;
   1391	int err;
   1392
   1393	/* interrupt configuration */
   1394	iowrite32(PDC_INTMASK, pdcs->pdc_reg_vbase + PDC_INTMASK_OFFSET);
   1395
   1396	if (pdcs->hw_type == FA_HW)
   1397		iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
   1398			  FA_RCVLAZY0_OFFSET);
   1399	else
   1400		iowrite32(PDC_LAZY_INT, pdcs->pdc_reg_vbase +
   1401			  PDC_RCVLAZY0_OFFSET);
   1402
   1403	/* read irq from device tree */
   1404	pdcs->pdc_irq = irq_of_parse_and_map(dn, 0);
   1405	dev_dbg(dev, "pdc device %s irq %u for pdcs %p",
   1406		dev_name(dev), pdcs->pdc_irq, pdcs);
   1407
   1408	err = devm_request_irq(dev, pdcs->pdc_irq, pdc_irq_handler, 0,
   1409			       dev_name(dev), dev);
   1410	if (err) {
   1411		dev_err(dev, "IRQ %u request failed with err %d\n",
   1412			pdcs->pdc_irq, err);
   1413		return err;
   1414	}
   1415	return PDC_SUCCESS;
   1416}
   1417
   1418static const struct mbox_chan_ops pdc_mbox_chan_ops = {
   1419	.send_data = pdc_send_data,
   1420	.last_tx_done = pdc_last_tx_done,
   1421	.startup = pdc_startup,
   1422	.shutdown = pdc_shutdown
   1423};
   1424
   1425/**
   1426 * pdc_mb_init() - Initialize the mailbox controller.
   1427 * @pdcs:  PDC state
   1428 *
   1429 * Each PDC is a mailbox controller. Each ringset is a mailbox channel. The
   1430 * kernel driver only uses one ringset and thus one mb channel. The controller
   1431 * registers with txdone_poll, so the mailbox framework polls pdc_last_tx_done()
   1432 * to determine when it may submit another message.
   1433 *
   1434 * Return: 0 on success
   1435 *         < 0 if there is an allocation or registration failure
   1436 */
   1437static int pdc_mb_init(struct pdc_state *pdcs)
   1438{
   1439	struct device *dev = &pdcs->pdev->dev;
   1440	struct mbox_controller *mbc;
   1441	int chan_index;
   1442	int err;
   1443
   1444	mbc = &pdcs->mbc;
   1445	mbc->dev = dev;
   1446	mbc->ops = &pdc_mbox_chan_ops;
   1447	mbc->num_chans = 1;
   1448	mbc->chans = devm_kcalloc(dev, mbc->num_chans, sizeof(*mbc->chans),
   1449				  GFP_KERNEL);
   1450	if (!mbc->chans)
   1451		return -ENOMEM;
   1452
   1453	mbc->txdone_irq = false;
   1454	mbc->txdone_poll = true;
   1455	mbc->txpoll_period = 1;
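	/* txpoll_period is in msec: poll last_tx_done roughly once per ms */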
   1456	for (chan_index = 0; chan_index < mbc->num_chans; chan_index++)
   1457		mbc->chans[chan_index].con_priv = pdcs;
   1458
   1459	/* Register mailbox controller */
   1460	err = devm_mbox_controller_register(dev, mbc);
   1461	if (err) {
   1462		dev_crit(dev,
   1463			 "Failed to register PDC mailbox controller. Error %d.",
   1464			 err);
   1465		return err;
   1466	}
   1467	return 0;
   1468}
   1469
   1470/* Device tree API */
   1471static const int pdc_hw = PDC_HW;
   1472static const int fa_hw = FA_HW;
   1473
   1474static const struct of_device_id pdc_mbox_of_match[] = {
   1475	{.compatible = "brcm,iproc-pdc-mbox", .data = &pdc_hw},
   1476	{.compatible = "brcm,iproc-fa2-mbox", .data = &fa_hw},
   1477	{ /* sentinel */ }
   1478};
   1479MODULE_DEVICE_TABLE(of, pdc_mbox_of_match);
   1480
   1481/**
   1482 * pdc_dt_read() - Read application-specific data from device tree.
   1483 * @pdev:  Platform device
   1484 * @pdcs:  PDC state
   1485 *
   1486 * Reads the number of bytes of receive status that precede each received frame.
   1487 * Reads whether transmit and receive frames should be preceded by an 8-byte
   1488 * BCM header.
   1489 *
   1490 * Return: 0 if successful
   1491 *         -ENODEV if device not available
   1492 */
   1493static int pdc_dt_read(struct platform_device *pdev, struct pdc_state *pdcs)
   1494{
   1495	struct device *dev = &pdev->dev;
   1496	struct device_node *dn = pdev->dev.of_node;
   1497	const struct of_device_id *match;
   1498	const int *hw_type;
   1499	int err;
   1500
   1501	err = of_property_read_u32(dn, "brcm,rx-status-len",
   1502				   &pdcs->rx_status_len);
   1503	if (err < 0)
   1504		dev_err(dev,
   1505			"%s failed to get DMA receive status length from device tree",
   1506			__func__);
   1507
   1508	pdcs->use_bcm_hdr = of_property_read_bool(dn, "brcm,use-bcm-hdr");
   1509
   1510	pdcs->hw_type = PDC_HW;
   1511
   1512	match = of_match_device(of_match_ptr(pdc_mbox_of_match), dev);
   1513	if (match != NULL) {
   1514		hw_type = match->data;
   1515		pdcs->hw_type = *hw_type;
   1516	}
   1517
   1518	return 0;
   1519}
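/*
 * Illustrative device tree node consuming these properties (values are an
 * example sketch, not taken from any particular board file):
 *
 *	pdc0: iproc-pdc0@612c0000 {
 *		compatible = "brcm,iproc-pdc-mbox";
 *		reg = <0x612c0000 0x445>;
 *		interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
 *		#mbox-cells = <1>;
 *		brcm,rx-status-len = <32>;
 *		brcm,use-bcm-hdr;
 *	};
 */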
   1520
   1521/**
   1522 * pdc_probe() - Probe function for PDC driver.
   1523 * @pdev:   PDC platform device
   1524 *
   1525 * Reserve and map register regions defined in device tree.
   1526 * Allocate and initialize tx and rx DMA rings.
   1527 * Initialize a mailbox controller for each PDC.
   1528 *
   1529 * Return: 0 if successful
   1530 *         < 0 if an error
   1531 */
   1532static int pdc_probe(struct platform_device *pdev)
   1533{
   1534	int err = 0;
   1535	struct device *dev = &pdev->dev;
   1536	struct resource *pdc_regs;
   1537	struct pdc_state *pdcs;
   1538
   1539	/* PDC state for one SPU */
   1540	pdcs = devm_kzalloc(dev, sizeof(*pdcs), GFP_KERNEL);
   1541	if (!pdcs) {
   1542		err = -ENOMEM;
   1543		goto cleanup;
   1544	}
   1545
   1546	pdcs->pdev = pdev;
   1547	platform_set_drvdata(pdev, pdcs);
   1548	pdcs->pdc_idx = pdcg.num_spu;
   1549	pdcg.num_spu++;
   1550
   1551	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
   1552	if (err) {
   1553		dev_warn(dev, "PDC device cannot perform DMA. Error %d.", err);
   1554		goto cleanup;
   1555	}
   1556
   1557	/* Create DMA pool for tx ring */
   1558	pdcs->ring_pool = dma_pool_create("pdc rings", dev, PDC_RING_SIZE,
   1559					  RING_ALIGN, 0);
   1560	if (!pdcs->ring_pool) {
   1561		err = -ENOMEM;
   1562		goto cleanup;
   1563	}
   1564
   1565	err = pdc_dt_read(pdev, pdcs);
   1566	if (err)
   1567		goto cleanup_ring_pool;
   1568
   1569	pdc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
   1570	if (!pdc_regs) {
   1571		err = -ENODEV;
   1572		goto cleanup_ring_pool;
   1573	}
   1574	dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
   1575		&pdc_regs->start, &pdc_regs->end);
   1576
   1577	pdcs->pdc_reg_vbase = devm_ioremap_resource(&pdev->dev, pdc_regs);
   1578	if (IS_ERR(pdcs->pdc_reg_vbase)) {
   1579		err = PTR_ERR(pdcs->pdc_reg_vbase);
   1580		goto cleanup_ring_pool;
   1581	}
   1582
   1583	/* create rx buffer pool after dt read to know how big buffers are */
   1584	err = pdc_rx_buf_pool_create(pdcs);
   1585	if (err)
   1586		goto cleanup_ring_pool;
   1587
   1588	pdc_hw_init(pdcs);
   1589
   1590	/* Init tasklet for deferred DMA rx processing */
   1591	tasklet_setup(&pdcs->rx_tasklet, pdc_tasklet_cb);
   1592
   1593	err = pdc_interrupts_init(pdcs);
   1594	if (err)
   1595		goto cleanup_buf_pool;
   1596
   1597	/* Initialize mailbox controller */
   1598	err = pdc_mb_init(pdcs);
   1599	if (err)
   1600		goto cleanup_buf_pool;
   1601
   1602	pdc_setup_debugfs(pdcs);
   1603
   1604	dev_dbg(dev, "pdc_probe() successful");
   1605	return PDC_SUCCESS;
   1606
   1607cleanup_buf_pool:
   1608	tasklet_kill(&pdcs->rx_tasklet);
   1609	dma_pool_destroy(pdcs->rx_buf_pool);
   1610
   1611cleanup_ring_pool:
   1612	dma_pool_destroy(pdcs->ring_pool);
   1613
   1614cleanup:
   1615	return err;
   1616}
   1617
   1618static int pdc_remove(struct platform_device *pdev)
   1619{
   1620	struct pdc_state *pdcs = platform_get_drvdata(pdev);
   1621
   1622	pdc_free_debugfs();
   1623
   1624	tasklet_kill(&pdcs->rx_tasklet);
   1625
   1626	pdc_hw_disable(pdcs);
   1627
   1628	dma_pool_destroy(pdcs->rx_buf_pool);
   1629	dma_pool_destroy(pdcs->ring_pool);
   1630	return 0;
   1631}
   1632
   1633static struct platform_driver pdc_mbox_driver = {
   1634	.probe = pdc_probe,
   1635	.remove = pdc_remove,
   1636	.driver = {
   1637		   .name = "brcm-iproc-pdc-mbox",
   1638		   .of_match_table = of_match_ptr(pdc_mbox_of_match),
   1639		   },
   1640};
   1641module_platform_driver(pdc_mbox_driver);
   1642
   1643MODULE_AUTHOR("Rob Rice <rob.rice@broadcom.com>");
   1644MODULE_DESCRIPTION("Broadcom PDC mailbox driver");
   1645MODULE_LICENSE("GPL v2");