cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

drivers/dma/ioat/prep.c (21660B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2015 Intel Corporation.
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
#include "dma.h"

#define MAX_SCF	256

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq descriptor
 */
static const u8 xor_idx_to_desc = 0xe0;
static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc = 0xf8;
static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
				       2, 2, 2, 2, 2, 2, 2 };
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
					0, 1, 2, 3, 4, 5, 6 };

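/*
 * Note: xor_idx_to_desc and pq_idx_to_desc are bit maps rather than arrays:
 * bit 'idx' selects whether source 'idx' lives in the base (0) or extended
 * (1) descriptor, and the *_idx_to_field tables give the field slot within
 * the selected descriptor.  pq16_idx_to_desc is a real array and selects
 * among the base descriptor (0) and the two 64-byte halves of the super
 * extended descriptor (1 and 2).
 */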
static void xor_set_src(struct ioat_raw_descriptor *descs[2],
			dma_addr_t addr, u32 offset, int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	return raw->field[pq_idx_to_field[idx]];
}

static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
{
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	return raw->field[pq16_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
		       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	raw->field[pq_idx_to_field[idx]] = addr + offset;
	pq->coef[idx] = coef;
}

static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
			dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
	struct ioat_pq16a_descriptor *pq16 =
		(struct ioat_pq16a_descriptor *)desc[1];
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	raw->field[pq16_idx_to_field[idx]] = addr + offset;

	if (idx < 8)
		pq->coef[idx] = coef;
	else
		pq16->coef[idx - 8] = coef;
}

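/*
 * Allocate a super extended descriptor (sed) entry: the software tracking
 * struct comes from ioat_sed_cache, the hardware block from the dma_pool
 * selected by hw_pool.
 */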
static struct ioat_sed_ent *
ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
{
	struct ioat_sed_ent *sed;
	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;

	sed = kmem_cache_alloc(ioat_sed_cache, flags);
	if (!sed)
		return NULL;

	sed->hw_pool = hw_pool;
	sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
				 flags, &sed->dma);
	if (!sed->hw) {
		kmem_cache_free(ioat_sed_cache, sed);
		return NULL;
	}

	return sed;
}

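/*
 * A memcpy request is split across as many hardware descriptors as needed,
 * each covering at most (1 << xfercap_log) bytes; only the last descriptor
 * carries the caller's flags and the interrupt/completion-write bits.
 */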
struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			   dma_addr_t dma_src, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	dma_addr_t dst = dma_dest;
	dma_addr_t src = dma_src;
	size_t total_len = len;
	int num_descs, idx, i;

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		hw = desc->hw;

		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		dump_desc_dbg(ioat_chan, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat_chan, desc);
	/* we leave the channel locked to ensure in order submission */

	return &desc->txd;
}


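/*
 * Common implementation for xor and xor_val.  Operations with more than
 * five sources use a base + extended descriptor pair per transfer chunk,
 * and a trailing null descriptor is appended so the completion write
 * arrives in order with legacy-engine completions.
 */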
static struct dma_async_tx_descriptor *
__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		      size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	int num_descs, with_ext, idx, i;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover greater than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t,
					 len, 1 << ioat_chan->xfercap_log);
		int s;

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor xor_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat_chan, desc);
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat_chan, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

struct dma_async_tx_descriptor *
ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
	       unsigned int src_cnt, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
		    unsigned int src_cnt, size_t len,
		    enum sum_check_flags *result, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
				     src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
		 struct ioat_ring_ent *ext)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
		pq->ctl_f.int_en, pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++)
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
}

static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
			       struct ioat_ring_ent *desc)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_raw_descriptor *descs[] = { (void *)pq,
						(void *)pq,
						(void *)pq };
	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	if (desc->sed) {
		descs[1] = (void *)desc->sed->hw;
		descs[2] = (void *)desc->sed->hw + 64;
	}

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) pq->next,
		desc->txd.flags, pq->size, pq->ctl,
		pq->ctl_f.op, pq->ctl_f.int_en,
		pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++) {
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq16_get_src(descs, i),
			pq->coef[i]);
	}
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

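/*
 * Common pq/pq_val implementation for the legacy (up to 8 source)
 * descriptor format.  More than three sources, or a RAID6 continuation,
 * requires a base + extended descriptor pair; pre-3.3 hardware (cb32)
 * additionally needs a trailing null descriptor for an ordered completion
 * write.
 */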
static struct dma_async_tx_descriptor *
__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		     const dma_addr_t *dst, const dma_addr_t *src,
		     unsigned int src_cnt, const unsigned char *scf,
		     size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
	int i, s, idx, with_ext, num_descs;
	int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover greater than 3
	 * sources (we need 1 extra source in the q-only continuation
	 * case and 3 extra sources in the p+q continuation case.
	 */
	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor pq_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat_chan, desc, ext);

	if (!cb32) {
		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		pq->ctl_f.compl_write = 1;
		compl_desc = desc;
	} else {
		/* completion descriptor carries interrupt bit */
		compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
		hw = compl_desc->hw;
		hw->ctl = 0;
		hw->ctl_f.null = 1;
		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		hw->ctl_f.compl_write = 1;
		hw->size = NULL_DESC_BUFFER_SIZE;
		dump_desc_dbg(ioat_chan, compl_desc);
	}


	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

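/*
 * 16-source pq/pq_val (cb3.3 only): the extra source addresses and
 * coefficients live in a super extended descriptor, with descs[1] pointing
 * at the sed hardware block and descs[2] at its second 64-byte half.  No
 * trailing null descriptor is required on this path.
 */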
static struct dma_async_tx_descriptor *
__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
		       const dma_addr_t *dst, const dma_addr_t *src,
		       unsigned int src_cnt, const unsigned char *scf,
		       size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	u32 offset = 0;
	u8 op;
	int i, s, idx, num_descs;

	/* this function is only called with 9-16 sources */
	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);

	/*
	 * 16 source pq is only available on cb3.3 and has no completion
	 * write hw bug.
	 */
	if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;

	i = 0;

	do {
		struct ioat_raw_descriptor *descs[4];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		descs[0] = (struct ioat_raw_descriptor *) pq;

		desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
		if (!desc->sed) {
			dev_err(to_dev(ioat_chan),
				"%s: no free sed entries\n", __func__);
			return NULL;
		}

		pq->sed_addr = desc->sed->dma;
		desc->sed->parent = desc;

		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
		descs[2] = (void *)descs[1] + 64;

		for (s = 0; s < src_cnt; s++)
			pq16_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq16_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq16_set_src(descs, dst[0], offset, 0, s++);
			pq16_set_src(descs, dst[1], offset, 1, s++);
			pq16_set_src(descs, dst[1], offset, 0, s++);
		}

		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while (++i < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* with cb3.3 we should be able to do completion w/o a null desc */
	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	pq->ctl_f.compl_write = 1;

	dump_pq16_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

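/*
 * Effective source count including the sources implied by the RAID6
 * continuation flags; callers use it to choose between the legacy
 * (<= 8 source) and the 16-source pq paths.
 */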
static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
{
	if (dmaf_p_disabled_continue(flags))
		return src_cnt + 1;
	else if (dmaf_continue(flags))
		return src_cnt + 3;
	else
		return src_cnt;
}

struct dma_async_tx_descriptor *
ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	      unsigned int src_cnt, const unsigned char *scf, size_t len,
	      unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;

		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, single_source,
					       2, single_source_coef, len,
					       flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
					     single_source_coef, len, flags);

	} else {
		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
					       scf, len, flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
					     scf, len, flags);
	}
}

struct dma_async_tx_descriptor *
ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		  unsigned int src_cnt, const unsigned char *scf, size_t len,
		  enum sum_check_flags *pqres, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*pqres = 0;

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
				       flags) :
		__ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				     flags);
}

struct dma_async_tx_descriptor *
ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		 unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (src_cnt > MAX_SCF)
		return NULL;

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
				       flags) :
		__ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				     flags);
}

struct dma_async_tx_descriptor *
ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		     unsigned int src_cnt, size_t len,
		     enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (src_cnt > MAX_SCF)
		return NULL;

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
				       scf, len, flags) :
		__ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
				     scf, len, flags);
}

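/*
 * Queue a null descriptor whose only effect is to raise an interrupt and
 * trigger a completion write.
 */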
struct dma_async_tx_descriptor *
ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (ioat_check_space_lock(ioat_chan, 1) == 0)
		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}