Merge commit 'dmaengine-3.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine
Pull dmaengine changes from Dan:
1/ Bartlomiej and Dan finalized a rework of the dma address unmap implementation.
2/ In the course of testing 1/, a collection of enhancements to dmatest fell out, notably basic performance statistics and fixed / enhanced test control through new module parameters 'run', 'wait', 'noverify', and 'verbose'. Thanks to Andriy and Linus for their review.
3/ Testing the raid-related corner cases of 1/ triggered bugs in the recently added 16-source operation support in the ioatdma driver.
4/ Some minor fixes / cleanups to mv_xor and ioatdma.
Conflicts: drivers/dma/dmatest.c
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
This commit is contained in:
@@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
|
||||
&dest, 1, &src, 1, len);
|
||||
struct dma_device *device = chan ? chan->device : NULL;
|
||||
struct dma_async_tx_descriptor *tx = NULL;
|
||||
struct dmaengine_unmap_data *unmap = NULL;
|
||||
|
||||
if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
|
||||
dma_addr_t dma_dest, dma_src;
|
||||
if (device)
|
||||
unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
|
||||
|
||||
if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
|
||||
unsigned long dma_prep_flags = 0;
|
||||
|
||||
if (submit->cb_fn)
|
||||
dma_prep_flags |= DMA_PREP_INTERRUPT;
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_prep_flags |= DMA_PREP_FENCE;
|
||||
dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
|
||||
DMA_FROM_DEVICE);
|
||||
|
||||
dma_src = dma_map_page(device->dev, src, src_offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
unmap->to_cnt = 1;
|
||||
unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
unmap->from_cnt = 1;
|
||||
unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
|
||||
DMA_FROM_DEVICE);
|
||||
unmap->len = len;
|
||||
|
||||
tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
|
||||
len, dma_prep_flags);
|
||||
if (!tx) {
|
||||
dma_unmap_page(device->dev, dma_dest, len,
|
||||
DMA_FROM_DEVICE);
|
||||
dma_unmap_page(device->dev, dma_src, len,
|
||||
DMA_TO_DEVICE);
|
||||
}
|
||||
tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
|
||||
unmap->addr[0], len,
|
||||
dma_prep_flags);
|
||||
}
|
||||
|
||||
if (tx) {
|
||||
pr_debug("%s: (async) len: %zu\n", __func__, len);
|
||||
|
||||
dma_set_unmap(tx, unmap);
|
||||
async_tx_submit(chan, tx, submit);
|
||||
} else {
|
||||
void *dest_buf, *src_buf;
|
||||
@@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
|
||||
async_tx_sync_epilog(submit);
|
||||
}
|
||||
|
||||
dmaengine_unmap_put(unmap);
|
||||
|
||||
return tx;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(async_memcpy);
|
||||
|
||||
@@ -46,49 +46,24 @@ static struct page *pq_scribble_page;
|
||||
* do_async_gen_syndrome - asynchronously calculate P and/or Q
|
||||
*/
|
||||
static __async_inline struct dma_async_tx_descriptor *
|
||||
do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
|
||||
const unsigned char *scfs, unsigned int offset, int disks,
|
||||
size_t len, dma_addr_t *dma_src,
|
||||
do_async_gen_syndrome(struct dma_chan *chan,
|
||||
const unsigned char *scfs, int disks,
|
||||
struct dmaengine_unmap_data *unmap,
|
||||
enum dma_ctrl_flags dma_flags,
|
||||
struct async_submit_ctl *submit)
|
||||
{
|
||||
struct dma_async_tx_descriptor *tx = NULL;
|
||||
struct dma_device *dma = chan->device;
|
||||
enum dma_ctrl_flags dma_flags = 0;
|
||||
enum async_tx_flags flags_orig = submit->flags;
|
||||
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
|
||||
dma_async_tx_callback cb_param_orig = submit->cb_param;
|
||||
int src_cnt = disks - 2;
|
||||
unsigned char coefs[src_cnt];
|
||||
unsigned short pq_src_cnt;
|
||||
dma_addr_t dma_dest[2];
|
||||
int src_off = 0;
|
||||
int idx;
|
||||
int i;
|
||||
|
||||
/* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
|
||||
if (P(blocks, disks))
|
||||
dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
|
||||
len, DMA_BIDIRECTIONAL);
|
||||
else
|
||||
dma_flags |= DMA_PREP_PQ_DISABLE_P;
|
||||
if (Q(blocks, disks))
|
||||
dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
|
||||
len, DMA_BIDIRECTIONAL);
|
||||
else
|
||||
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
|
||||
|
||||
/* convert source addresses being careful to collapse 'empty'
|
||||
* sources and update the coefficients accordingly
|
||||
*/
|
||||
for (i = 0, idx = 0; i < src_cnt; i++) {
|
||||
if (blocks[i] == NULL)
|
||||
continue;
|
||||
dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
coefs[idx] = scfs[i];
|
||||
idx++;
|
||||
}
|
||||
src_cnt = idx;
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_flags |= DMA_PREP_FENCE;
|
||||
|
||||
while (src_cnt > 0) {
|
||||
submit->flags = flags_orig;
|
||||
@@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
|
||||
if (src_cnt > pq_src_cnt) {
|
||||
submit->flags &= ~ASYNC_TX_ACK;
|
||||
submit->flags |= ASYNC_TX_FENCE;
|
||||
dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
|
||||
submit->cb_fn = NULL;
|
||||
submit->cb_param = NULL;
|
||||
} else {
|
||||
dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
|
||||
submit->cb_fn = cb_fn_orig;
|
||||
submit->cb_param = cb_param_orig;
|
||||
if (cb_fn_orig)
|
||||
dma_flags |= DMA_PREP_INTERRUPT;
|
||||
}
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_flags |= DMA_PREP_FENCE;
|
||||
|
||||
/* Since we have clobbered the src_list we are committed
|
||||
* to doing this asynchronously. Drivers force forward
|
||||
* progress in case they can not provide a descriptor
|
||||
/* Drivers force forward progress in case they can not provide
|
||||
* a descriptor
|
||||
*/
|
||||
for (;;) {
|
||||
dma_dest[0] = unmap->addr[disks - 2];
|
||||
dma_dest[1] = unmap->addr[disks - 1];
|
||||
tx = dma->device_prep_dma_pq(chan, dma_dest,
|
||||
&dma_src[src_off],
|
||||
&unmap->addr[src_off],
|
||||
pq_src_cnt,
|
||||
&coefs[src_off], len,
|
||||
&scfs[src_off], unmap->len,
|
||||
dma_flags);
|
||||
if (likely(tx))
|
||||
break;
|
||||
@@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
|
||||
dma_async_issue_pending(chan);
|
||||
}
|
||||
|
||||
dma_set_unmap(tx, unmap);
|
||||
async_tx_submit(chan, tx, submit);
|
||||
submit->depend_tx = tx;
|
||||
|
||||
@@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
|
||||
* set to NULL those buffers will be replaced with the raid6_zero_page
|
||||
* in the synchronous path and omitted in the hardware-asynchronous
|
||||
* path.
|
||||
*
|
||||
* 'blocks' note: if submit->scribble is NULL then the contents of
|
||||
* 'blocks' may be overwritten to perform address conversions
|
||||
* (dma_map_page() or page_address()).
|
||||
*/
|
||||
struct dma_async_tx_descriptor *
|
||||
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
|
||||
@@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
|
||||
&P(blocks, disks), 2,
|
||||
blocks, src_cnt, len);
|
||||
struct dma_device *device = chan ? chan->device : NULL;
|
||||
dma_addr_t *dma_src = NULL;
|
||||
struct dmaengine_unmap_data *unmap = NULL;
|
||||
|
||||
BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
|
||||
|
||||
if (submit->scribble)
|
||||
dma_src = submit->scribble;
|
||||
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
|
||||
dma_src = (dma_addr_t *) blocks;
|
||||
if (device)
|
||||
unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
|
||||
|
||||
if (dma_src && device &&
|
||||
if (unmap &&
|
||||
(src_cnt <= dma_maxpq(device, 0) ||
|
||||
dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
|
||||
is_dma_pq_aligned(device, offset, 0, len)) {
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
enum dma_ctrl_flags dma_flags = 0;
|
||||
unsigned char coefs[src_cnt];
|
||||
int i, j;
|
||||
|
||||
/* run the p+q asynchronously */
|
||||
pr_debug("%s: (async) disks: %d len: %zu\n",
|
||||
__func__, disks, len);
|
||||
return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
|
||||
disks, len, dma_src, submit);
|
||||
|
||||
/* convert source addresses being careful to collapse 'empty'
|
||||
* sources and update the coefficients accordingly
|
||||
*/
|
||||
unmap->len = len;
|
||||
for (i = 0, j = 0; i < src_cnt; i++) {
|
||||
if (blocks[i] == NULL)
|
||||
continue;
|
||||
unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
|
||||
len, DMA_TO_DEVICE);
|
||||
coefs[j] = raid6_gfexp[i];
|
||||
unmap->to_cnt++;
|
||||
j++;
|
||||
}
|
||||
|
||||
/*
|
||||
* DMAs use destinations as sources,
|
||||
* so use BIDIRECTIONAL mapping
|
||||
*/
|
||||
unmap->bidi_cnt++;
|
||||
if (P(blocks, disks))
|
||||
unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
|
||||
offset, len, DMA_BIDIRECTIONAL);
|
||||
else {
|
||||
unmap->addr[j++] = 0;
|
||||
dma_flags |= DMA_PREP_PQ_DISABLE_P;
|
||||
}
|
||||
|
||||
unmap->bidi_cnt++;
|
||||
if (Q(blocks, disks))
|
||||
unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
|
||||
offset, len, DMA_BIDIRECTIONAL);
|
||||
else {
|
||||
unmap->addr[j++] = 0;
|
||||
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
|
||||
}
|
||||
|
||||
tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
|
||||
dmaengine_unmap_put(unmap);
|
||||
return tx;
|
||||
}
|
||||
|
||||
dmaengine_unmap_put(unmap);
|
||||
|
||||
/* run the pq synchronously */
|
||||
pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
|
||||
|
||||
@@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
unsigned char coefs[disks-2];
|
||||
enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
|
||||
dma_addr_t *dma_src = NULL;
|
||||
int src_cnt = 0;
|
||||
struct dmaengine_unmap_data *unmap = NULL;
|
||||
|
||||
BUG_ON(disks < 4);
|
||||
|
||||
if (submit->scribble)
|
||||
dma_src = submit->scribble;
|
||||
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
|
||||
dma_src = (dma_addr_t *) blocks;
|
||||
if (device)
|
||||
unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
|
||||
|
||||
if (dma_src && device && disks <= dma_maxpq(device, 0) &&
|
||||
if (unmap && disks <= dma_maxpq(device, 0) &&
|
||||
is_dma_pq_aligned(device, offset, 0, len)) {
|
||||
struct device *dev = device->dev;
|
||||
dma_addr_t *pq = &dma_src[disks-2];
|
||||
int i;
|
||||
dma_addr_t pq[2];
|
||||
int i, j = 0, src_cnt = 0;
|
||||
|
||||
pr_debug("%s: (async) disks: %d len: %zu\n",
|
||||
__func__, disks, len);
|
||||
if (!P(blocks, disks))
|
||||
|
||||
unmap->len = len;
|
||||
for (i = 0; i < disks-2; i++)
|
||||
if (likely(blocks[i])) {
|
||||
unmap->addr[j] = dma_map_page(dev, blocks[i],
|
||||
offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
coefs[j] = raid6_gfexp[i];
|
||||
unmap->to_cnt++;
|
||||
src_cnt++;
|
||||
j++;
|
||||
}
|
||||
|
||||
if (!P(blocks, disks)) {
|
||||
pq[0] = 0;
|
||||
dma_flags |= DMA_PREP_PQ_DISABLE_P;
|
||||
else
|
||||
} else {
|
||||
pq[0] = dma_map_page(dev, P(blocks, disks),
|
||||
offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
if (!Q(blocks, disks))
|
||||
unmap->addr[j++] = pq[0];
|
||||
unmap->to_cnt++;
|
||||
}
|
||||
if (!Q(blocks, disks)) {
|
||||
pq[1] = 0;
|
||||
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
|
||||
else
|
||||
} else {
|
||||
pq[1] = dma_map_page(dev, Q(blocks, disks),
|
||||
offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
unmap->addr[j++] = pq[1];
|
||||
unmap->to_cnt++;
|
||||
}
|
||||
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_flags |= DMA_PREP_FENCE;
|
||||
for (i = 0; i < disks-2; i++)
|
||||
if (likely(blocks[i])) {
|
||||
dma_src[src_cnt] = dma_map_page(dev, blocks[i],
|
||||
offset, len,
|
||||
DMA_TO_DEVICE);
|
||||
coefs[src_cnt] = raid6_gfexp[i];
|
||||
src_cnt++;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
|
||||
tx = device->device_prep_dma_pq_val(chan, pq,
|
||||
unmap->addr,
|
||||
src_cnt,
|
||||
coefs,
|
||||
len, pqres,
|
||||
@@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
|
||||
async_tx_quiesce(&submit->depend_tx);
|
||||
dma_async_issue_pending(chan);
|
||||
}
|
||||
|
||||
dma_set_unmap(tx, unmap);
|
||||
async_tx_submit(chan, tx, submit);
|
||||
|
||||
return tx;
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/raid/pq.h>
|
||||
#include <linux/async_tx.h>
|
||||
#include <linux/dmaengine.h>
|
||||
|
||||
static struct dma_async_tx_descriptor *
|
||||
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
|
||||
@@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
|
||||
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
|
||||
&dest, 1, srcs, 2, len);
|
||||
struct dma_device *dma = chan ? chan->device : NULL;
|
||||
struct dmaengine_unmap_data *unmap = NULL;
|
||||
const u8 *amul, *bmul;
|
||||
u8 ax, bx;
|
||||
u8 *a, *b, *c;
|
||||
|
||||
if (dma) {
|
||||
dma_addr_t dma_dest[2];
|
||||
dma_addr_t dma_src[2];
|
||||
if (dma)
|
||||
unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
|
||||
|
||||
if (unmap) {
|
||||
struct device *dev = dma->dev;
|
||||
dma_addr_t pq[2];
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
|
||||
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_flags |= DMA_PREP_FENCE;
|
||||
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
|
||||
dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
|
||||
dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
|
||||
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
|
||||
unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
|
||||
unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
|
||||
unmap->to_cnt = 2;
|
||||
|
||||
unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
|
||||
unmap->bidi_cnt = 1;
|
||||
/* engine only looks at Q, but expects it to follow P */
|
||||
pq[1] = unmap->addr[2];
|
||||
|
||||
unmap->len = len;
|
||||
tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
|
||||
len, dma_flags);
|
||||
if (tx) {
|
||||
dma_set_unmap(tx, unmap);
|
||||
async_tx_submit(chan, tx, submit);
|
||||
dmaengine_unmap_put(unmap);
|
||||
return tx;
|
||||
}
|
||||
|
||||
/* could not get a descriptor, unmap and fall through to
|
||||
* the synchronous path
|
||||
*/
|
||||
dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
|
||||
dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
|
||||
dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
|
||||
dmaengine_unmap_put(unmap);
|
||||
}
|
||||
|
||||
/* run the operation synchronously */
|
||||
@@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
|
||||
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
|
||||
&dest, 1, &src, 1, len);
|
||||
struct dma_device *dma = chan ? chan->device : NULL;
|
||||
struct dmaengine_unmap_data *unmap = NULL;
|
||||
const u8 *qmul; /* Q multiplier table */
|
||||
u8 *d, *s;
|
||||
|
||||
if (dma) {
|
||||
if (dma)
|
||||
unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
|
||||
|
||||
if (unmap) {
|
||||
dma_addr_t dma_dest[2];
|
||||
dma_addr_t dma_src[1];
|
||||
struct device *dev = dma->dev;
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
|
||||
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_flags |= DMA_PREP_FENCE;
|
||||
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
|
||||
dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
|
||||
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
|
||||
len, dma_flags);
|
||||
unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
|
||||
unmap->to_cnt++;
|
||||
unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
|
||||
dma_dest[1] = unmap->addr[1];
|
||||
unmap->bidi_cnt++;
|
||||
unmap->len = len;
|
||||
|
||||
/* this looks funny, but the engine looks for Q at
|
||||
* dma_dest[1] and ignores dma_dest[0] as a dest
|
||||
* due to DMA_PREP_PQ_DISABLE_P
|
||||
*/
|
||||
tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
|
||||
1, &coef, len, dma_flags);
|
||||
|
||||
if (tx) {
|
||||
dma_set_unmap(tx, unmap);
|
||||
dmaengine_unmap_put(unmap);
|
||||
async_tx_submit(chan, tx, submit);
|
||||
return tx;
|
||||
}
|
||||
@@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
|
||||
/* could not get a descriptor, unmap and fall through to
|
||||
* the synchronous path
|
||||
*/
|
||||
dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
|
||||
dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
|
||||
dmaengine_unmap_put(unmap);
|
||||
}
|
||||
|
||||
/* no channel available, or failed to allocate a descriptor, so
|
||||
|
||||
@@ -33,48 +33,31 @@
|
||||
|
||||
/* do_async_xor - dma map the pages and perform the xor with an engine */
|
||||
static __async_inline struct dma_async_tx_descriptor *
|
||||
do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
|
||||
unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
|
||||
do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
|
||||
struct async_submit_ctl *submit)
|
||||
{
|
||||
struct dma_device *dma = chan->device;
|
||||
struct dma_async_tx_descriptor *tx = NULL;
|
||||
int src_off = 0;
|
||||
int i;
|
||||
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
|
||||
void *cb_param_orig = submit->cb_param;
|
||||
enum async_tx_flags flags_orig = submit->flags;
|
||||
enum dma_ctrl_flags dma_flags;
|
||||
int xor_src_cnt = 0;
|
||||
dma_addr_t dma_dest;
|
||||
|
||||
/* map the dest bidirectional in case it is re-used as a source */
|
||||
dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
|
||||
for (i = 0; i < src_cnt; i++) {
|
||||
/* only map the dest once */
|
||||
if (!src_list[i])
|
||||
continue;
|
||||
if (unlikely(src_list[i] == dest)) {
|
||||
dma_src[xor_src_cnt++] = dma_dest;
|
||||
continue;
|
||||
}
|
||||
dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset,
|
||||
len, DMA_TO_DEVICE);
|
||||
}
|
||||
src_cnt = xor_src_cnt;
|
||||
enum dma_ctrl_flags dma_flags = 0;
|
||||
int src_cnt = unmap->to_cnt;
|
||||
int xor_src_cnt;
|
||||
dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
|
||||
dma_addr_t *src_list = unmap->addr;
|
||||
|
||||
while (src_cnt) {
|
||||
dma_addr_t tmp;
|
||||
|
||||
submit->flags = flags_orig;
|
||||
dma_flags = 0;
|
||||
xor_src_cnt = min(src_cnt, (int)dma->max_xor);
|
||||
/* if we are submitting additional xors, leave the chain open,
|
||||
* clear the callback parameters, and leave the destination
|
||||
* buffer mapped
|
||||
/* if we are submitting additional xors, leave the chain open
|
||||
* and clear the callback parameters
|
||||
*/
|
||||
if (src_cnt > xor_src_cnt) {
|
||||
submit->flags &= ~ASYNC_TX_ACK;
|
||||
submit->flags |= ASYNC_TX_FENCE;
|
||||
dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
|
||||
submit->cb_fn = NULL;
|
||||
submit->cb_param = NULL;
|
||||
} else {
|
||||
@@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
|
||||
dma_flags |= DMA_PREP_INTERRUPT;
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_flags |= DMA_PREP_FENCE;
|
||||
/* Since we have clobbered the src_list we are committed
|
||||
* to doing this asynchronously. Drivers force forward progress
|
||||
* in case they can not provide a descriptor
|
||||
|
||||
/* Drivers force forward progress in case they can not provide a
|
||||
* descriptor
|
||||
*/
|
||||
tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
|
||||
xor_src_cnt, len, dma_flags);
|
||||
tmp = src_list[0];
|
||||
if (src_list > unmap->addr)
|
||||
src_list[0] = dma_dest;
|
||||
tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
|
||||
xor_src_cnt, unmap->len,
|
||||
dma_flags);
|
||||
src_list[0] = tmp;
|
||||
|
||||
|
||||
if (unlikely(!tx))
|
||||
async_tx_quiesce(&submit->depend_tx);
|
||||
@@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
|
||||
while (unlikely(!tx)) {
|
||||
dma_async_issue_pending(chan);
|
||||
tx = dma->device_prep_dma_xor(chan, dma_dest,
|
||||
&dma_src[src_off],
|
||||
xor_src_cnt, len,
|
||||
src_list,
|
||||
xor_src_cnt, unmap->len,
|
||||
dma_flags);
|
||||
}
|
||||
|
||||
dma_set_unmap(tx, unmap);
|
||||
async_tx_submit(chan, tx, submit);
|
||||
submit->depend_tx = tx;
|
||||
|
||||
if (src_cnt > xor_src_cnt) {
|
||||
/* drop completed sources */
|
||||
src_cnt -= xor_src_cnt;
|
||||
src_off += xor_src_cnt;
|
||||
|
||||
/* use the intermediate result as a source */
|
||||
dma_src[--src_off] = dma_dest;
|
||||
src_cnt++;
|
||||
src_list += xor_src_cnt - 1;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
@@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
|
||||
struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
|
||||
&dest, 1, src_list,
|
||||
src_cnt, len);
|
||||
dma_addr_t *dma_src = NULL;
|
||||
struct dma_device *device = chan ? chan->device : NULL;
|
||||
struct dmaengine_unmap_data *unmap = NULL;
|
||||
|
||||
BUG_ON(src_cnt <= 1);
|
||||
|
||||
if (submit->scribble)
|
||||
dma_src = submit->scribble;
|
||||
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
|
||||
dma_src = (dma_addr_t *) src_list;
|
||||
if (device)
|
||||
unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
|
||||
|
||||
if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
int i, j;
|
||||
|
||||
if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
|
||||
/* run the xor asynchronously */
|
||||
pr_debug("%s (async): len: %zu\n", __func__, len);
|
||||
|
||||
return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
|
||||
dma_src, submit);
|
||||
unmap->len = len;
|
||||
for (i = 0, j = 0; i < src_cnt; i++) {
|
||||
if (!src_list[i])
|
||||
continue;
|
||||
unmap->to_cnt++;
|
||||
unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
|
||||
offset, len, DMA_TO_DEVICE);
|
||||
}
|
||||
|
||||
/* map it bidirectional as it may be re-used as a source */
|
||||
unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
|
||||
DMA_BIDIRECTIONAL);
|
||||
unmap->bidi_cnt = 1;
|
||||
|
||||
tx = do_async_xor(chan, unmap, submit);
|
||||
dmaengine_unmap_put(unmap);
|
||||
return tx;
|
||||
} else {
|
||||
dmaengine_unmap_put(unmap);
|
||||
/* run the xor synchronously */
|
||||
pr_debug("%s (sync): len: %zu\n", __func__, len);
|
||||
WARN_ONCE(chan, "%s: no space for dma address conversion\n",
|
||||
@@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
|
||||
struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
|
||||
struct dma_device *device = chan ? chan->device : NULL;
|
||||
struct dma_async_tx_descriptor *tx = NULL;
|
||||
dma_addr_t *dma_src = NULL;
|
||||
struct dmaengine_unmap_data *unmap = NULL;
|
||||
|
||||
BUG_ON(src_cnt <= 1);
|
||||
|
||||
if (submit->scribble)
|
||||
dma_src = submit->scribble;
|
||||
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
|
||||
dma_src = (dma_addr_t *) src_list;
|
||||
if (device)
|
||||
unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
|
||||
|
||||
if (dma_src && device && src_cnt <= device->max_xor &&
|
||||
if (unmap && src_cnt <= device->max_xor &&
|
||||
is_dma_xor_aligned(device, offset, 0, len)) {
|
||||
unsigned long dma_prep_flags = 0;
|
||||
int i;
|
||||
@@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
|
||||
dma_prep_flags |= DMA_PREP_INTERRUPT;
|
||||
if (submit->flags & ASYNC_TX_FENCE)
|
||||
dma_prep_flags |= DMA_PREP_FENCE;
|
||||
for (i = 0; i < src_cnt; i++)
|
||||
dma_src[i] = dma_map_page(device->dev, src_list[i],
|
||||
offset, len, DMA_TO_DEVICE);
|
||||
|
||||
tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
|
||||
for (i = 0; i < src_cnt; i++) {
|
||||
unmap->addr[i] = dma_map_page(device->dev, src_list[i],
|
||||
offset, len, DMA_TO_DEVICE);
|
||||
unmap->to_cnt++;
|
||||
}
|
||||
unmap->len = len;
|
||||
|
||||
tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
|
||||
len, result,
|
||||
dma_prep_flags);
|
||||
if (unlikely(!tx)) {
|
||||
@@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
|
||||
while (!tx) {
|
||||
dma_async_issue_pending(chan);
|
||||
tx = device->device_prep_dma_xor_val(chan,
|
||||
dma_src, src_cnt, len, result,
|
||||
unmap->addr, src_cnt, len, result,
|
||||
dma_prep_flags);
|
||||
}
|
||||
}
|
||||
|
||||
dma_set_unmap(tx, unmap);
|
||||
async_tx_submit(chan, tx, submit);
|
||||
} else {
|
||||
enum async_tx_flags flags_orig = submit->flags;
|
||||
@@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
|
||||
async_tx_sync_epilog(submit);
|
||||
submit->flags = flags_orig;
|
||||
}
|
||||
dmaengine_unmap_put(unmap);
|
||||
|
||||
return tx;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
#undef pr
|
||||
#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
|
||||
|
||||
#define NDISKS 16 /* Including P and Q */
|
||||
#define NDISKS 64 /* Including P and Q */
|
||||
|
||||
static struct page *dataptrs[NDISKS];
|
||||
static addr_conv_t addr_conv[NDISKS];
|
||||
@@ -219,6 +219,14 @@ static int raid6_test(void)
|
||||
err += test(11, &tests);
|
||||
err += test(12, &tests);
|
||||
}
|
||||
|
||||
/* the 24 disk case is special for ioatdma as it is the boundary point
|
||||
* at which it needs to switch from 8-source ops to 16-source
|
||||
* ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
|
||||
*/
|
||||
if (NDISKS > 24)
|
||||
err += test(24, &tests);
|
||||
|
||||
err += test(NDISKS, &tests);
|
||||
|
||||
pr("\n");
|
||||
|
||||
Reference in New Issue
Block a user