diff options
Diffstat (limited to 'drivers/dma')
34 files changed, 4801 insertions, 1110 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index d96d87c56..8c98779a1 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -332,7 +332,7 @@ config MPC512X_DMA config MV_XOR bool "Marvell XOR engine support" - depends on PLAT_ORION + depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST select DMA_ENGINE select DMA_ENGINE_RAID select ASYNC_TX_ENABLE_CHANNEL_SWITCH @@ -467,6 +467,20 @@ config TEGRA20_APB_DMA This DMA controller transfers data from memory to peripheral fifo or vice versa. It does not support memory to memory data transfer. +config TEGRA210_ADMA + bool "NVIDIA Tegra210 ADMA support" + depends on ARCH_TEGRA_210_SOC + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select PM_CLK + help + Support for the NVIDIA Tegra210 ADMA controller driver. The + DMA controller has multiple DMA channels and is used to service + various audio clients in the Tegra210 audio processing engine + (APE). This DMA controller transfers data from memory to + peripheral and vice versa. It does not support memory to + memory data transfer. + config TIMB_DMA tristate "Timberdale FPGA DMA support" depends on MFD_TIMBERDALE @@ -507,7 +521,7 @@ config XGENE_DMA config XILINX_VDMA tristate "Xilinx AXI VDMA Engine" - depends on (ARCH_ZYNQ || MICROBLAZE) + depends on (ARCH_ZYNQ || MICROBLAZE || ARM64) select DMA_ENGINE help Enable support for Xilinx AXI VDMA Soft IP. diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 6084127c1..614f28b0b 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o +obj-$(CONFIG_TEGRA210_ADMA) += tegra210-adma.o obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_TI_CPPI41) += cppi41.o obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 9b42c0588..81db1c481 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -107,16 +107,20 @@ struct pl08x_driver_data; /** * struct vendor_data - vendor-specific config parameters for PL08x derivatives * @channels: the number of channels available in this variant + * @signals: the number of request signals available from the hardware * @dualmaster: whether this version supports dual AHB masters or not. * @nomadik: whether the channels have Nomadik security extension bits * that need to be checked for permission before use and some registers are * missing * @pl080s: whether this version is a PL080S, which has separate register and * LLI word for transfer size. + * @max_transfer_size: the maximum single element transfer size for this + * PL08x variant. */ struct vendor_data { u8 config_offset; u8 channels; + u8 signals; bool dualmaster; bool nomadik; bool pl080s; @@ -235,7 +239,7 @@ struct pl08x_dma_chan { struct virt_dma_chan vc; struct pl08x_phy_chan *phychan; const char *name; - const struct pl08x_channel_data *cd; + struct pl08x_channel_data *cd; struct dma_slave_config cfg; struct pl08x_txd *at; struct pl08x_driver_data *host; @@ -1909,6 +1913,12 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, if (slave) { chan->cd = &pl08x->pd->slave_channels[i]; + /* + * Some implementations have muxed signals, whereas some + * use a mux in front of the signals and need dynamic + * assignment of signals. + */ + chan->signal = i; pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; @@ -2050,40 +2060,33 @@ static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct pl08x_driver_data *pl08x = ofdma->of_dma_data; - struct pl08x_channel_data *data; - struct pl08x_dma_chan *chan; struct dma_chan *dma_chan; + struct pl08x_dma_chan *plchan; if (!pl08x) return NULL; - if (dma_spec->args_count != 2) + if (dma_spec->args_count != 2) { + dev_err(&pl08x->adev->dev, + "DMA channel translation requires two cells\n"); return NULL; + } dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); - if (dma_chan) - return dma_get_slave_channel(dma_chan); - - chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), - GFP_KERNEL); - if (!chan) + if (!dma_chan) { + dev_err(&pl08x->adev->dev, + "DMA slave channel not found\n"); return NULL; + } - data = (void *)&chan[1]; - data->bus_id = "(none)"; - data->periph_buses = dma_spec->args[1]; - - chan->cd = data; - chan->host = pl08x; - chan->slave = true; - chan->name = data->bus_id; - chan->state = PL08X_CHAN_IDLE; - chan->signal = dma_spec->args[0]; - chan->vc.desc_free = pl08x_desc_free; - - vchan_init(&chan->vc, &pl08x->slave); + plchan = to_pl08x_chan(dma_chan); + dev_dbg(&pl08x->adev->dev, + "translated channel for signal %d\n", + dma_spec->args[0]); - return dma_get_slave_channel(&chan->vc.chan); + /* Augment channel data for applicable AHB buses */ + plchan->cd->periph_buses = dma_spec->args[1]; + return dma_get_slave_channel(dma_chan); } static int pl08x_of_probe(struct amba_device *adev, @@ -2091,9 +2094,11 @@ static int pl08x_of_probe(struct amba_device *adev, struct device_node *np) { struct pl08x_platform_data *pd; + struct pl08x_channel_data *chanp = NULL; u32 cctl_memcpy = 0; u32 val; int ret; + int i; pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); if (!pd) @@ -2195,6 +2200,27 @@ static int pl08x_of_probe(struct amba_device *adev, /* Use the buses that can access memory, obviously */ pd->memcpy_channel.periph_buses = pd->mem_buses; + /* + * Allocate channel data for all possible slave channels (one + * for each possible signal), channels will then be allocated + * for a device and have it's AHB interfaces set up at + * translation time. + */ + chanp = devm_kcalloc(&adev->dev, + pl08x->vd->signals, + sizeof(struct pl08x_channel_data), + GFP_KERNEL); + if (!chanp) + return -ENOMEM; + + pd->slave_channels = chanp; + for (i = 0; i < pl08x->vd->signals; i++) { + /* chanp->periph_buses will be assigned at translation */ + chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i); + chanp++; + } + pd->num_slave_channels = pl08x->vd->signals; + pl08x->pd = pd; return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, @@ -2234,6 +2260,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_pl08x; } + /* Assign useful pointers to the driver state */ + pl08x->adev = adev; + pl08x->vd = vd; + /* Initialize memcpy engine */ dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); pl08x->memcpy.dev = &adev->dev; @@ -2284,10 +2314,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) } } - /* Assign useful pointers to the driver state */ - pl08x->adev = adev; - pl08x->vd = vd; - /* By default, AHB1 only. If dualmaster, from platform */ pl08x->lli_buses = PL08X_AHB1; pl08x->mem_buses = PL08X_AHB1; @@ -2438,6 +2464,7 @@ out_no_pl08x: static struct vendor_data vendor_pl080 = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 16, .dualmaster = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2445,6 +2472,7 @@ static struct vendor_data vendor_pl080 = { static struct vendor_data vendor_nomadik = { .config_offset = PL080_CH_CONFIG, .channels = 8, + .signals = 32, .dualmaster = true, .nomadik = true, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, @@ -2453,6 +2481,7 @@ static struct vendor_data vendor_nomadik = { static struct vendor_data vendor_pl080s = { .config_offset = PL080S_CH_CONFIG, .channels = 8, + .signals = 32, .pl080s = true, .max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK, }; @@ -2460,6 +2489,7 @@ static struct vendor_data vendor_pl080s = { static struct vendor_data vendor_pl081 = { .config_offset = PL080_CH_CONFIG, .channels = 2, + .signals = 16, .dualmaster = false, .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 8e304b1be..75bd6621d 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -242,7 +242,7 @@ struct at_xdmac_lld { u32 mbr_dus; /* Destination Microblock Stride Register */ }; - +/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */ struct at_xdmac_desc { struct at_xdmac_lld lld; enum dma_transfer_direction direction; @@ -253,7 +253,7 @@ struct at_xdmac_desc { unsigned int xfer_size; struct list_head descs_list; struct list_head xfer_node; -}; +} __aligned(sizeof(u64)); static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb) { @@ -1400,6 +1400,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, u32 cur_nda, check_nda, cur_ubc, mask, value; u8 dwidth = 0; unsigned long flags; + bool initd; ret = dma_cookie_status(chan, cookie, txstate); if (ret == DMA_COMPLETE) @@ -1424,7 +1425,16 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, residue = desc->xfer_size; /* * Flush FIFO: only relevant when the transfer is source peripheral - * synchronized. + * synchronized. Flush is needed before reading CUBC because data in + * the FIFO are not reported by CUBC. Reporting a residue of the + * transfer length while we have data in FIFO can cause issue. + * Usecase: atmel USART has a timeout which means I have received + * characters but there is no more character received for a while. On + * timeout, it requests the residue. If the data are in the DMA FIFO, + * we will return a residue of the transfer length. It means no data + * received. If an application is waiting for these data, it will hang + * since we won't have another USART timeout without receiving new + * data. */ mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC; value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM; @@ -1435,34 +1445,43 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, } /* - * When processing the residue, we need to read two registers but we - * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where - * we stand in the descriptor list and AT_XDMAC_CUBC is used - * to know how many data are remaining for the current descriptor. - * Since the dma channel is not paused to not loose data, between the - * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of - * descriptor. - * For that reason, after reading AT_XDMAC_CUBC, we check if we are - * still using the same descriptor by reading a second time - * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to - * read again AT_XDMAC_CUBC. + * The easiest way to compute the residue should be to pause the DMA + * but doing this can lead to miss some data as some devices don't + * have FIFO. + * We need to read several registers because: + * - DMA is running therefore a descriptor change is possible while + * reading these registers + * - When the block transfer is done, the value of the CUBC register + * is set to its initial value until the fetch of the next descriptor. + * This value will corrupt the residue calculation so we have to skip + * it. + * + * INITD -------- ------------ + * |____________________| + * _______________________ _______________ + * NDA @desc2 \/ @desc3 + * _______________________/\_______________ + * __________ ___________ _______________ + * CUBC 0 \/ MAX desc1 \/ MAX desc2 + * __________/\___________/\_______________ + * + * Since descriptors are aligned on 64 bits, we can assume that + * the update of NDA and CUBC is atomic. * Memory barriers are used to ensure the read order of the registers. - * A max number of retries is set because unlikely it can never ends if - * we are transferring a lot of data with small buffers. + * A max number of retries is set because unlikely it could never ends. */ - cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; - rmb(); - cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { - rmb(); check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; - - if (likely(cur_nda == check_nda)) - break; - - cur_nda = check_nda; + rmb(); + initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); rmb(); cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + rmb(); + cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + rmb(); + + if ((check_nda == cur_nda) && initd) + break; } if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) { @@ -1471,6 +1490,19 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, } /* + * Flush FIFO: only relevant when the transfer is source peripheral + * synchronized. Another flush is needed here because CUBC is updated + * when the controller sends the data write command. It can lead to + * report data that are not written in the memory or the device. The + * FIFO flush ensures that data are really written. + */ + if ((desc->lld.mbr_cfg & mask) == value) { + at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask); + while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS)) + cpu_relax(); + } + + /* * Remove size of all microblocks already transferred and the current * one. Then add the remaining size to transfer of the current * microblock. diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 996c4b00d..6149b27c3 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -46,6 +46,9 @@ #include "virt-dma.h" +#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14 +#define BCM2835_DMA_CHAN_NAME_SIZE 8 + struct bcm2835_dmadev { struct dma_device ddev; spinlock_t lock; @@ -73,7 +76,6 @@ struct bcm2835_chan { struct list_head node; struct dma_slave_config cfg; - bool cyclic; unsigned int dreq; int ch; @@ -82,6 +84,9 @@ struct bcm2835_chan { void __iomem *chan_base; int irq_number; + unsigned int irq_flags; + + bool is_lite_channel; }; struct bcm2835_desc { @@ -89,47 +94,104 @@ struct bcm2835_desc { struct virt_dma_desc vd; enum dma_transfer_direction dir; - struct bcm2835_cb_entry *cb_list; - unsigned int frames; size_t size; + + bool cyclic; + + struct bcm2835_cb_entry cb_list[]; }; #define BCM2835_DMA_CS 0x00 #define BCM2835_DMA_ADDR 0x04 +#define BCM2835_DMA_TI 0x08 #define BCM2835_DMA_SOURCE_AD 0x0c #define BCM2835_DMA_DEST_AD 0x10 -#define BCM2835_DMA_NEXTCB 0x1C +#define BCM2835_DMA_LEN 0x14 +#define BCM2835_DMA_STRIDE 0x18 +#define BCM2835_DMA_NEXTCB 0x1c +#define BCM2835_DMA_DEBUG 0x20 /* DMA CS Control and Status bits */ -#define BCM2835_DMA_ACTIVE BIT(0) -#define BCM2835_DMA_INT BIT(2) +#define BCM2835_DMA_ACTIVE BIT(0) /* activate the DMA */ +#define BCM2835_DMA_END BIT(1) /* current CB has ended */ +#define BCM2835_DMA_INT BIT(2) /* interrupt status */ +#define BCM2835_DMA_DREQ BIT(3) /* DREQ state */ #define BCM2835_DMA_ISPAUSED BIT(4) /* Pause requested or not active */ #define BCM2835_DMA_ISHELD BIT(5) /* Is held by DREQ flow control */ -#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last + * AXI-write to ack + */ +#define BCM2835_DMA_ERR BIT(8) +#define BCM2835_DMA_PRIORITY(x) ((x & 15) << 16) /* AXI priority */ +#define BCM2835_DMA_PANIC_PRIORITY(x) ((x & 15) << 20) /* panic priority */ +/* current value of TI.BCM2835_DMA_WAIT_RESP */ +#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28) +#define BCM2835_DMA_DIS_DEBUG BIT(29) /* disable debug pause signal */ #define BCM2835_DMA_ABORT BIT(30) /* Stop current CB, go to next, WO */ #define BCM2835_DMA_RESET BIT(31) /* WO, self clearing */ +/* Transfer information bits - also bcm2835_cb.info field */ #define BCM2835_DMA_INT_EN BIT(0) +#define BCM2835_DMA_TDMODE BIT(1) /* 2D-Mode */ +#define BCM2835_DMA_WAIT_RESP BIT(3) /* wait for AXI-write to be acked */ #define BCM2835_DMA_D_INC BIT(4) -#define BCM2835_DMA_D_DREQ BIT(6) +#define BCM2835_DMA_D_WIDTH BIT(5) /* 128bit writes if set */ +#define BCM2835_DMA_D_DREQ BIT(6) /* enable DREQ for destination */ +#define BCM2835_DMA_D_IGNORE BIT(7) /* ignore destination writes */ #define BCM2835_DMA_S_INC BIT(8) -#define BCM2835_DMA_S_DREQ BIT(10) - -#define BCM2835_DMA_PER_MAP(x) ((x) << 16) +#define BCM2835_DMA_S_WIDTH BIT(9) /* 128bit writes if set */ +#define BCM2835_DMA_S_DREQ BIT(10) /* enable SREQ for source */ +#define BCM2835_DMA_S_IGNORE BIT(11) /* ignore source reads - read 0 */ +#define BCM2835_DMA_BURST_LENGTH(x) ((x & 15) << 12) +#define BCM2835_DMA_PER_MAP(x) ((x & 31) << 16) /* REQ source */ +#define BCM2835_DMA_WAIT(x) ((x & 31) << 21) /* add DMA-wait cycles */ +#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */ + +/* debug register bits */ +#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR BIT(0) +#define BCM2835_DMA_DEBUG_FIFO_ERR BIT(1) +#define BCM2835_DMA_DEBUG_READ_ERR BIT(2) +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4 +#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4 +#define BCM2835_DMA_DEBUG_ID_SHIFT 16 +#define BCM2835_DMA_DEBUG_ID_BITS 9 +#define BCM2835_DMA_DEBUG_STATE_SHIFT 16 +#define BCM2835_DMA_DEBUG_STATE_BITS 9 +#define BCM2835_DMA_DEBUG_VERSION_SHIFT 25 +#define BCM2835_DMA_DEBUG_VERSION_BITS 3 +#define BCM2835_DMA_DEBUG_LITE BIT(28) + +/* shared registers for all dma channels */ +#define BCM2835_DMA_INT_STATUS 0xfe0 +#define BCM2835_DMA_ENABLE 0xff0 #define BCM2835_DMA_DATA_TYPE_S8 1 #define BCM2835_DMA_DATA_TYPE_S16 2 #define BCM2835_DMA_DATA_TYPE_S32 4 #define BCM2835_DMA_DATA_TYPE_S128 16 -#define BCM2835_DMA_BULK_MASK BIT(0) -#define BCM2835_DMA_FIQ_MASK (BIT(2) | BIT(3)) - /* Valid only for channels 0 - 14, 15 has its own base address */ #define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */ #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n)) +/* the max dma length for different channels */ +#define MAX_DMA_LEN SZ_1G +#define MAX_LITE_DMA_LEN (SZ_64K - 4) + +static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c) +{ + /* lite and normal channels have different max frame length */ + return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN; +} + +/* how many frames of max_len size do we need to transfer len bytes */ +static inline size_t bcm2835_dma_frames_for_length(size_t len, + size_t max_len) +{ + return DIV_ROUND_UP(len, max_len); +} + static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d) { return container_of(d, struct bcm2835_dmadev, ddev); @@ -146,19 +208,209 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc( return container_of(t, struct bcm2835_desc, vd.tx); } -static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc) { - struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd); - int i; + size_t i; for (i = 0; i < desc->frames; i++) dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb, desc->cb_list[i].paddr); - kfree(desc->cb_list); kfree(desc); } +static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) +{ + bcm2835_dma_free_cb_chain( + container_of(vd, struct bcm2835_desc, vd)); +} + +static void bcm2835_dma_create_cb_set_length( + struct bcm2835_chan *chan, + struct bcm2835_dma_cb *control_block, + size_t len, + size_t period_len, + size_t *total_len, + u32 finalextrainfo) +{ + size_t max_len = bcm2835_dma_max_frame_length(chan); + + /* set the length taking lite-channel limitations into account */ + control_block->length = min_t(u32, len, max_len); + + /* finished if we have no period_length */ + if (!period_len) + return; + + /* + * period_len means: that we need to generate + * transfers that are terminating at every + * multiple of period_len - this is typically + * used to set the interrupt flag in info + * which is required during cyclic transfers + */ + + /* have we filled in period_length yet? */ + if (*total_len + control_block->length < period_len) + return; + + /* calculate the length that remains to reach period_length */ + control_block->length = period_len - *total_len; + + /* reset total_length for next period */ + *total_len = 0; + + /* add extrainfo bits in info */ + control_block->info |= finalextrainfo; +} + +static inline size_t bcm2835_dma_count_frames_for_sg( + struct bcm2835_chan *c, + struct scatterlist *sgl, + unsigned int sg_len) +{ + size_t frames = 0; + struct scatterlist *sgent; + unsigned int i; + size_t plength = bcm2835_dma_max_frame_length(c); + + for_each_sg(sgl, sgent, sg_len, i) + frames += bcm2835_dma_frames_for_length( + sg_dma_len(sgent), plength); + + return frames; +} + +/** + * bcm2835_dma_create_cb_chain - create a control block and fills data in + * + * @chan: the @dma_chan for which we run this + * @direction: the direction in which we transfer + * @cyclic: it is a cyclic transfer + * @info: the default info bits to apply per controlblock + * @frames: number of controlblocks to allocate + * @src: the src address to assign (if the S_INC bit is set + * in @info, then it gets incremented) + * @dst: the dst address to assign (if the D_INC bit is set + * in @info, then it gets incremented) + * @buf_len: the full buffer length (may also be 0) + * @period_len: the period length when to apply @finalextrainfo + * in addition to the last transfer + * this will also break some control-blocks early + * @finalextrainfo: additional bits in last controlblock + * (or when period_len is reached in case of cyclic) + * @gfp: the GFP flag to use for allocation + */ +static struct bcm2835_desc *bcm2835_dma_create_cb_chain( + struct dma_chan *chan, enum dma_transfer_direction direction, + bool cyclic, u32 info, u32 finalextrainfo, size_t frames, + dma_addr_t src, dma_addr_t dst, size_t buf_len, + size_t period_len, gfp_t gfp) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t len = buf_len, total_len; + size_t frame; + struct bcm2835_desc *d; + struct bcm2835_cb_entry *cb_entry; + struct bcm2835_dma_cb *control_block; + + if (!frames) + return NULL; + + /* allocate and setup the descriptor. */ + d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry), + gfp); + if (!d) + return NULL; + + d->c = c; + d->dir = direction; + d->cyclic = cyclic; + + /* + * Iterate over all frames, create a control block + * for each frame and link them together. + */ + for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) { + cb_entry = &d->cb_list[frame]; + cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp, + &cb_entry->paddr); + if (!cb_entry->cb) + goto error_cb; + + /* fill in the control block */ + control_block = cb_entry->cb; + control_block->info = info; + control_block->src = src; + control_block->dst = dst; + control_block->stride = 0; + control_block->next = 0; + /* set up length in control_block if requested */ + if (buf_len) { + /* calculate length honoring period_length */ + bcm2835_dma_create_cb_set_length( + c, control_block, + len, period_len, &total_len, + cyclic ? finalextrainfo : 0); + + /* calculate new remaining length */ + len -= control_block->length; + } + + /* link this the last controlblock */ + if (frame) + d->cb_list[frame - 1].cb->next = cb_entry->paddr; + + /* update src and dst and length */ + if (src && (info & BCM2835_DMA_S_INC)) + src += control_block->length; + if (dst && (info & BCM2835_DMA_D_INC)) + dst += control_block->length; + + /* Length of total transfer */ + d->size += control_block->length; + } + + /* the last frame requires extra flags */ + d->cb_list[d->frames - 1].cb->info |= finalextrainfo; + + /* detect a size missmatch */ + if (buf_len && (d->size != buf_len)) + goto error_cb; + + return d; +error_cb: + bcm2835_dma_free_cb_chain(d); + + return NULL; +} + +static void bcm2835_dma_fill_cb_chain_with_sg( + struct dma_chan *chan, + enum dma_transfer_direction direction, + struct bcm2835_cb_entry *cb, + struct scatterlist *sgl, + unsigned int sg_len) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + size_t max_len = bcm2835_dma_max_frame_length(c); + unsigned int i, len; + dma_addr_t addr; + struct scatterlist *sgent; + + for_each_sg(sgl, sgent, sg_len, i) { + for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent); + len > 0; + addr += cb->cb->length, len -= cb->cb->length, cb++) { + if (direction == DMA_DEV_TO_MEM) + cb->cb->dst = addr; + else + cb->cb->src = addr; + cb->cb->length = min(len, max_len); + } + } +} + static int bcm2835_dma_abort(void __iomem *chan_base) { unsigned long cs; @@ -218,6 +470,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) struct bcm2835_desc *d; unsigned long flags; + /* check the shared interrupt */ + if (c->irq_flags & IRQF_SHARED) { + /* check if the interrupt is enabled */ + flags = readl(c->chan_base + BCM2835_DMA_CS); + /* if not set then we are not the reason for the irq */ + if (!(flags & BCM2835_DMA_INT)) + return IRQ_NONE; + } + spin_lock_irqsave(&c->vc.lock, flags); /* Acknowledge interrupt */ @@ -226,12 +487,18 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) d = c->desc; if (d) { - /* TODO Only works for cyclic DMA */ - vchan_cyclic_callback(&d->vd); - } + if (d->cyclic) { + /* call the cyclic callback */ + vchan_cyclic_callback(&d->vd); - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); + /* Keep the DMA engine running */ + writel(BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); + } else { + vchan_cookie_complete(&c->desc->vd); + bcm2835_dma_start_desc(c); + } + } spin_unlock_irqrestore(&c->vc.lock, flags); @@ -252,8 +519,8 @@ static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan) return -ENOMEM; } - return request_irq(c->irq_number, - bcm2835_dma_callback, 0, "DMA IRQ", c); + return request_irq(c->irq_number, bcm2835_dma_callback, + c->irq_flags, "DMA IRQ", c); } static void bcm2835_dma_free_chan_resources(struct dma_chan *chan) @@ -339,8 +606,6 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); unsigned long flags; - c->cyclic = true; /* Nothing else is implemented */ - spin_lock_irqsave(&c->vc.lock, flags); if (vchan_issue_pending(&c->vc) && !c->desc) bcm2835_dma_start_desc(c); @@ -348,122 +613,160 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&c->vc.lock, flags); } -static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( - struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, - size_t period_len, enum dma_transfer_direction direction, - unsigned long flags) +struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); - enum dma_slave_buswidth dev_width; struct bcm2835_desc *d; - dma_addr_t dev_addr; - unsigned int es, sync_type; - unsigned int frame; - int i; + u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC; + u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* if src, dst or len is not given return with an error */ + if (!src || !dst || !len) + return NULL; + + /* calculate number of frames */ + frames = bcm2835_dma_frames_for_length(len, max_len); + + /* allocate the CB chain - this also fills in the pointers */ + d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false, + info, extra, frames, + src, dst, len, 0, GFP_KERNEL); + if (!d) + return NULL; + + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg( + struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src = 0, dst = 0; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t frames; - /* Grab configuration */ if (!is_slave_direction(direction)) { - dev_err(chan->device->dev, "%s: bad direction?\n", __func__); + dev_err(chan->device->dev, + "%s: bad direction?\n", __func__); return NULL; } + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); + if (direction == DMA_DEV_TO_MEM) { - dev_addr = c->cfg.src_addr; - dev_width = c->cfg.src_addr_width; - sync_type = BCM2835_DMA_S_DREQ; + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; } else { - dev_addr = c->cfg.dst_addr; - dev_width = c->cfg.dst_addr_width; - sync_type = BCM2835_DMA_D_DREQ; + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; } - /* Bus width translates to the element size (ES) */ - switch (dev_width) { - case DMA_SLAVE_BUSWIDTH_4_BYTES: - es = BCM2835_DMA_DATA_TYPE_S32; - break; - default: - return NULL; - } + /* count frames in sg list */ + frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len); - /* Now allocate and setup the descriptor. */ - d = kzalloc(sizeof(*d), GFP_NOWAIT); + /* allocate the CB chain */ + d = bcm2835_dma_create_cb_chain(chan, direction, false, + info, extra, + frames, src, dst, 0, 0, + GFP_KERNEL); if (!d) return NULL; - d->c = c; - d->dir = direction; - d->frames = buf_len / period_len; + /* fill in frames with scatterlist pointers */ + bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list, + sgl, sg_len); - d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); - if (!d->cb_list) { - kfree(d); + return vchan_tx_prep(&c->vc, &d->vd, flags); +} + +static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct bcm2835_desc *d; + dma_addr_t src, dst; + u32 info = BCM2835_DMA_WAIT_RESP; + u32 extra = BCM2835_DMA_INT_EN; + size_t max_len = bcm2835_dma_max_frame_length(c); + size_t frames; + + /* Grab configuration */ + if (!is_slave_direction(direction)) { + dev_err(chan->device->dev, "%s: bad direction?\n", __func__); return NULL; } - /* Allocate memory for control blocks */ - for (i = 0; i < d->frames; i++) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - cb_entry->cb = dma_pool_zalloc(c->cb_pool, GFP_ATOMIC, - &cb_entry->paddr); - if (!cb_entry->cb) - goto error_cb; + if (!buf_len) { + dev_err(chan->device->dev, + "%s: bad buffer length (= 0)\n", __func__); + return NULL; } /* - * Iterate over all frames, create a control block - * for each frame and link them together. + * warn if buf_len is not a multiple of period_len - this may leed + * to unexpected latencies for interrupts and thus audiable clicks */ - for (frame = 0; frame < d->frames; frame++) { - struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb; - - /* Setup adresses */ - if (d->dir == DMA_DEV_TO_MEM) { - control_block->info = BCM2835_DMA_D_INC; - control_block->src = dev_addr; - control_block->dst = buf_addr + frame * period_len; - } else { - control_block->info = BCM2835_DMA_S_INC; - control_block->src = buf_addr + frame * period_len; - control_block->dst = dev_addr; - } + if (buf_len % period_len) + dev_warn_once(chan->device->dev, + "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n", + __func__, buf_len, period_len); - /* Enable interrupt */ - control_block->info |= BCM2835_DMA_INT_EN; + /* Setup DREQ channel */ + if (c->dreq != 0) + info |= BCM2835_DMA_PER_MAP(c->dreq); - /* Setup synchronization */ - if (sync_type != 0) - control_block->info |= sync_type; + if (direction == DMA_DEV_TO_MEM) { + if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + src = c->cfg.src_addr; + dst = buf_addr; + info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC; + } else { + if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) + return NULL; + dst = c->cfg.dst_addr; + src = buf_addr; + info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC; + } - /* Setup DREQ channel */ - if (c->dreq != 0) - control_block->info |= - BCM2835_DMA_PER_MAP(c->dreq); + /* calculate number of frames */ + frames = /* number of periods */ + DIV_ROUND_UP(buf_len, period_len) * + /* number of frames per period */ + bcm2835_dma_frames_for_length(period_len, max_len); - /* Length of a frame */ - control_block->length = period_len; - d->size += control_block->length; + /* + * allocate the CB chain + * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine + * implementation calls prep_dma_cyclic with interrupts disabled. + */ + d = bcm2835_dma_create_cb_chain(chan, direction, true, + info, extra, + frames, src, dst, buf_len, + period_len, GFP_NOWAIT); + if (!d) + return NULL; - /* - * Next block is the next frame. - * This DMA engine driver currently only supports cyclic DMA. - * Therefore, wrap around at number of frames. - */ - control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr; - } + /* wrap around into a loop */ + d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr; return vchan_tx_prep(&c->vc, &d->vd, flags); -error_cb: - i--; - for (; i >= 0; i--) { - struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; - - dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr); - } - - kfree(d->cb_list); - kfree(d); - return NULL; } static int bcm2835_dma_slave_config(struct dma_chan *chan, @@ -529,7 +832,8 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) return 0; } -static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) +static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, + int irq, unsigned int irq_flags) { struct bcm2835_chan *c; @@ -544,6 +848,12 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq) c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id); c->ch = chan_id; c->irq_number = irq; + c->irq_flags = irq_flags; + + /* check in DEBUG register if this is a LITE channel */ + if (readl(c->chan_base + BCM2835_DMA_DEBUG) & + BCM2835_DMA_DEBUG_LITE) + c->is_lite_channel = true; return 0; } @@ -587,9 +897,11 @@ static int bcm2835_dma_probe(struct platform_device *pdev) struct resource *res; void __iomem *base; int rc; - int i; - int irq; + int i, j; + int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1]; + int irq_flags; uint32_t chans_available; + char chan_name[BCM2835_DMA_CHAN_NAME_SIZE]; if (!pdev->dev.dma_mask) pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; @@ -615,16 +927,22 @@ static int bcm2835_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources; od->ddev.device_tx_status = bcm2835_dma_tx_status; od->ddev.device_issue_pending = bcm2835_dma_issue_pending; od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic; + od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg; + od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy; od->ddev.device_config = bcm2835_dma_slave_config; od->ddev.device_terminate_all = bcm2835_dma_terminate_all; od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); - od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) | + BIT(DMA_MEM_TO_MEM); + od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; od->ddev.dev = &pdev->dev; INIT_LIST_HEAD(&od->ddev.channels); spin_lock_init(&od->lock); @@ -640,22 +958,48 @@ static int bcm2835_dma_probe(struct platform_device *pdev) goto err_no_dma; } - /* - * Do not use the FIQ and BULK channels, - * because they are used by the GPU. - */ - chans_available &= ~(BCM2835_DMA_FIQ_MASK | BCM2835_DMA_BULK_MASK); + /* get irqs for each channel that we support */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip masked out channels */ + if (!(chans_available & (1 << i))) { + irq[i] = -1; + continue; + } - for (i = 0; i < pdev->num_resources; i++) { - irq = platform_get_irq(pdev, i); - if (irq < 0) - break; + /* get the named irq */ + snprintf(chan_name, sizeof(chan_name), "dma%i", i); + irq[i] = platform_get_irq_byname(pdev, chan_name); + if (irq[i] >= 0) + continue; - if (chans_available & (1 << i)) { - rc = bcm2835_dma_chan_init(od, i, irq); - if (rc) - goto err_no_dma; - } + /* legacy device tree case handling */ + dev_warn_once(&pdev->dev, + "missing interrupt-names property in device tree - legacy interpretation is used\n"); + /* + * in case of channel >= 11 + * use the 11th interrupt and that is shared + */ + irq[i] = platform_get_irq(pdev, i < 11 ? i : 11); + } + + /* get irqs for each channel */ + for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { + /* skip channels without irq */ + if (irq[i] < 0) + continue; + + /* check if there are other channels that also use this irq */ + irq_flags = 0; + for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++) + if ((i != j) && (irq[j] == irq[i])) { + irq_flags = IRQF_SHARED; + break; + } + + /* initialize the channel */ + rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags); + if (rc) + goto err_no_dma; } dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 0cb259c59..8c9f45fd5 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -289,7 +289,7 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) do { status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); if (time_after_eq(jiffies, dma_sync_wait_timeout)) { - pr_err("%s: timeout!\n", __func__); + dev_err(chan->device->dev, "%s: timeout!\n", __func__); return DMA_ERROR; } if (status != DMA_IN_PROGRESS) @@ -482,7 +482,8 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) device = chan->device; /* check if the channel supports slave transactions */ - if (!test_bit(DMA_SLAVE, device->cap_mask.bits)) + if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) || + test_bit(DMA_CYCLIC, device->cap_mask.bits))) return -ENXIO; /* @@ -518,7 +519,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, struct dma_chan *chan; if (mask && !__dma_device_satisfies_mask(dev, mask)) { - pr_debug("%s: wrong capabilities\n", __func__); + dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__); return NULL; } /* devices with multiple channels need special handling as we need to @@ -533,12 +534,12 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, list_for_each_entry(chan, &dev->channels, device_node) { if (chan->client_count) { - pr_debug("%s: %s busy\n", + dev_dbg(dev->dev, "%s: %s busy\n", __func__, dma_chan_name(chan)); continue; } if (fn && !fn(chan, fn_param)) { - pr_debug("%s: %s filter said false\n", + dev_dbg(dev->dev, "%s: %s filter said false\n", __func__, dma_chan_name(chan)); continue; } @@ -567,11 +568,12 @@ static struct dma_chan *find_candidate(struct dma_device *device, if (err) { if (err == -ENODEV) { - pr_debug("%s: %s module removed\n", __func__, - dma_chan_name(chan)); + dev_dbg(device->dev, "%s: %s module removed\n", + __func__, dma_chan_name(chan)); list_del_rcu(&device->global_node); } else - pr_debug("%s: failed to get %s: (%d)\n", + dev_dbg(device->dev, + "%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); if (--device->privatecnt == 0) @@ -602,7 +604,8 @@ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) device->privatecnt++; err = dma_chan_get(chan); if (err) { - pr_debug("%s: failed to get %s: (%d)\n", + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); chan = NULL; if (--device->privatecnt == 0) @@ -814,8 +817,9 @@ void dmaengine_get(void) list_del_rcu(&device->global_node); break; } else if (err) - pr_debug("%s: failed to get %s: (%d)\n", - __func__, dma_chan_name(chan), err); + dev_dbg(chan->device->dev, + "%s: failed to get %s: (%d)\n", + __func__, dma_chan_name(chan), err); } } @@ -862,12 +866,12 @@ static bool device_has_all_tx_types(struct dma_device *device) return false; #endif - #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_MEMCPY) if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) return false; #endif - #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_XOR) if (!dma_has_cap(DMA_XOR, device->cap_mask)) return false; @@ -877,7 +881,7 @@ static bool device_has_all_tx_types(struct dma_device *device) #endif #endif - #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + #if IS_ENABLED(CONFIG_ASYNC_PQ) if (!dma_has_cap(DMA_PQ, device->cap_mask)) return false; @@ -1222,8 +1226,9 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) while (tx->cookie == -EBUSY) { if (time_after_eq(jiffies, dma_sync_wait_timeout)) { - pr_err("%s timeout waiting for descriptor submission\n", - __func__); + dev_err(tx->chan->device->dev, + "%s timeout waiting for descriptor submission\n", + __func__); return DMA_ERROR; } cpu_relax(); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 97199b3c2..edf053f73 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -45,22 +45,19 @@ DW_DMA_MSIZE_16; \ u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \ DW_DMA_MSIZE_16; \ + u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \ + _dwc->p_master : _dwc->m_master; \ + u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \ + _dwc->p_master : _dwc->m_master; \ \ (DWC_CTLL_DST_MSIZE(_dmsize) \ | DWC_CTLL_SRC_MSIZE(_smsize) \ | DWC_CTLL_LLP_D_EN \ | DWC_CTLL_LLP_S_EN \ - | DWC_CTLL_DMS(_dwc->dst_master) \ - | DWC_CTLL_SMS(_dwc->src_master)); \ + | DWC_CTLL_DMS(_dms) \ + | DWC_CTLL_SMS(_sms)); \ }) -/* - * Number of descriptors to allocate for each channel. This should be - * made configurable somehow; preferably, the clients (at least the - * ones using slave transfers) should be able to give us a hint. - */ -#define NR_DESCS_PER_CHANNEL 64 - /* The set of bus widths supported by the DMA controller */ #define DW_DMA_BUSWIDTHS \ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ @@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) return to_dw_desc(dwc->active_list.next); } -static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) { - struct dw_desc *desc, *_desc; - struct dw_desc *ret = NULL; - unsigned int i = 0; - unsigned long flags; + struct dw_desc *desc = txd_to_dw_desc(tx); + struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); + dma_cookie_t cookie; + unsigned long flags; spin_lock_irqsave(&dwc->lock, flags); - list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { - i++; - if (async_tx_test_ack(&desc->txd)) { - list_del(&desc->desc_node); - ret = desc; - break; - } - dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); - } + cookie = dma_cookie_assign(tx); + + /* + * REVISIT: We should attempt to chain as many descriptors as + * possible, perhaps even appending to those already submitted + * for DMA. But this is hard to do in a race-free manner. + */ + + list_add_tail(&desc->desc_node, &dwc->queue); spin_unlock_irqrestore(&dwc->lock, flags); + dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", + __func__, desc->txd.cookie); - dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); + return cookie; +} - return ret; +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *desc; + dma_addr_t phys; + + desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys); + if (!desc) + return NULL; + + dwc->descs_allocated++; + INIT_LIST_HEAD(&desc->tx_list); + dma_async_tx_descriptor_init(&desc->txd, &dwc->chan); + desc->txd.tx_submit = dwc_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.phys = phys; + return desc; } -/* - * Move a descriptor, including any children, to the free list. - * `desc' must not be on any lists. - */ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) { - unsigned long flags; + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_desc *child, *_next; - if (desc) { - struct dw_desc *child; + if (unlikely(!desc)) + return; - spin_lock_irqsave(&dwc->lock, flags); - list_for_each_entry(child, &desc->tx_list, desc_node) - dev_vdbg(chan2dev(&dwc->chan), - "moving child desc %p to freelist\n", - child); - list_splice_init(&desc->tx_list, &dwc->free_list); - dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); - list_add(&desc->desc_node, &dwc->free_list); - spin_unlock_irqrestore(&dwc->lock, flags); + list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) { + list_del(&child->desc_node); + dma_pool_free(dw->desc_pool, child, child->txd.phys); + dwc->descs_allocated--; } + + dma_pool_free(dw->desc_pool, desc, desc->txd.phys); + dwc->descs_allocated--; } static void dwc_initialize(struct dw_dma_chan *dwc) @@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); - if (dwc->initialized == true) + if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags)) return; cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); @@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma_chan *dwc) channel_set_bit(dw, MASK.XFER, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); - dwc->initialized = true; + set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); } /*----------------------------------------------------------------------*/ -static inline unsigned int dwc_fast_ffs(unsigned long long v) -{ - /* - * We can be a lot more clever here, but this should take care - * of the most common optimization. - */ - if (!(v & 7)) - return 3; - else if (!(v & 3)) - return 2; - else if (!(v & 1)) - return 1; - return 0; -} - static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc) { dev_err(chan2dev(&dwc->chan), @@ -197,12 +193,12 @@ static inline void dwc_do_single_block(struct dw_dma_chan *dwc, * Software emulation of LLP mode relies on interrupts to continue * multi block transfer. */ - ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN; + ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN; - channel_writel(dwc, SAR, desc->lli.sar); - channel_writel(dwc, DAR, desc->lli.dar); + channel_writel(dwc, SAR, lli_read(desc, sar)); + channel_writel(dwc, DAR, lli_read(desc, dar)); channel_writel(dwc, CTL_LO, ctllo); - channel_writel(dwc, CTL_HI, desc->lli.ctlhi); + channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi)); channel_set_bit(dw, CH_EN, dwc->mask); /* Move pointer to next descriptor */ @@ -213,6 +209,7 @@ static inline void dwc_do_single_block(struct dw_dma_chan *dwc, static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); + u8 lms = DWC_LLP_LMS(dwc->m_master); unsigned long was_soft_llp; /* ASSERT: channel is idle */ @@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) dwc_initialize(dwc); - dwc->residue = first->total_len; + first->residue = first->total_len; dwc->tx_node_active = &first->tx_list; /* Submit first block */ @@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) dwc_initialize(dwc); - channel_writel(dwc, LLP, first->txd.phys); - channel_writel(dwc, CTL_LO, - DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, LLP, first->txd.phys | lms); + channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); channel_writel(dwc, CTL_HI, 0); channel_set_bit(dw, CH_EN, dwc->mask); } @@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, list_for_each_entry(child, &desc->tx_list, desc_node) async_tx_ack(&child->txd); async_tx_ack(&desc->txd); - - list_splice_init(&desc->tx_list, &dwc->free_list); - list_move(&desc->desc_node, &dwc->free_list); - - dma_descriptor_unmap(txd); + dwc_desc_put(dwc, desc); spin_unlock_irqrestore(&dwc->lock, flags); if (callback) @@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) head = &desc->tx_list; if (active != head) { - /* Update desc to reflect last sent one */ - if (active != head->next) - desc = to_dw_desc(active->prev); - - dwc->residue -= desc->len; + /* Update residue to reflect last sent descriptor */ + if (active == head->next) + desc->residue -= desc->len; + else + desc->residue -= to_dw_desc(active->prev)->len; child = to_dw_desc(active); @@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags); } - dwc->residue = 0; - spin_unlock_irqrestore(&dwc->lock, flags); dwc_complete_all(dw, dwc); @@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) } if (list_empty(&dwc->active_list)) { - dwc->residue = 0; spin_unlock_irqrestore(&dwc->lock, flags); return; } @@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { /* Initial residue value */ - dwc->residue = desc->total_len; + desc->residue = desc->total_len; /* Check first descriptors addr */ - if (desc->txd.phys == llp) { + if (desc->txd.phys == DWC_LLP_LOC(llp)) { spin_unlock_irqrestore(&dwc->lock, flags); return; } /* Check first descriptors llp */ - if (desc->lli.llp == llp) { + if (lli_read(desc, llp) == llp) { /* This one is currently in progress */ - dwc->residue -= dwc_get_sent(dwc); + desc->residue -= dwc_get_sent(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return; } - dwc->residue -= desc->len; + desc->residue -= desc->len; list_for_each_entry(child, &desc->tx_list, desc_node) { - if (child->lli.llp == llp) { + if (lli_read(child, llp) == llp) { /* Currently in progress */ - dwc->residue -= dwc_get_sent(dwc); + desc->residue -= dwc_get_sent(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return; } - dwc->residue -= child->len; + desc->residue -= child->len; } /* @@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) spin_unlock_irqrestore(&dwc->lock, flags); } -static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) +static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc) { dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", - lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo); + lli_read(desc, sar), + lli_read(desc, dar), + lli_read(desc, llp), + lli_read(desc, ctlhi), + lli_read(desc, ctllo)); } static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) @@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) */ dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n" " cookie: %d\n", bad_desc->txd.cookie); - dwc_dump_lli(dwc, &bad_desc->lli); + dwc_dump_lli(dwc, bad_desc); list_for_each_entry(child, &bad_desc->tx_list, desc_node) - dwc_dump_lli(dwc, &child->lli); + dwc_dump_lli(dwc, child); spin_unlock_irqrestore(&dwc->lock, flags); @@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, */ if (unlikely(status_err & dwc->mask) || unlikely(status_xfer & dwc->mask)) { - int i; + unsigned int i; dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n", @@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, dma_writel(dw, CLEAR.XFER, dwc->mask); for (i = 0; i < dwc->cdesc->periods; i++) - dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); + dwc_dump_lli(dwc, dwc->cdesc->desc[i]); spin_unlock_irqrestore(&dwc->lock, flags); } @@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long data) u32 status_block; u32 status_xfer; u32 status_err; - int i; + unsigned int i; status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); @@ -658,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /*----------------------------------------------------------------------*/ -static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct dw_desc *desc = txd_to_dw_desc(tx); - struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); - dma_cookie_t cookie; - unsigned long flags; - - spin_lock_irqsave(&dwc->lock, flags); - cookie = dma_cookie_assign(tx); - - /* - * REVISIT: We should attempt to chain as many descriptors as - * possible, perhaps even appending to those already submitted - * for DMA. But this is hard to do in a race-free manner. - */ - - dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie); - list_add_tail(&desc->desc_node, &dwc->queue); - - spin_unlock_irqrestore(&dwc->lock, flags); - - return cookie; -} - static struct dma_async_tx_descriptor * dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -693,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, struct dw_desc *prev; size_t xfer_count; size_t offset; + u8 m_master = dwc->m_master; unsigned int src_width; unsigned int dst_width; - unsigned int data_width; + unsigned int data_width = dw->pdata->data_width[m_master]; u32 ctllo; + u8 lms = DWC_LLP_LMS(m_master); dev_vdbg(chan2dev(chan), "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__, @@ -709,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dwc->direction = DMA_MEM_TO_MEM; - data_width = min_t(unsigned int, dw->data_width[dwc->src_master], - dw->data_width[dwc->dst_master]); - - src_width = dst_width = min_t(unsigned int, data_width, - dwc_fast_ffs(src | dest | len)); + src_width = dst_width = __ffs(data_width | src | dest | len); ctllo = DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_DST_WIDTH(dst_width) @@ -731,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (!desc) goto err_desc_get; - desc->lli.sar = src + offset; - desc->lli.dar = dest + offset; - desc->lli.ctllo = ctllo; - desc->lli.ctlhi = xfer_count; + lli_write(desc, sar, src + offset); + lli_write(desc, dar, dest + offset); + lli_write(desc, ctllo, ctllo); + lli_write(desc, ctlhi, xfer_count); desc->len = xfer_count << src_width; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } if (flags & DMA_PREP_INTERRUPT) /* Trigger interrupt after last block */ - prev->lli.ctllo |= DWC_CTLL_INT_EN; + lli_set(prev, ctllo, DWC_CTLL_INT_EN); prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); first->txd.flags = flags; first->total_len = len; @@ -773,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct dw_desc *prev; struct dw_desc *first; u32 ctllo; + u8 m_master = dwc->m_master; + u8 lms = DWC_LLP_LMS(m_master); dma_addr_t reg; unsigned int reg_width; unsigned int mem_width; - unsigned int data_width; + unsigned int data_width = dw->pdata->data_width[m_master]; unsigned int i; struct scatterlist *sg; size_t total_len = 0; @@ -802,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) : DWC_CTLL_FC(DW_DMA_FC_D_M2P); - data_width = dw->data_width[dwc->src_master]; - for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; u32 len, dlen, mem; @@ -811,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, mem = sg_dma_address(sg); len = sg_dma_len(sg); - mem_width = min_t(unsigned int, - data_width, dwc_fast_ffs(mem | len)); + mem_width = __ffs(data_width | mem | len); slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) goto err_desc_get; - desc->lli.sar = mem; - desc->lli.dar = reg; - desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); + lli_write(desc, sar, mem); + lli_write(desc, dar, reg); + lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); if ((len >> mem_width) > dwc->block_size) { dlen = dwc->block_size << mem_width; mem += dlen; @@ -831,15 +797,14 @@ slave_sg_todev_fill_desc: len = 0; } - desc->lli.ctlhi = dlen >> mem_width; + lli_write(desc, ctlhi, dlen >> mem_width); desc->len = dlen; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; total_len += dlen; @@ -859,8 +824,6 @@ slave_sg_todev_fill_desc: ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) : DWC_CTLL_FC(DW_DMA_FC_D_P2M); - data_width = dw->data_width[dwc->dst_master]; - for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; u32 len, dlen, mem; @@ -868,17 +831,16 @@ slave_sg_todev_fill_desc: mem = sg_dma_address(sg); len = sg_dma_len(sg); - mem_width = min_t(unsigned int, - data_width, dwc_fast_ffs(mem | len)); + mem_width = __ffs(data_width | mem | len); slave_sg_fromdev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) goto err_desc_get; - desc->lli.sar = reg; - desc->lli.dar = mem; - desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); + lli_write(desc, sar, reg); + lli_write(desc, dar, mem); + lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); if ((len >> reg_width) > dwc->block_size) { dlen = dwc->block_size << reg_width; mem += dlen; @@ -887,15 +849,14 @@ slave_sg_fromdev_fill_desc: dlen = len; len = 0; } - desc->lli.ctlhi = dlen >> reg_width; + lli_write(desc, ctlhi, dlen >> reg_width); desc->len = dlen; if (!first) { first = desc; } else { - prev->lli.llp = desc->txd.phys; - list_add_tail(&desc->desc_node, - &first->tx_list); + lli_write(prev, llp, desc->txd.phys | lms); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; total_len += dlen; @@ -910,9 +871,10 @@ slave_sg_fromdev_fill_desc: if (flags & DMA_PREP_INTERRUPT) /* Trigger interrupt after last block */ - prev->lli.ctllo |= DWC_CTLL_INT_EN; + lli_set(prev, ctllo, DWC_CTLL_INT_EN); prev->lli.llp = 0; + lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); first->total_len = total_len; return &first->txd; @@ -937,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) dwc->src_id = dws->src_id; dwc->dst_id = dws->dst_id; - dwc->src_master = dws->src_master; - dwc->dst_master = dws->dst_master; + dwc->m_master = dws->m_master; + dwc->p_master = dws->p_master; return true; } @@ -991,7 +953,7 @@ static int dwc_pause(struct dma_chan *chan) while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) udelay(2); - dwc->paused = true; + set_bit(DW_DMA_IS_PAUSED, &dwc->flags); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1004,7 +966,7 @@ static inline void dwc_chan_resume(struct dw_dma_chan *dwc) channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); - dwc->paused = false; + clear_bit(DW_DMA_IS_PAUSED, &dwc->flags); } static int dwc_resume(struct dma_chan *chan) @@ -1012,12 +974,10 @@ static int dwc_resume(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); unsigned long flags; - if (!dwc->paused) - return 0; - spin_lock_irqsave(&dwc->lock, flags); - dwc_chan_resume(dwc); + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags)) + dwc_chan_resume(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1053,16 +1013,37 @@ static int dwc_terminate_all(struct dma_chan *chan) return 0; } -static inline u32 dwc_get_residue(struct dw_dma_chan *dwc) +static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c) { + struct dw_desc *desc; + + list_for_each_entry(desc, &dwc->active_list, desc_node) + if (desc->txd.cookie == c) + return desc; + + return NULL; +} + +static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie) +{ + struct dw_desc *desc; unsigned long flags; u32 residue; spin_lock_irqsave(&dwc->lock, flags); - residue = dwc->residue; - if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) - residue -= dwc_get_sent(dwc); + desc = dwc_find_desc(dwc, cookie); + if (desc) { + if (desc == dwc_first_active(dwc)) { + residue = desc->residue; + if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) + residue -= dwc_get_sent(dwc); + } else { + residue = desc->total_len; + } + } else { + residue = 0; + } spin_unlock_irqrestore(&dwc->lock, flags); return residue; @@ -1083,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan, dwc_scan_descriptors(to_dw_dma(chan->device), dwc); ret = dma_cookie_status(chan, cookie, txstate); - if (ret != DMA_COMPLETE) - dma_set_residue(txstate, dwc_get_residue(dwc)); + if (ret == DMA_COMPLETE) + return ret; + + dma_set_residue(txstate, dwc_get_residue(dwc, cookie)); - if (dwc->paused && ret == DMA_IN_PROGRESS) + if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS) return DMA_PAUSED; return ret; @@ -1107,7 +1090,7 @@ static void dwc_issue_pending(struct dma_chan *chan) static void dw_dma_off(struct dw_dma *dw) { - int i; + unsigned int i; dma_writel(dw, CFG, 0); @@ -1121,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw) cpu_relax(); for (i = 0; i < dw->dma.chancnt; i++) - dw->chan[i].initialized = false; + clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags); } static void dw_dma_on(struct dw_dma *dw) @@ -1133,9 +1116,6 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - struct dw_desc *desc; - int i; - unsigned long flags; dev_vdbg(chan2dev(chan), "%s\n", __func__); @@ -1166,48 +1146,13 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) dw_dma_on(dw); dw->in_use |= dwc->mask; - spin_lock_irqsave(&dwc->lock, flags); - i = dwc->descs_allocated; - while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { - dma_addr_t phys; - - spin_unlock_irqrestore(&dwc->lock, flags); - - desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys); - if (!desc) - goto err_desc_alloc; - - memset(desc, 0, sizeof(struct dw_desc)); - - INIT_LIST_HEAD(&desc->tx_list); - dma_async_tx_descriptor_init(&desc->txd, chan); - desc->txd.tx_submit = dwc_tx_submit; - desc->txd.flags = DMA_CTRL_ACK; - desc->txd.phys = phys; - - dwc_desc_put(dwc, desc); - - spin_lock_irqsave(&dwc->lock, flags); - i = ++dwc->descs_allocated; - } - - spin_unlock_irqrestore(&dwc->lock, flags); - - dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i); - - return i; - -err_desc_alloc: - dev_info(chan2dev(chan), "only allocated %d descriptors\n", i); - - return i; + return 0; } static void dwc_free_chan_resources(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - struct dw_desc *desc, *_desc; unsigned long flags; LIST_HEAD(list); @@ -1220,17 +1165,15 @@ static void dwc_free_chan_resources(struct dma_chan *chan) BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); spin_lock_irqsave(&dwc->lock, flags); - list_splice_init(&dwc->free_list, &list); - dwc->descs_allocated = 0; /* Clear custom channel configuration */ dwc->src_id = 0; dwc->dst_id = 0; - dwc->src_master = 0; - dwc->dst_master = 0; + dwc->m_master = 0; + dwc->p_master = 0; - dwc->initialized = false; + clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); @@ -1244,11 +1187,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) if (!dw->in_use) dw_dma_off(dw); - list_for_each_entry_safe(desc, _desc, &list, desc_node) { - dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); - dma_pool_free(dw->desc_pool, desc, desc->txd.phys); - } - dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } @@ -1326,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, struct dw_cyclic_desc *retval = NULL; struct dw_desc *desc; struct dw_desc *last = NULL; + u8 lms = DWC_LLP_LMS(dwc->m_master); unsigned long was_cyclic; unsigned int reg_width; unsigned int periods; @@ -1379,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, retval = ERR_PTR(-ENOMEM); - if (periods > NR_DESCS_PER_CHANNEL) - goto out_err; - cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL); if (!cdesc) goto out_err; @@ -1397,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, switch (direction) { case DMA_MEM_TO_DEV: - desc->lli.dar = sconfig->dst_addr; - desc->lli.sar = buf_addr + (period_len * i); - desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_FIX - | DWC_CTLL_SRC_INC - | DWC_CTLL_INT_EN); - - desc->lli.ctllo |= sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_M2P) : - DWC_CTLL_FC(DW_DMA_FC_D_M2P); + lli_write(desc, dar, sconfig->dst_addr); + lli_write(desc, sar, buf_addr + period_len * i); + lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC + | DWC_CTLL_INT_EN)); + + lli_set(desc, ctllo, sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P)); break; case DMA_DEV_TO_MEM: - desc->lli.dar = buf_addr + (period_len * i); - desc->lli.sar = sconfig->src_addr; - desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_DST_INC - | DWC_CTLL_SRC_FIX - | DWC_CTLL_INT_EN); - - desc->lli.ctllo |= sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_P2M) : - DWC_CTLL_FC(DW_DMA_FC_D_P2M); + lli_write(desc, dar, buf_addr + period_len * i); + lli_write(desc, sar, sconfig->src_addr); + lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX + | DWC_CTLL_INT_EN)); + + lli_set(desc, ctllo, sconfig->device_fc ? + DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M)); break; default: break; } - desc->lli.ctlhi = (period_len >> reg_width); + lli_write(desc, ctlhi, period_len >> reg_width); cdesc->desc[i] = desc; if (last) - last->lli.llp = desc->txd.phys; + lli_write(last, llp, desc->txd.phys | lms); last = desc; } /* Let's make a cyclic list */ - last->lli.llp = cdesc->desc[0]->txd.phys; + lli_write(last, llp, cdesc->desc[0]->txd.phys | lms); dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf %pad len %zu period %zu periods %d\n", @@ -1471,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(dwc->chan.device); struct dw_cyclic_desc *cdesc = dwc->cdesc; - int i; + unsigned int i; unsigned long flags; dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__); @@ -1495,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan *chan) kfree(cdesc->desc); kfree(cdesc); + dwc->cdesc = NULL; + clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); } EXPORT_SYMBOL(dw_dma_cyclic_free); /*----------------------------------------------------------------------*/ -int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) +int dw_dma_probe(struct dw_dma_chip *chip) { + struct dw_dma_platform_data *pdata; struct dw_dma *dw; bool autocfg = false; unsigned int dw_params; - unsigned int max_blk_size = 0; + unsigned int i; int err; - int i; dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); if (!dw) return -ENOMEM; + dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); + if (!dw->pdata) + return -ENOMEM; + dw->regs = chip->regs; chip->dw = dw; pm_runtime_get_sync(chip->dev); - if (!pdata) { - dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); + if (!chip->pdata) { + dw_params = dma_readl(dw, DW_PARAMS); dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); autocfg = dw_params >> DW_PARAMS_EN & 1; @@ -1529,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; } - pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - err = -ENOMEM; - goto err_pdata; - } + /* Reassign the platform data pointer */ + pdata = dw->pdata; /* Get hardware configuration parameters */ pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; for (i = 0; i < pdata->nr_masters; i++) { pdata->data_width[i] = - (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; + 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3); } - max_blk_size = dma_readl(dw, MAX_BLK_SIZE); + pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); /* Fill platform data with the default values */ pdata->is_private = true; pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; - } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { err = -EINVAL; goto err_pdata; + } else { + memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata)); + + /* Reassign the platform data pointer */ + pdata = dw->pdata; } dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), @@ -1561,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; } - /* Get hardware configuration parameters */ - dw->nr_masters = pdata->nr_masters; - for (i = 0; i < dw->nr_masters; i++) - dw->data_width[i] = pdata->data_width[i]; - /* Calculate all channel mask before DMA setup */ dw->all_chan_mask = (1 << pdata->nr_channels) - 1; @@ -1612,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) INIT_LIST_HEAD(&dwc->active_list); INIT_LIST_HEAD(&dwc->queue); - INIT_LIST_HEAD(&dwc->free_list); channel_clear_bit(dw, CH_EN, dwc->mask); @@ -1620,11 +1558,9 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { - unsigned int dwc_params; unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; - void __iomem *addr = chip->regs + r * sizeof(u32); - - dwc_params = dma_read_byaddr(addr, DWC_PARAMS); + void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r]; + unsigned int dwc_params = dma_readl_native(addr); dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, dwc_params); @@ -1635,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) * up to 0x0a for 4095. */ dwc->block_size = - (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1; + (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1; dwc->nollp = (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0; } else { dwc->block_size = pdata->block_size; /* Check if channel supports multi block transfer */ - channel_writel(dwc, LLP, 0xfffffffc); - dwc->nollp = - (channel_readl(dwc, LLP) & 0xfffffffc) == 0; + channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff)); + dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0; channel_writel(dwc, LLP, 0); } } diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 358f9689a..0ae6c3b1d 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -17,8 +17,8 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { + const struct dw_dma_platform_data *pdata = (void *)pid->driver_data; struct dw_dma_chip *chip; - struct dw_dma_platform_data *pdata = (void *)pid->driver_data; int ret; ret = pcim_enable_device(pdev); @@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) chip->dev = &pdev->dev; chip->regs = pcim_iomap_table(pdev)[0]; chip->irq = pdev->irq; + chip->pdata = pdata; - ret = dw_dma_probe(chip, pdata); + ret = dw_dma_probe(chip); if (ret) return ret; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 26edbe3a2..5bda0eb9f 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, slave.src_id = dma_spec->args[0]; slave.dst_id = dma_spec->args[0]; - slave.src_master = dma_spec->args[1]; - slave.dst_master = dma_spec->args[2]; + slave.m_master = dma_spec->args[1]; + slave.p_master = dma_spec->args[2]; if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS || slave.dst_id >= DW_DMA_MAX_NR_REQUESTS || - slave.src_master >= dw->nr_masters || - slave.dst_master >= dw->nr_masters)) + slave.m_master >= dw->pdata->nr_masters || + slave.p_master >= dw->pdata->nr_masters)) return NULL; dma_cap_zero(cap); @@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param) .dma_dev = dma_spec->dev, .src_id = dma_spec->slave_id, .dst_id = dma_spec->slave_id, - .src_master = 1, - .dst_master = 0, + .m_master = 0, + .p_master = 1, }; return dw_dma_filter(chan, &slave); @@ -103,6 +103,7 @@ dw_dma_parse_dt(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct dw_dma_platform_data *pdata; u32 tmp, arr[DW_DMA_MAX_NR_MASTERS]; + u32 nr_masters; u32 nr_channels; if (!np) { @@ -110,6 +111,11 @@ dw_dma_parse_dt(struct platform_device *pdev) return NULL; } + if (of_property_read_u32(np, "dma-masters", &nr_masters)) + return NULL; + if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS) + return NULL; + if (of_property_read_u32(np, "dma-channels", &nr_channels)) return NULL; @@ -117,6 +123,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!pdata) return NULL; + pdata->nr_masters = nr_masters; pdata->nr_channels = nr_channels; if (of_property_read_bool(np, "is_private")) @@ -131,17 +138,13 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!of_property_read_u32(np, "block_size", &tmp)) pdata->block_size = tmp; - if (!of_property_read_u32(np, "dma-masters", &tmp)) { - if (tmp > DW_DMA_MAX_NR_MASTERS) - return NULL; - - pdata->nr_masters = tmp; - } - - if (!of_property_read_u32_array(np, "data_width", arr, - pdata->nr_masters)) - for (tmp = 0; tmp < pdata->nr_masters; tmp++) + if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) { + for (tmp = 0; tmp < nr_masters; tmp++) pdata->data_width[tmp] = arr[tmp]; + } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) { + for (tmp = 0; tmp < nr_masters; tmp++) + pdata->data_width[tmp] = BIT(arr[tmp] & 0x07); + } return pdata; } @@ -158,7 +161,7 @@ static int dw_probe(struct platform_device *pdev) struct dw_dma_chip *chip; struct device *dev = &pdev->dev; struct resource *mem; - struct dw_dma_platform_data *pdata; + const struct dw_dma_platform_data *pdata; int err; chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); @@ -183,6 +186,7 @@ static int dw_probe(struct platform_device *pdev) pdata = dw_dma_parse_dt(pdev); chip->dev = dev; + chip->pdata = pdata; chip->clk = devm_clk_get(chip->dev, "hclk"); if (IS_ERR(chip->clk)) @@ -193,7 +197,7 @@ static int dw_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - err = dw_dma_probe(chip, pdata); + err = dw_dma_probe(chip); if (err) goto err_dw_dma_probe; diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 0a50c18d8..4b7bd7834 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -114,10 +114,6 @@ struct dw_dma_regs { #define dma_writel_native writel #endif -/* To access the registers in early stage of probe */ -#define dma_read_byaddr(addr, name) \ - dma_readl_native((addr) + offsetof(struct dw_dma_regs, name)) - /* Bitfields in DW_PARAMS */ #define DW_PARAMS_NR_CHAN 8 /* number of channels */ #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */ @@ -143,6 +139,10 @@ enum dw_dma_msize { DW_DMA_MSIZE_256, }; +/* Bitfields in LLP */ +#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */ +#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */ + /* Bitfields in CTL_LO */ #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */ #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ @@ -216,6 +216,8 @@ enum dw_dma_msize { enum dw_dmac_flags { DW_DMA_IS_CYCLIC = 0, DW_DMA_IS_SOFT_LLP = 1, + DW_DMA_IS_PAUSED = 2, + DW_DMA_IS_INITIALIZED = 3, }; struct dw_dma_chan { @@ -224,8 +226,6 @@ struct dw_dma_chan { u8 mask; u8 priority; enum dma_transfer_direction direction; - bool paused; - bool initialized; /* software emulation of the LLP transfers */ struct list_head *tx_node_active; @@ -236,8 +236,6 @@ struct dw_dma_chan { unsigned long flags; struct list_head active_list; struct list_head queue; - struct list_head free_list; - u32 residue; struct dw_cyclic_desc *cdesc; unsigned int descs_allocated; @@ -249,8 +247,8 @@ struct dw_dma_chan { /* custom slave configuration */ u8 src_id; u8 dst_id; - u8 src_master; - u8 dst_master; + u8 m_master; + u8 p_master; /* configuration passed via .device_config */ struct dma_slave_config dma_sconfig; @@ -283,9 +281,8 @@ struct dw_dma { u8 all_chan_mask; u8 in_use; - /* hardware configuration */ - unsigned char nr_masters; - unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; + /* platform data */ + struct dw_dma_platform_data *pdata; }; static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) @@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) return container_of(ddev, struct dw_dma, dma); } +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO +typedef __be32 __dw32; +#else +typedef __le32 __dw32; +#endif + /* LLI == Linked List Item; a.k.a. DMA block descriptor */ struct dw_lli { /* values that are not changed by hardware */ - u32 sar; - u32 dar; - u32 llp; /* chain to next lli */ - u32 ctllo; + __dw32 sar; + __dw32 dar; + __dw32 llp; /* chain to next lli */ + __dw32 ctllo; /* values that may get written back: */ - u32 ctlhi; + __dw32 ctlhi; /* sstat and dstat can snapshot peripheral register state. * silicon config may discard either or both... */ - u32 sstat; - u32 dstat; + __dw32 sstat; + __dw32 dstat; }; struct dw_desc { /* FIRST values the hardware uses */ struct dw_lli lli; +#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO +#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v)) +#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v)) +#define lli_read(d, reg) be32_to_cpu((d)->lli.reg) +#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v)) +#else +#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v)) +#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v)) +#define lli_read(d, reg) le32_to_cpu((d)->lli.reg) +#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v)) +#endif + /* THEN values for driver housekeeping */ struct list_head desc_node; struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; size_t total_len; + u32 residue; }; #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 04070baab..8181ed131 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1537,8 +1537,17 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) dev_vdbg(ecc->dev, "dma_ccerr_handler\n"); - if (!edma_error_pending(ecc)) + if (!edma_error_pending(ecc)) { + /* + * The registers indicate no pending error event but the irq + * handler has been called. + * Ask eDMA to re-evaluate the error registers. + */ + dev_err(ecc->dev, "%s: Error interrupt without error event!\n", + __func__); + edma_write(ecc, EDMA_EEVAL, 1); return IRQ_NONE; + } while (1) { /* Event missed register(s) */ diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index aac85c30c..a8828ed63 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -462,13 +462,12 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan) struct fsl_desc_sw *desc; dma_addr_t pdesc; - desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); if (!desc) { chan_dbg(chan, "out of memory for link descriptor\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); desc->async_tx.tx_submit = fsl_dma_tx_submit; diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index ee510515c..f8c5cd533 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -77,8 +77,8 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) hsu_chan_writel(hsuc, HSU_CH_MTSR, mtsr); /* Set descriptors */ - count = (desc->nents - desc->active) % HSU_DMA_CHAN_NR_DESC; - for (i = 0; i < count; i++) { + count = desc->nents - desc->active; + for (i = 0; i < count && i < HSU_DMA_CHAN_NR_DESC; i++) { hsu_chan_writel(hsuc, HSU_CH_DxSAR(i), desc->sg[i].addr); hsu_chan_writel(hsuc, HSU_CH_DxTSR(i), desc->sg[i].len); @@ -160,7 +160,7 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) return IRQ_NONE; /* Timeout IRQ, need wait some time, see Errata 2 */ - if (hsuc->direction == DMA_DEV_TO_MEM && (sr & HSU_CH_SR_DESCTO_ANY)) + if (sr & HSU_CH_SR_DESCTO_ANY) udelay(2); sr &= ~HSU_CH_SR_DESCTO_ANY; @@ -420,6 +420,8 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) hsu->dma.dev = chip->dev; + dma_set_max_seg_size(hsu->dma.dev, HSU_CH_DxTSR_MASK); + ret = dma_async_device_register(&hsu->dma); if (ret) return ret; diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 6b070c22b..486b023b3 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -58,6 +58,10 @@ #define HSU_CH_DCR_CHEI BIT(23) #define HSU_CH_DCR_CHTOI(x) BIT(24 + (x)) +/* Bits in HSU_CH_DxTSR */ +#define HSU_CH_DxTSR_MASK GENMASK(15, 0) +#define HSU_CH_DxTSR_TSR(x) ((x) & HSU_CH_DxTSR_MASK) + struct hsu_dma_sg { dma_addr_t addr; unsigned int len; diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index efdee1a69..d406056e8 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -690,12 +690,11 @@ static int ioat_alloc_chan_resources(struct dma_chan *c) /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ ioat_chan->completion = - dma_pool_alloc(ioat_chan->ioat_dma->completion_pool, - GFP_KERNEL, &ioat_chan->completion_dma); + dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool, + GFP_KERNEL, &ioat_chan->completion_dma); if (!ioat_chan->completion) return -ENOMEM; - memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion)); writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); writel(((u64)ioat_chan->completion_dma) >> 32, @@ -1074,6 +1073,7 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) struct ioatdma_chan *ioat_chan; bool is_raid_device = false; int err; + u16 val16; dma = &ioat_dma->dma_dev; dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; @@ -1173,6 +1173,17 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) if (dca) ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + /* disable relaxed ordering */ + err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16); + if (err) + return err; + + /* clear relaxed ordering enable */ + val16 &= ~IOAT_DEVCTRL_ROE; + err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16); + if (err) + return err; + return 0; } diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 4994a3623..70534981a 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -26,6 +26,13 @@ #define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 +/* PCIe config registers */ + +/* EXPCAPID + N */ +#define IOAT_DEVCTRL_OFFSET 0x8 +/* relaxed ordering enable */ +#define IOAT_DEVCTRL_ROE 0x10 + /* MMIO Device Registers */ #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index e39457f13..56f1fd68b 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -364,13 +364,12 @@ mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan) struct mmp_pdma_desc_sw *desc; dma_addr_t pdesc; - desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); + desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc); if (!desc) { dev_err(chan->dev, "out of memory for link descriptor\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan); /* each desc has submit */ diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index aae76fb39..ccadafa51 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -3,6 +3,7 @@ * Copyright (C) Semihalf 2009 * Copyright (C) Ilya Yanok, Emcraft Systems 2010 * Copyright (C) Alexander Popov, Promcontroller 2014 + * Copyright (C) Mario Six, Guntermann & Drunck GmbH, 2016 * * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description * (defines, structures and comments) was taken from MPC5121 DMA driver @@ -26,18 +27,19 @@ */ /* - * MPC512x and MPC8308 DMA driver. It supports - * memory to memory data transfers (tested using dmatest module) and - * data transfers between memory and peripheral I/O memory - * by means of slave scatter/gather with these limitations: - * - chunked transfers (described by s/g lists with more than one item) - * are refused as long as proper support for scatter/gather is missing; - * - transfers on MPC8308 always start from software as this SoC appears - * not to have external request lines for peripheral flow control; - * - only peripheral devices with 4-byte FIFO access register are supported; - * - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently - * source and destination addresses must be 4-byte aligned - * and transfer size must be aligned on (4 * maxburst) boundary; + * MPC512x and MPC8308 DMA driver. It supports memory to memory data transfers + * (tested using dmatest module) and data transfers between memory and + * peripheral I/O memory by means of slave scatter/gather with these + * limitations: + * - chunked transfers (described by s/g lists with more than one item) are + * refused as long as proper support for scatter/gather is missing + * - transfers on MPC8308 always start from software as this SoC does not have + * external request lines for peripheral flow control + * - memory <-> I/O memory transfer chunks of sizes of 1, 2, 4, 16 (for + * MPC512x), and 32 bytes are supported, and, consequently, source + * addresses and destination addresses must be aligned accordingly; + * furthermore, for MPC512x SoCs, the transfer size must be aligned on + * (chunk size * maxburst) */ #include <linux/module.h> @@ -213,8 +215,10 @@ struct mpc_dma_chan { /* Settings for access to peripheral FIFO */ dma_addr_t src_per_paddr; u32 src_tcd_nunits; + u8 swidth; dma_addr_t dst_per_paddr; u32 dst_tcd_nunits; + u8 dwidth; /* Lock for this structure */ spinlock_t lock; @@ -247,6 +251,7 @@ static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c) static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) { struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c); + return container_of(mchan, struct mpc_dma, channels[c->chan_id]); } @@ -254,9 +259,9 @@ static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c) * Execute all queued DMA descriptors. * * Following requirements must be met while calling mpc_dma_execute(): - * a) mchan->lock is acquired, - * b) mchan->active list is empty, - * c) mchan->queued list contains at least one entry. + * a) mchan->lock is acquired, + * b) mchan->active list is empty, + * c) mchan->queued list contains at least one entry. */ static void mpc_dma_execute(struct mpc_dma_chan *mchan) { @@ -446,20 +451,15 @@ static void mpc_dma_tasklet(unsigned long data) if (es & MPC_DMA_DMAES_SAE) dev_err(mdma->dma.dev, "- Source Address Error\n"); if (es & MPC_DMA_DMAES_SOE) - dev_err(mdma->dma.dev, "- Source Offset" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Source Offset Configuration Error\n"); if (es & MPC_DMA_DMAES_DAE) - dev_err(mdma->dma.dev, "- Destination Address" - " Error\n"); + dev_err(mdma->dma.dev, "- Destination Address Error\n"); if (es & MPC_DMA_DMAES_DOE) - dev_err(mdma->dma.dev, "- Destination Offset" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Destination Offset Configuration Error\n"); if (es & MPC_DMA_DMAES_NCE) - dev_err(mdma->dma.dev, "- NBytes/Citter" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- NBytes/Citter Configuration Error\n"); if (es & MPC_DMA_DMAES_SGE) - dev_err(mdma->dma.dev, "- Scatter/Gather" - " Configuration Error\n"); + dev_err(mdma->dma.dev, "- Scatter/Gather Configuration Error\n"); if (es & MPC_DMA_DMAES_SBE) dev_err(mdma->dma.dev, "- Source Bus Error\n"); if (es & MPC_DMA_DMAES_DBE) @@ -518,8 +518,8 @@ static int mpc_dma_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) { mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL); if (!mdesc) { - dev_notice(mdma->dma.dev, "Memory allocation error. " - "Allocated only %u descriptors\n", i); + dev_notice(mdma->dma.dev, + "Memory allocation error. Allocated only %u descriptors\n", i); break; } @@ -684,6 +684,15 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, return &mdesc->desc; } +inline u8 buswidth_to_dmatsize(u8 buswidth) +{ + u8 res; + + for (res = 0; buswidth > 1; buswidth /= 2) + res++; + return res; +} + static struct dma_async_tx_descriptor * mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, @@ -742,39 +751,54 @@ mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, memset(tcd, 0, sizeof(struct mpc_dma_tcd)); - if (!IS_ALIGNED(sg_dma_address(sg), 4)) - goto err_prep; - if (direction == DMA_DEV_TO_MEM) { tcd->saddr = per_paddr; tcd->daddr = sg_dma_address(sg); + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->dwidth)) + goto err_prep; + tcd->soff = 0; - tcd->doff = 4; + tcd->doff = mchan->dwidth; } else { tcd->saddr = sg_dma_address(sg); tcd->daddr = per_paddr; - tcd->soff = 4; + + if (!IS_ALIGNED(sg_dma_address(sg), mchan->swidth)) + goto err_prep; + + tcd->soff = mchan->swidth; tcd->doff = 0; } - tcd->ssize = MPC_DMA_TSIZE_4; - tcd->dsize = MPC_DMA_TSIZE_4; + tcd->ssize = buswidth_to_dmatsize(mchan->swidth); + tcd->dsize = buswidth_to_dmatsize(mchan->dwidth); - len = sg_dma_len(sg); - tcd->nbytes = tcd_nunits * 4; - if (!IS_ALIGNED(len, tcd->nbytes)) - goto err_prep; + if (mdma->is_mpc8308) { + tcd->nbytes = sg_dma_len(sg); + if (!IS_ALIGNED(tcd->nbytes, mchan->swidth)) + goto err_prep; - iter = len / tcd->nbytes; - if (iter >= 1 << 15) { - /* len is too big */ - goto err_prep; + /* No major loops for MPC8303 */ + tcd->biter = 1; + tcd->citer = 1; + } else { + len = sg_dma_len(sg); + tcd->nbytes = tcd_nunits * tcd->ssize; + if (!IS_ALIGNED(len, tcd->nbytes)) + goto err_prep; + + iter = len / tcd->nbytes; + if (iter >= 1 << 15) { + /* len is too big */ + goto err_prep; + } + /* citer_linkch contains the high bits of iter */ + tcd->biter = iter & 0x1ff; + tcd->biter_linkch = iter >> 9; + tcd->citer = tcd->biter; + tcd->citer_linkch = tcd->biter_linkch; } - /* citer_linkch contains the high bits of iter */ - tcd->biter = iter & 0x1ff; - tcd->biter_linkch = iter >> 9; - tcd->citer = tcd->biter; - tcd->citer_linkch = tcd->biter_linkch; tcd->e_sg = 0; tcd->d_req = 1; @@ -796,40 +820,62 @@ err_prep: return NULL; } +inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308) +{ + switch (buswidth) { + case 16: + if (is_mpc8308) + return false; + case 1: + case 2: + case 4: + case 32: + break; + default: + return false; + } + + return true; +} + static int mpc_dma_device_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); + struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan); unsigned long flags; /* * Software constraints: - * - only transfers between a peripheral device and - * memory are supported; - * - only peripheral devices with 4-byte FIFO access register - * are supported; - * - minimal transfer chunk is 4 bytes and consequently - * source and destination addresses must be 4-byte aligned - * and transfer size must be aligned on (4 * maxburst) - * boundary; - * - during the transfer RAM address is being incremented by - * the size of minimal transfer chunk; - * - peripheral port's address is constant during the transfer. + * - only transfers between a peripheral device and memory are + * supported + * - transfer chunk sizes of 1, 2, 4, 16 (for MPC512x), and 32 bytes + * are supported, and, consequently, source addresses and + * destination addresses; must be aligned accordingly; furthermore, + * for MPC512x SoCs, the transfer size must be aligned on (chunk + * size * maxburst) + * - during the transfer, the RAM address is incremented by the size + * of transfer chunk + * - the peripheral port's address is constant during the transfer. */ - if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES || - cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES || - !IS_ALIGNED(cfg->src_addr, 4) || - !IS_ALIGNED(cfg->dst_addr, 4)) { + if (!IS_ALIGNED(cfg->src_addr, cfg->src_addr_width) || + !IS_ALIGNED(cfg->dst_addr, cfg->dst_addr_width)) { return -EINVAL; } + if (!is_buswidth_valid(cfg->src_addr_width, mdma->is_mpc8308) || + !is_buswidth_valid(cfg->dst_addr_width, mdma->is_mpc8308)) + return -EINVAL; + spin_lock_irqsave(&mchan->lock, flags); mchan->src_per_paddr = cfg->src_addr; mchan->src_tcd_nunits = cfg->src_maxburst; + mchan->swidth = cfg->src_addr_width; mchan->dst_per_paddr = cfg->dst_addr; mchan->dst_tcd_nunits = cfg->dst_maxburst; + mchan->dwidth = cfg->dst_addr_width; /* Apply defaults */ if (mchan->src_tcd_nunits == 0) @@ -875,7 +921,6 @@ static int mpc_dma_probe(struct platform_device *op) mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL); if (!mdma) { - dev_err(dev, "Memory exhausted!\n"); retval = -ENOMEM; goto err; } @@ -999,7 +1044,8 @@ static int mpc_dma_probe(struct platform_device *op) out_be32(&mdma->regs->dmaerrl, 0xFFFF); } else { out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG | - MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA); + MPC_DMA_DMACR_ERGA | + MPC_DMA_DMACR_ERCA); /* Disable hardware DMA requests */ out_be32(&mdma->regs->dmaerqh, 0); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 3922a5d56..d0446a759 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -31,6 +31,12 @@ #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_type { + XOR_ORION, + XOR_ARMADA_38X, + XOR_ARMADA_37XX, +}; + enum mv_xor_mode { XOR_MODE_IN_REG, XOR_MODE_IN_DESC, @@ -477,7 +483,7 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); dev_dbg(mv_chan_to_devp(mv_chan), - "%s src_cnt: %d len: %u dest %pad flags: %ld\n", + "%s src_cnt: %d len: %zu dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); sw_desc = mv_chan_alloc_slot(mv_chan); @@ -697,8 +703,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) goto free_resources; } - src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), 0, - PAGE_SIZE, DMA_TO_DEVICE); + src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), + (size_t)src & ~PAGE_MASK, PAGE_SIZE, + DMA_TO_DEVICE); unmap->addr[0] = src_dma; ret = dma_mapping_error(dma_chan->device->dev, src_dma); @@ -708,8 +715,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) } unmap->to_cnt = 1; - dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), 0, - PAGE_SIZE, DMA_FROM_DEVICE); + dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), + (size_t)dest & ~PAGE_MASK, PAGE_SIZE, + DMA_FROM_DEVICE); unmap->addr[1] = dest_dma; ret = dma_mapping_error(dma_chan->device->dev, dest_dma); @@ -933,7 +941,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) + int idx, dma_cap_mask_t cap_mask, int irq) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -945,7 +953,10 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; - mv_chan->op_in_desc = op_in_desc; + if (xordev->xor_type == XOR_ORION) + mv_chan->op_in_desc = XOR_MODE_IN_REG; + else + mv_chan->op_in_desc = XOR_MODE_IN_DESC; dma_dev = &mv_chan->dmadev; @@ -1085,6 +1096,33 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static void +mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev) +{ + void __iomem *base = xordev->xor_high_base; + u32 win_enable = 0; + int i; + + for (i = 0; i < 8; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + /* + * For Armada3700 open default 4GB Mbus window. The dram + * related configuration are done at AXIS level. + */ + writel(0xffff0000, base + WINDOW_SIZE(0)); + win_enable |= 1; + win_enable |= 3 << 16; + + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); + writel(0, base + WINDOW_OVERRIDE_CTRL(0)); + writel(0, base + WINDOW_OVERRIDE_CTRL(1)); +} + /* * Since this XOR driver is basically used only for RAID5, we don't * need to care about synchronizing ->suspend with DMA activity, @@ -1129,6 +1167,11 @@ static int mv_xor_resume(struct platform_device *dev) XOR_INTR_MASK(mv_chan)); } + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + return 0; + } + dram = mv_mbus_dram_info(); if (dram) mv_xor_conf_mbus_windows(xordev, dram); @@ -1137,8 +1180,9 @@ static int mv_xor_resume(struct platform_device *dev) } static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, - { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + { .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X }, + { .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX }, {}, }; @@ -1152,7 +1196,6 @@ static int mv_xor_probe(struct platform_device *pdev) struct resource *res; unsigned int max_engines, max_channels; int i, ret; - int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1180,12 +1223,30 @@ static int mv_xor_probe(struct platform_device *pdev) platform_set_drvdata(pdev, xordev); + + /* + * We need to know which type of XOR device we use before + * setting up. In non-dt case it can only be the legacy one. + */ + xordev->xor_type = XOR_ORION; + if (pdev->dev.of_node) { + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); + + xordev->xor_type = (uintptr_t)of_id->data; + } + /* * (Re-)program MBUS remapping windows if we are asked to. */ - dram = mv_mbus_dram_info(); - if (dram) - mv_xor_conf_mbus_windows(xordev, dram); + if (xordev->xor_type == XOR_ARMADA_37XX) { + mv_xor_conf_mbus_windows_a3700(xordev); + } else { + dram = mv_mbus_dram_info(); + if (dram) + mv_xor_conf_mbus_windows(xordev, dram); + } /* Not all platforms can gate the clock, so it is not * an error if the clock does not exists. @@ -1199,12 +1260,16 @@ static int mv_xor_probe(struct platform_device *pdev) * order for async_tx to perform well. So we limit the number * of engines and channels so that we take into account this * constraint. Note that we also want to use channels from - * separate engines when possible. + * separate engines when possible. For dual-CPU Armada 3700 + * SoC with single XOR engine allow using its both channels. */ max_engines = num_present_cpus(); - max_channels = min_t(unsigned int, - MV_XOR_MAX_CHANNELS, - DIV_ROUND_UP(num_present_cpus(), 2)); + if (xordev->xor_type == XOR_ARMADA_37XX) + max_channels = num_present_cpus(); + else + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); if (mv_xor_engine_count >= max_engines) return 0; @@ -1212,15 +1277,11 @@ static int mv_xor_probe(struct platform_device *pdev) if (pdev->dev.of_node) { struct device_node *np; int i = 0; - const struct of_device_id *of_id = - of_match_device(mv_xor_dt_ids, - &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; - op_in_desc = (int)of_id->data; if (i >= max_channels) continue; @@ -1237,7 +1298,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq, op_in_desc); + cap_mask, irq); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1266,8 +1327,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq, - XOR_MODE_IN_REG); + cd->cap_mask, irq); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index c19fe30e5..bf56e082e 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -85,6 +85,7 @@ struct mv_xor_device { void __iomem *xor_high_base; struct clk *clk; struct mv_xor_chan *channels[MV_XOR_MAX_CHANNELS]; + int xor_type; }; /** diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index 1e1f2986e..faae0bfe1 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -240,8 +240,9 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, struct of_phandle_args dma_spec; struct of_dma *ofdma; struct dma_chan *chan; - int count, i; + int count, i, start; int ret_no_channel = -ENODEV; + static atomic_t last_index; if (!np || !name) { pr_err("%s: not enough information provided\n", __func__); @@ -259,8 +260,15 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, return ERR_PTR(-ENODEV); } + /* + * approximate an average distribution across multiple + * entries with the same name + */ + start = atomic_inc_return(&last_index); for (i = 0; i < count; i++) { - if (of_dma_match_channel(np, name, i, &dma_spec)) + if (of_dma_match_channel(np, name, + (i + start) % count, + &dma_spec)) continue; mutex_lock(&of_dma_lock); diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 77c1c4400..e756a30cc 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -117,6 +117,7 @@ struct pxad_chan { /* protected by vc->lock */ struct pxad_phy *phy; struct dma_pool *desc_pool; /* Descriptors pool */ + dma_cookie_t bus_error; }; struct pxad_device { @@ -563,6 +564,7 @@ static void pxad_launch_chan(struct pxad_chan *chan, return; } } + chan->bus_error = 0; /* * Program the descriptor's address into the DMA controller, @@ -666,6 +668,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) struct virt_dma_desc *vd, *tmp; unsigned int dcsr; unsigned long flags; + dma_cookie_t last_started = 0; BUG_ON(!chan); @@ -678,6 +681,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) dev_dbg(&chan->vc.chan.dev->device, "%s(): checking txd %p[%x]: completed=%d\n", __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + last_started = vd->tx.cookie; if (to_pxad_sw_desc(vd)->cyclic) { vchan_cyclic_callback(vd); break; @@ -690,7 +694,12 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) } } - if (dcsr & PXA_DCSR_STOPSTATE) { + if (dcsr & PXA_DCSR_BUSERR) { + chan->bus_error = last_started; + phy_disable(phy); + } + + if (!chan->bus_error && dcsr & PXA_DCSR_STOPSTATE) { dev_dbg(&chan->vc.chan.dev->device, "%s(): channel stopped, submitted_empty=%d issued_empty=%d", __func__, @@ -1249,6 +1258,9 @@ static enum dma_status pxad_tx_status(struct dma_chan *dchan, struct pxad_chan *chan = to_pxad_chan(dchan); enum dma_status ret; + if (cookie == chan->bus_error) + return DMA_ERROR; + ret = dma_cookie_status(dchan, cookie, txstate); if (likely(txstate && (ret != DMA_ERROR))) dma_set_residue(txstate, pxad_residue(chan, cookie)); @@ -1321,7 +1333,7 @@ static int pxad_init_phys(struct platform_device *op, return 0; } -static const struct of_device_id const pxad_dt_ids[] = { +static const struct of_device_id pxad_dt_ids[] = { { .compatible = "marvell,pdma-1.0", }, {} }; diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile index bfea69902..4bfc38b45 100644 --- a/drivers/dma/qcom/Makefile +++ b/drivers/dma/qcom/Makefile @@ -1,3 +1,5 @@ obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o hdma_mgmt-objs := hidma_mgmt.o hidma_mgmt_sys.o +obj-$(CONFIG_QCOM_HIDMA) += hdma.o +hdma-objs := hidma_ll.o hidma.o hidma_dbg.o diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d5e0a9c3a..969b48176 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -342,7 +342,7 @@ static const struct reg_offset_data bam_v1_7_reg_info[] = { #define BAM_DESC_FIFO_SIZE SZ_32K #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1) -#define BAM_MAX_DATA_SIZE (SZ_32K - 8) +#define BAM_FIFO_SIZE (SZ_32K - 8) struct bam_chan { struct virt_dma_chan vc; @@ -387,6 +387,7 @@ struct bam_device { /* execution environment ID, from DT */ u32 ee; + bool controlled_remotely; const struct reg_offset_data *layout; @@ -458,7 +459,7 @@ static void bam_chan_init_hw(struct bam_chan *bchan, */ writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)), bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR)); - writel_relaxed(BAM_DESC_FIFO_SIZE, + writel_relaxed(BAM_FIFO_SIZE, bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES)); /* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */ @@ -604,7 +605,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, /* calculate number of required entries */ for_each_sg(sgl, sg, sg_len, i) - num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_MAX_DATA_SIZE); + num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE); /* allocate enough room to accomodate the number of entries */ async_desc = kzalloc(sizeof(*async_desc) + @@ -635,10 +636,10 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, desc->addr = cpu_to_le32(sg_dma_address(sg) + curr_offset); - if (remainder > BAM_MAX_DATA_SIZE) { - desc->size = cpu_to_le16(BAM_MAX_DATA_SIZE); - remainder -= BAM_MAX_DATA_SIZE; - curr_offset += BAM_MAX_DATA_SIZE; + if (remainder > BAM_FIFO_SIZE) { + desc->size = cpu_to_le16(BAM_FIFO_SIZE); + remainder -= BAM_FIFO_SIZE; + curr_offset += BAM_FIFO_SIZE; } else { desc->size = cpu_to_le16(remainder); remainder = 0; @@ -801,13 +802,17 @@ static irqreturn_t bam_dma_irq(int irq, void *data) if (srcs & P_IRQ) tasklet_schedule(&bdev->task); - if (srcs & BAM_IRQ) + if (srcs & BAM_IRQ) { clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS)); - /* don't allow reorder of the various accesses to the BAM registers */ - mb(); + /* + * don't allow reorder of the various accesses to the BAM + * registers + */ + mb(); - writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR)); + writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR)); + } return IRQ_HANDLED; } @@ -1038,6 +1043,9 @@ static int bam_init(struct bam_device *bdev) val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES)); bdev->num_channels = val & BAM_NUM_PIPES_MASK; + if (bdev->controlled_remotely) + return 0; + /* s/w reset bam */ /* after reset all pipes are disabled and idle */ val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL)); @@ -1125,6 +1133,9 @@ static int bam_dma_probe(struct platform_device *pdev) return ret; } + bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node, + "qcom,controlled-remotely"); + bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk"); if (IS_ERR(bdev->bamclk)) return PTR_ERR(bdev->bamclk); @@ -1163,7 +1174,7 @@ static int bam_dma_probe(struct platform_device *pdev) /* set max dma segment size */ bdev->common.dev = bdev->dev; bdev->common.dev->dma_parms = &bdev->dma_parms; - ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE); + ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE); if (ret) { dev_err(bdev->dev, "cannot set maximum segment size\n"); goto err_bam_channel_exit; @@ -1234,6 +1245,9 @@ static int bam_dma_remove(struct platform_device *pdev) bam_dma_terminate_all(&bdev->channels[i].vc.chan); tasklet_kill(&bdev->channels[i].vc.task); + if (!bdev->channels[i].fifo_virt) + continue; + dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bdev->channels[i].fifo_virt, bdev->channels[i].fifo_phys); diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c index cccc78efb..41b5c6dee 100644 --- a/drivers/dma/qcom/hidma.c +++ b/drivers/dma/qcom/hidma.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine interface * - * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -404,7 +404,7 @@ static int hidma_terminate_channel(struct dma_chan *chan) spin_unlock_irqrestore(&mchan->lock, irqflags); /* this suspends the existing transfer */ - rc = hidma_ll_pause(dmadev->lldev); + rc = hidma_ll_disable(dmadev->lldev); if (rc) { dev_err(dmadev->ddev.dev, "channel did not pause\n"); goto out; @@ -427,7 +427,7 @@ static int hidma_terminate_channel(struct dma_chan *chan) list_move(&mdesc->node, &mchan->free); } - rc = hidma_ll_resume(dmadev->lldev); + rc = hidma_ll_enable(dmadev->lldev); out: pm_runtime_mark_last_busy(dmadev->ddev.dev); pm_runtime_put_autosuspend(dmadev->ddev.dev); @@ -488,7 +488,7 @@ static int hidma_pause(struct dma_chan *chan) dmadev = to_hidma_dev(mchan->chan.device); if (!mchan->paused) { pm_runtime_get_sync(dmadev->ddev.dev); - if (hidma_ll_pause(dmadev->lldev)) + if (hidma_ll_disable(dmadev->lldev)) dev_warn(dmadev->ddev.dev, "channel did not stop\n"); mchan->paused = true; pm_runtime_mark_last_busy(dmadev->ddev.dev); @@ -507,7 +507,7 @@ static int hidma_resume(struct dma_chan *chan) dmadev = to_hidma_dev(mchan->chan.device); if (mchan->paused) { pm_runtime_get_sync(dmadev->ddev.dev); - rc = hidma_ll_resume(dmadev->lldev); + rc = hidma_ll_enable(dmadev->lldev); if (!rc) mchan->paused = false; else @@ -530,6 +530,43 @@ static irqreturn_t hidma_chirq_handler(int chirq, void *arg) return hidma_ll_inthandler(chirq, lldev); } +static ssize_t hidma_show_values(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct hidma_dev *mdev = platform_get_drvdata(pdev); + + buf[0] = 0; + + if (strcmp(attr->attr.name, "chid") == 0) + sprintf(buf, "%d\n", mdev->chidx); + + return strlen(buf); +} + +static int hidma_create_sysfs_entry(struct hidma_dev *dev, char *name, + int mode) +{ + struct device_attribute *attrs; + char *name_copy; + + attrs = devm_kmalloc(dev->ddev.dev, sizeof(struct device_attribute), + GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + name_copy = devm_kstrdup(dev->ddev.dev, name, GFP_KERNEL); + if (!name_copy) + return -ENOMEM; + + attrs->attr.name = name_copy; + attrs->attr.mode = mode; + attrs->show = hidma_show_values; + sysfs_attr_init(&attrs->attr); + + return device_create_file(dev->ddev.dev, attrs); +} + static int hidma_probe(struct platform_device *pdev) { struct hidma_dev *dmadev; @@ -644,6 +681,8 @@ static int hidma_probe(struct platform_device *pdev) dmadev->irq = chirq; tasklet_init(&dmadev->task, hidma_issue_task, (unsigned long)dmadev); + hidma_debug_init(dmadev); + hidma_create_sysfs_entry(dmadev, "chid", S_IRUGO); dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n"); platform_set_drvdata(pdev, dmadev); pm_runtime_mark_last_busy(dmadev->ddev.dev); @@ -651,6 +690,7 @@ static int hidma_probe(struct platform_device *pdev) return 0; uninit: + hidma_debug_uninit(dmadev); hidma_ll_uninit(dmadev->lldev); dmafree: if (dmadev) @@ -668,6 +708,7 @@ static int hidma_remove(struct platform_device *pdev) pm_runtime_get_sync(dmadev->ddev.dev); dma_async_device_unregister(&dmadev->ddev); devm_free_irq(dmadev->ddev.dev, dmadev->irq, dmadev->lldev); + hidma_debug_uninit(dmadev); hidma_ll_uninit(dmadev->lldev); hidma_free(dmadev); @@ -689,7 +730,6 @@ static const struct of_device_id hidma_match[] = { {.compatible = "qcom,hidma-1.0",}, {}, }; - MODULE_DEVICE_TABLE(of, hidma_match); static struct platform_driver hidma_driver = { diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h index 231e306f6..db413a5ef 100644 --- a/drivers/dma/qcom/hidma.h +++ b/drivers/dma/qcom/hidma.h @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA data structures * - * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -20,32 +20,29 @@ #include <linux/interrupt.h> #include <linux/dmaengine.h> -#define TRE_SIZE 32 /* each TRE is 32 bytes */ -#define TRE_CFG_IDX 0 -#define TRE_LEN_IDX 1 -#define TRE_SRC_LOW_IDX 2 -#define TRE_SRC_HI_IDX 3 -#define TRE_DEST_LOW_IDX 4 -#define TRE_DEST_HI_IDX 5 - -struct hidma_tx_status { - u8 err_info; /* error record in this transfer */ - u8 err_code; /* completion code */ -}; +#define HIDMA_TRE_SIZE 32 /* each TRE is 32 bytes */ +#define HIDMA_TRE_CFG_IDX 0 +#define HIDMA_TRE_LEN_IDX 1 +#define HIDMA_TRE_SRC_LOW_IDX 2 +#define HIDMA_TRE_SRC_HI_IDX 3 +#define HIDMA_TRE_DEST_LOW_IDX 4 +#define HIDMA_TRE_DEST_HI_IDX 5 struct hidma_tre { atomic_t allocated; /* if this channel is allocated */ bool queued; /* flag whether this is pending */ u16 status; /* status */ - u32 chidx; /* index of the tre */ + u32 idx; /* index of the tre */ u32 dma_sig; /* signature of the tre */ const char *dev_name; /* name of the device */ void (*callback)(void *data); /* requester callback */ void *data; /* Data associated with this channel*/ struct hidma_lldev *lldev; /* lldma device pointer */ - u32 tre_local[TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */ + u32 tre_local[HIDMA_TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */ u32 tre_index; /* the offset where this was written*/ u32 int_flags; /* interrupt flags */ + u8 err_info; /* error record in this transfer */ + u8 err_code; /* completion code */ }; struct hidma_lldev { @@ -61,22 +58,21 @@ struct hidma_lldev { void __iomem *evca; /* Event Channel address */ struct hidma_tre **pending_tre_list; /* Pointers to pending TREs */ - struct hidma_tx_status - *tx_status_list; /* Pointers to pending TREs status*/ s32 pending_tre_count; /* Number of TREs pending */ void *tre_ring; /* TRE ring */ - dma_addr_t tre_ring_handle; /* TRE ring to be shared with HW */ + dma_addr_t tre_dma; /* TRE ring to be shared with HW */ u32 tre_ring_size; /* Byte size of the ring */ u32 tre_processed_off; /* last processed TRE */ void *evre_ring; /* EVRE ring */ - dma_addr_t evre_ring_handle; /* EVRE ring to be shared with HW */ + dma_addr_t evre_dma; /* EVRE ring to be shared with HW */ u32 evre_ring_size; /* Byte size of the ring */ u32 evre_processed_off; /* last processed EVRE */ u32 tre_write_offset; /* TRE write location */ struct tasklet_struct task; /* task delivering notifications */ + struct tasklet_struct rst_task; /* task to reset HW */ DECLARE_KFIFO_PTR(handoff_fifo, struct hidma_tre *); /* pending TREs FIFO */ }; @@ -145,8 +141,8 @@ enum dma_status hidma_ll_status(struct hidma_lldev *llhndl, u32 tre_ch); bool hidma_ll_isenabled(struct hidma_lldev *llhndl); void hidma_ll_queue_request(struct hidma_lldev *llhndl, u32 tre_ch); void hidma_ll_start(struct hidma_lldev *llhndl); -int hidma_ll_pause(struct hidma_lldev *llhndl); -int hidma_ll_resume(struct hidma_lldev *llhndl); +int hidma_ll_disable(struct hidma_lldev *lldev); +int hidma_ll_enable(struct hidma_lldev *llhndl); void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch, dma_addr_t src, dma_addr_t dest, u32 len, u32 flags); int hidma_ll_setup(struct hidma_lldev *lldev); @@ -157,4 +153,6 @@ int hidma_ll_uninit(struct hidma_lldev *llhndl); irqreturn_t hidma_ll_inthandler(int irq, void *arg); void hidma_cleanup_pending_tre(struct hidma_lldev *llhndl, u8 err_info, u8 err_code); +int hidma_debug_init(struct hidma_dev *dmadev); +void hidma_debug_uninit(struct hidma_dev *dmadev); #endif diff --git a/drivers/dma/qcom/hidma_dbg.c b/drivers/dma/qcom/hidma_dbg.c new file mode 100644 index 000000000..fa827e5ff --- /dev/null +++ b/drivers/dma/qcom/hidma_dbg.c @@ -0,0 +1,217 @@ +/* + * Qualcomm Technologies HIDMA debug file + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/pm_runtime.h> + +#include "hidma.h" + +static void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch) +{ + struct hidma_lldev *lldev = llhndl; + struct hidma_tre *tre; + u32 length; + dma_addr_t src_start; + dma_addr_t dest_start; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in chstats:%d", tre_ch); + return; + } + tre = &lldev->trepool[tre_ch]; + seq_printf(s, "------Channel %d -----\n", tre_ch); + seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated)); + seq_printf(s, "queued = 0x%x\n", tre->queued); + seq_printf(s, "err_info = 0x%x\n", tre->err_info); + seq_printf(s, "err_code = 0x%x\n", tre->err_code); + seq_printf(s, "status = 0x%x\n", tre->status); + seq_printf(s, "idx = 0x%x\n", tre->idx); + seq_printf(s, "dma_sig = 0x%x\n", tre->dma_sig); + seq_printf(s, "dev_name=%s\n", tre->dev_name); + seq_printf(s, "callback=%p\n", tre->callback); + seq_printf(s, "data=%p\n", tre->data); + seq_printf(s, "tre_index = 0x%x\n", tre->tre_index); + + tre_local = &tre->tre_local[0]; + src_start = tre_local[HIDMA_TRE_SRC_LOW_IDX]; + src_start = ((u64) (tre_local[HIDMA_TRE_SRC_HI_IDX]) << 32) + src_start; + dest_start = tre_local[HIDMA_TRE_DEST_LOW_IDX]; + dest_start += ((u64) (tre_local[HIDMA_TRE_DEST_HI_IDX]) << 32); + length = tre_local[HIDMA_TRE_LEN_IDX]; + + seq_printf(s, "src=%pap\n", &src_start); + seq_printf(s, "dest=%pap\n", &dest_start); + seq_printf(s, "length = 0x%x\n", length); +} + +static void hidma_ll_devstats(struct seq_file *s, void *llhndl) +{ + struct hidma_lldev *lldev = llhndl; + + seq_puts(s, "------Device -----\n"); + seq_printf(s, "lldev init = 0x%x\n", lldev->initialized); + seq_printf(s, "trch_state = 0x%x\n", lldev->trch_state); + seq_printf(s, "evch_state = 0x%x\n", lldev->evch_state); + seq_printf(s, "chidx = 0x%x\n", lldev->chidx); + seq_printf(s, "nr_tres = 0x%x\n", lldev->nr_tres); + seq_printf(s, "trca=%p\n", lldev->trca); + seq_printf(s, "tre_ring=%p\n", lldev->tre_ring); + seq_printf(s, "tre_ring_handle=%pap\n", &lldev->tre_dma); + seq_printf(s, "tre_ring_size = 0x%x\n", lldev->tre_ring_size); + seq_printf(s, "tre_processed_off = 0x%x\n", lldev->tre_processed_off); + seq_printf(s, "pending_tre_count=%d\n", lldev->pending_tre_count); + seq_printf(s, "evca=%p\n", lldev->evca); + seq_printf(s, "evre_ring=%p\n", lldev->evre_ring); + seq_printf(s, "evre_ring_handle=%pap\n", &lldev->evre_dma); + seq_printf(s, "evre_ring_size = 0x%x\n", lldev->evre_ring_size); + seq_printf(s, "evre_processed_off = 0x%x\n", lldev->evre_processed_off); + seq_printf(s, "tre_write_offset = 0x%x\n", lldev->tre_write_offset); +} + +/* + * hidma_chan_stats: display HIDMA channel statistics + * + * Display the statistics for the current HIDMA virtual channel device. + */ +static int hidma_chan_stats(struct seq_file *s, void *unused) +{ + struct hidma_chan *mchan = s->private; + struct hidma_desc *mdesc; + struct hidma_dev *dmadev = mchan->dmadev; + + pm_runtime_get_sync(dmadev->ddev.dev); + seq_printf(s, "paused=%u\n", mchan->paused); + seq_printf(s, "dma_sig=%u\n", mchan->dma_sig); + seq_puts(s, "prepared\n"); + list_for_each_entry(mdesc, &mchan->prepared, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "active\n"); + list_for_each_entry(mdesc, &mchan->active, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + seq_puts(s, "completed\n"); + list_for_each_entry(mdesc, &mchan->completed, node) + hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch); + + hidma_ll_devstats(s, mchan->dmadev->lldev); + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); + return 0; +} + +/* + * hidma_dma_info: display HIDMA device info + * + * Display the info for the current HIDMA device. + */ +static int hidma_dma_info(struct seq_file *s, void *unused) +{ + struct hidma_dev *dmadev = s->private; + resource_size_t sz; + + seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors); + seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca); + seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->trca_resource->start); + sz = resource_size(dmadev->trca_resource); + seq_printf(s, "dev_trca_size=%pa\n", &sz); + seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca); + seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->evca_resource->start); + sz = resource_size(dmadev->evca_resource); + seq_printf(s, "dev_evca_size=%pa\n", &sz); + return 0; +} + +static int hidma_chan_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, hidma_chan_stats, inode->i_private); +} + +static int hidma_dma_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, hidma_dma_info, inode->i_private); +} + +static const struct file_operations hidma_chan_fops = { + .open = hidma_chan_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations hidma_dma_fops = { + .open = hidma_dma_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void hidma_debug_uninit(struct hidma_dev *dmadev) +{ + debugfs_remove_recursive(dmadev->debugfs); + debugfs_remove_recursive(dmadev->stats); +} + +int hidma_debug_init(struct hidma_dev *dmadev) +{ + int rc = 0; + int chidx = 0; + struct list_head *position = NULL; + + dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL); + if (!dmadev->debugfs) { + rc = -ENODEV; + return rc; + } + + /* walk through the virtual channel list */ + list_for_each(position, &dmadev->ddev.channels) { + struct hidma_chan *chan; + + chan = list_entry(position, struct hidma_chan, + chan.device_node); + sprintf(chan->dbg_name, "chan%d", chidx); + chan->debugfs = debugfs_create_dir(chan->dbg_name, + dmadev->debugfs); + if (!chan->debugfs) { + rc = -ENOMEM; + goto cleanup; + } + chan->stats = debugfs_create_file("stats", S_IRUGO, + chan->debugfs, chan, + &hidma_chan_fops); + if (!chan->stats) { + rc = -ENOMEM; + goto cleanup; + } + chidx++; + } + + dmadev->stats = debugfs_create_file("stats", S_IRUGO, + dmadev->debugfs, dmadev, + &hidma_dma_fops); + if (!dmadev->stats) { + rc = -ENOMEM; + goto cleanup; + } + + return 0; +cleanup: + hidma_debug_uninit(dmadev); + return rc; +} diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c new file mode 100644 index 000000000..f39290015 --- /dev/null +++ b/drivers/dma/qcom/hidma_ll.c @@ -0,0 +1,872 @@ +/* + * Qualcomm Technologies HIDMA DMA engine low level code + * + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/dmaengine.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/dma-mapping.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <linux/iopoll.h> +#include <linux/kfifo.h> +#include <linux/bitops.h> + +#include "hidma.h" + +#define HIDMA_EVRE_SIZE 16 /* each EVRE is 16 bytes */ + +#define HIDMA_TRCA_CTRLSTS_REG 0x000 +#define HIDMA_TRCA_RING_LOW_REG 0x008 +#define HIDMA_TRCA_RING_HIGH_REG 0x00C +#define HIDMA_TRCA_RING_LEN_REG 0x010 +#define HIDMA_TRCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_CTRLSTS_REG 0x000 +#define HIDMA_EVCA_INTCTRL_REG 0x004 +#define HIDMA_EVCA_RING_LOW_REG 0x008 +#define HIDMA_EVCA_RING_HIGH_REG 0x00C +#define HIDMA_EVCA_RING_LEN_REG 0x010 +#define HIDMA_EVCA_WRITE_PTR_REG 0x020 +#define HIDMA_EVCA_DOORBELL_REG 0x400 + +#define HIDMA_EVCA_IRQ_STAT_REG 0x100 +#define HIDMA_EVCA_IRQ_CLR_REG 0x108 +#define HIDMA_EVCA_IRQ_EN_REG 0x110 + +#define HIDMA_EVRE_CFG_IDX 0 + +#define HIDMA_EVRE_ERRINFO_BIT_POS 24 +#define HIDMA_EVRE_CODE_BIT_POS 28 + +#define HIDMA_EVRE_ERRINFO_MASK GENMASK(3, 0) +#define HIDMA_EVRE_CODE_MASK GENMASK(3, 0) + +#define HIDMA_CH_CONTROL_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_MASK GENMASK(7, 0) +#define HIDMA_CH_STATE_BIT_POS 0x8 + +#define HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS 0 +#define HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS 1 +#define HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9 +#define HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS 10 +#define HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS 11 +#define HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS 14 + +#define ENABLE_IRQS (BIT(HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS)) + +#define HIDMA_INCREMENT_ITERATOR(iter, size, ring_size) \ +do { \ + iter += size; \ + if (iter >= ring_size) \ + iter -= ring_size; \ +} while (0) + +#define HIDMA_CH_STATE(val) \ + ((val >> HIDMA_CH_STATE_BIT_POS) & HIDMA_CH_STATE_MASK) + +#define HIDMA_ERR_INT_MASK \ + (BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \ + BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS)) + +enum ch_command { + HIDMA_CH_DISABLE = 0, + HIDMA_CH_ENABLE = 1, + HIDMA_CH_SUSPEND = 2, + HIDMA_CH_RESET = 9, +}; + +enum ch_state { + HIDMA_CH_DISABLED = 0, + HIDMA_CH_ENABLED = 1, + HIDMA_CH_RUNNING = 2, + HIDMA_CH_SUSPENDED = 3, + HIDMA_CH_STOPPED = 4, +}; + +enum tre_type { + HIDMA_TRE_MEMCPY = 3, +}; + +enum err_code { + HIDMA_EVRE_STATUS_COMPLETE = 1, + HIDMA_EVRE_STATUS_ERROR = 4, +}; + +static int hidma_is_chan_enabled(int state) +{ + switch (state) { + case HIDMA_CH_ENABLED: + case HIDMA_CH_RUNNING: + return true; + default: + return false; + } +} + +void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch); + return; + } + + atomic_set(&tre->allocated, 0); +} + +int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name, + void (*callback)(void *data), void *data, u32 *tre_ch) +{ + unsigned int i; + struct hidma_tre *tre; + u32 *tre_local; + + if (!tre_ch || !lldev) + return -EINVAL; + + /* need to have at least one empty spot in the queue */ + for (i = 0; i < lldev->nr_tres - 1; i++) { + if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1)) + break; + } + + if (i == (lldev->nr_tres - 1)) + return -ENOMEM; + + tre = &lldev->trepool[i]; + tre->dma_sig = sig; + tre->dev_name = dev_name; + tre->callback = callback; + tre->data = data; + tre->idx = i; + tre->status = 0; + tre->queued = 0; + tre->err_code = 0; + tre->err_info = 0; + tre->lldev = lldev; + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_CFG_IDX] = HIDMA_TRE_MEMCPY; + tre_local[HIDMA_TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8; + tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16); /* set IEOB */ + *tre_ch = i; + if (callback) + callback(data); + return 0; +} + +/* + * Multiple TREs may be queued and waiting in the pending queue. + */ +static void hidma_ll_tre_complete(unsigned long arg) +{ + struct hidma_lldev *lldev = (struct hidma_lldev *)arg; + struct hidma_tre *tre; + + while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) { + /* call the user if it has been read by the hardware */ + if (tre->callback) + tre->callback(tre->data); + } +} + +static int hidma_post_completed(struct hidma_lldev *lldev, int tre_iterator, + u8 err_info, u8 err_code) +{ + struct hidma_tre *tre; + unsigned long flags; + + spin_lock_irqsave(&lldev->lock, flags); + tre = lldev->pending_tre_list[tre_iterator / HIDMA_TRE_SIZE]; + if (!tre) { + spin_unlock_irqrestore(&lldev->lock, flags); + dev_warn(lldev->dev, "tre_index [%d] and tre out of sync\n", + tre_iterator / HIDMA_TRE_SIZE); + return -EINVAL; + } + lldev->pending_tre_list[tre->tre_index] = NULL; + + /* + * Keep track of pending TREs that SW is expecting to receive + * from HW. We got one now. Decrement our counter. + */ + lldev->pending_tre_count--; + if (lldev->pending_tre_count < 0) { + dev_warn(lldev->dev, "tre count mismatch on completion"); + lldev->pending_tre_count = 0; + } + + spin_unlock_irqrestore(&lldev->lock, flags); + + tre->err_info = err_info; + tre->err_code = err_code; + tre->queued = 0; + + kfifo_put(&lldev->handoff_fifo, tre); + tasklet_schedule(&lldev->task); + + return 0; +} + +/* + * Called to handle the interrupt for the channel. + * Return a positive number if TRE or EVRE were consumed on this run. + * Return a positive number if there are pending TREs or EVREs. + * Return 0 if there is nothing to consume or no pending TREs/EVREs found. + */ +static int hidma_handle_tre_completion(struct hidma_lldev *lldev) +{ + u32 evre_ring_size = lldev->evre_ring_size; + u32 tre_ring_size = lldev->tre_ring_size; + u32 err_info, err_code, evre_write_off; + u32 tre_iterator, evre_iterator; + u32 num_completed = 0; + + evre_write_off = readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + tre_iterator = lldev->tre_processed_off; + evre_iterator = lldev->evre_processed_off; + + if ((evre_write_off > evre_ring_size) || + (evre_write_off % HIDMA_EVRE_SIZE)) { + dev_err(lldev->dev, "HW reports invalid EVRE write offset\n"); + return 0; + } + + /* + * By the time control reaches here the number of EVREs and TREs + * may not match. Only consume the ones that hardware told us. + */ + while ((evre_iterator != evre_write_off)) { + u32 *current_evre = lldev->evre_ring + evre_iterator; + u32 cfg; + + cfg = current_evre[HIDMA_EVRE_CFG_IDX]; + err_info = cfg >> HIDMA_EVRE_ERRINFO_BIT_POS; + err_info &= HIDMA_EVRE_ERRINFO_MASK; + err_code = + (cfg >> HIDMA_EVRE_CODE_BIT_POS) & HIDMA_EVRE_CODE_MASK; + + if (hidma_post_completed(lldev, tre_iterator, err_info, + err_code)) + break; + + HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE, + tre_ring_size); + HIDMA_INCREMENT_ITERATOR(evre_iterator, HIDMA_EVRE_SIZE, + evre_ring_size); + + /* + * Read the new event descriptor written by the HW. + * As we are processing the delivered events, other events + * get queued to the SW for processing. + */ + evre_write_off = + readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG); + num_completed++; + } + + if (num_completed) { + u32 evre_read_off = (lldev->evre_processed_off + + HIDMA_EVRE_SIZE * num_completed); + u32 tre_read_off = (lldev->tre_processed_off + + HIDMA_TRE_SIZE * num_completed); + + evre_read_off = evre_read_off % evre_ring_size; + tre_read_off = tre_read_off % tre_ring_size; + + writel(evre_read_off, lldev->evca + HIDMA_EVCA_DOORBELL_REG); + + /* record the last processed tre offset */ + lldev->tre_processed_off = tre_read_off; + lldev->evre_processed_off = evre_read_off; + } + + return num_completed; +} + +void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info, + u8 err_code) +{ + u32 tre_iterator; + u32 tre_ring_size = lldev->tre_ring_size; + int num_completed = 0; + u32 tre_read_off; + + tre_iterator = lldev->tre_processed_off; + while (lldev->pending_tre_count) { + if (hidma_post_completed(lldev, tre_iterator, err_info, + err_code)) + break; + HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE, + tre_ring_size); + num_completed++; + } + tre_read_off = (lldev->tre_processed_off + + HIDMA_TRE_SIZE * num_completed); + + tre_read_off = tre_read_off % tre_ring_size; + + /* record the last processed tre offset */ + lldev->tre_processed_off = tre_read_off; +} + +static int hidma_ll_reset(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not reset\n"); + return ret; + } + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_RESET << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Delay 10ms after reset to allow DMA logic to quiesce. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_DISABLED; + lldev->evch_state = HIDMA_CH_DISABLED; + return 0; +} + +/* + * Abort all transactions and perform a reset. + */ +static void hidma_ll_abort(unsigned long arg) +{ + struct hidma_lldev *lldev = (struct hidma_lldev *)arg; + u8 err_code = HIDMA_EVRE_STATUS_ERROR; + u8 err_info = 0xFF; + int rc; + + hidma_cleanup_pending_tre(lldev, err_info, err_code); + + /* reset the channel for recovery */ + rc = hidma_ll_setup(lldev); + if (rc) { + dev_err(lldev->dev, "channel reinitialize failed after error\n"); + return; + } + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); +} + +/* + * The interrupt handler for HIDMA will try to consume as many pending + * EVRE from the event queue as possible. Each EVRE has an associated + * TRE that holds the user interface parameters. EVRE reports the + * result of the transaction. Hardware guarantees ordering between EVREs + * and TREs. We use last processed offset to figure out which TRE is + * associated with which EVRE. If two TREs are consumed by HW, the EVREs + * are in order in the event ring. + * + * This handler will do a one pass for consuming EVREs. Other EVREs may + * be delivered while we are working. It will try to consume incoming + * EVREs one more time and return. + * + * For unprocessed EVREs, hardware will trigger another interrupt until + * all the interrupt bits are cleared. + * + * Hardware guarantees that by the time interrupt is observed, all data + * transactions in flight are delivered to their respective places and + * are visible to the CPU. + * + * On demand paging for IOMMU is only supported for PCIe via PRI + * (Page Request Interface) not for HIDMA. All other hardware instances + * including HIDMA work on pinned DMA addresses. + * + * HIDMA is not aware of IOMMU presence since it follows the DMA API. All + * IOMMU latency will be built into the data movement time. By the time + * interrupt happens, IOMMU lookups + data movement has already taken place. + * + * While the first read in a typical PCI endpoint ISR flushes all outstanding + * requests traditionally to the destination, this concept does not apply + * here for this HW. + */ +irqreturn_t hidma_ll_inthandler(int chirq, void *arg) +{ + struct hidma_lldev *lldev = arg; + u32 status; + u32 enable; + u32 cause; + + /* + * Fine tuned for this HW... + * + * This ISR has been designed for this particular hardware. Relaxed + * read and write accessors are used for performance reasons due to + * interrupt delivery guarantees. Do not copy this code blindly and + * expect that to work. + */ + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + + while (cause) { + if (cause & HIDMA_ERR_INT_MASK) { + dev_err(lldev->dev, "error 0x%x, resetting...\n", + cause); + + /* Clear out pending interrupts */ + writel(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + tasklet_schedule(&lldev->rst_task); + goto out; + } + + /* + * Try to consume as many EVREs as possible. + */ + hidma_handle_tre_completion(lldev); + + /* We consumed TREs or there are pending TREs or EVREs. */ + writel_relaxed(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* + * Another interrupt might have arrived while we are + * processing this one. Read the new cause. + */ + status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + cause = status & enable; + } + +out: + return IRQ_HANDLED; +} + +int hidma_ll_enable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "event channel did not get enabled\n"); + return ret; + } + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_ENABLE << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + hidma_is_chan_enabled(HIDMA_CH_STATE(val)), + 1000, 10000); + if (ret) { + dev_err(lldev->dev, "transfer channel did not get enabled\n"); + return ret; + } + + lldev->trch_state = HIDMA_CH_ENABLED; + lldev->evch_state = HIDMA_CH_ENABLED; + + return 0; +} + +void hidma_ll_start(struct hidma_lldev *lldev) +{ + unsigned long irqflags; + + spin_lock_irqsave(&lldev->lock, irqflags); + writel(lldev->tre_write_offset, lldev->trca + HIDMA_TRCA_DOORBELL_REG); + spin_unlock_irqrestore(&lldev->lock, irqflags); +} + +bool hidma_ll_isenabled(struct hidma_lldev *lldev) +{ + u32 val; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + lldev->trch_state = HIDMA_CH_STATE(val); + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + lldev->evch_state = HIDMA_CH_STATE(val); + + /* both channels have to be enabled before calling this function */ + if (hidma_is_chan_enabled(lldev->trch_state) && + hidma_is_chan_enabled(lldev->evch_state)) + return true; + + return false; +} + +void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch) +{ + struct hidma_tre *tre; + unsigned long flags; + + tre = &lldev->trepool[tre_ch]; + + /* copy the TRE into its location in the TRE ring */ + spin_lock_irqsave(&lldev->lock, flags); + tre->tre_index = lldev->tre_write_offset / HIDMA_TRE_SIZE; + lldev->pending_tre_list[tre->tre_index] = tre; + memcpy(lldev->tre_ring + lldev->tre_write_offset, + &tre->tre_local[0], HIDMA_TRE_SIZE); + tre->err_code = 0; + tre->err_info = 0; + tre->queued = 1; + lldev->pending_tre_count++; + lldev->tre_write_offset = (lldev->tre_write_offset + HIDMA_TRE_SIZE) + % lldev->tre_ring_size; + spin_unlock_irqrestore(&lldev->lock, flags); +} + +/* + * Note that even though we stop this channel if there is a pending transaction + * in flight it will complete and follow the callback. This request will + * prevent further requests to be made. + */ +int hidma_ll_disable(struct hidma_lldev *lldev) +{ + u32 val; + int ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + lldev->evch_state = HIDMA_CH_STATE(val); + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + lldev->trch_state = HIDMA_CH_STATE(val); + + /* already suspended by this OS */ + if ((lldev->trch_state == HIDMA_CH_SUSPENDED) || + (lldev->evch_state == HIDMA_CH_SUSPENDED)) + return 0; + + /* already stopped by the manager */ + if ((lldev->trch_state == HIDMA_CH_STOPPED) || + (lldev->evch_state == HIDMA_CH_STOPPED)) + return 0; + + val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed. + * Do a polled read up to 1ms and 10ms maximum. + */ + ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + val &= ~(HIDMA_CH_CONTROL_MASK << 16); + val |= HIDMA_CH_SUSPEND << 16; + writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG); + + /* + * Start the wait right after the suspend is confirmed + * Delay up to 10ms after reset to allow DMA logic to quiesce. + */ + ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val, + HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED, + 1000, 10000); + if (ret) + return ret; + + lldev->trch_state = HIDMA_CH_SUSPENDED; + lldev->evch_state = HIDMA_CH_SUSPENDED; + return 0; +} + +void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch, + dma_addr_t src, dma_addr_t dest, u32 len, + u32 flags) +{ + struct hidma_tre *tre; + u32 *tre_local; + + if (tre_ch >= lldev->nr_tres) { + dev_err(lldev->dev, "invalid TRE number in transfer params:%d", + tre_ch); + return; + } + + tre = &lldev->trepool[tre_ch]; + if (atomic_read(&tre->allocated) != true) { + dev_err(lldev->dev, "trying to set params on an unused TRE:%d", + tre_ch); + return; + } + + tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_LEN_IDX] = len; + tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src); + tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src); + tre_local[HIDMA_TRE_DEST_LOW_IDX] = lower_32_bits(dest); + tre_local[HIDMA_TRE_DEST_HI_IDX] = upper_32_bits(dest); + tre->int_flags = flags; +} + +/* + * Called during initialization and after an error condition + * to restore hardware state. + */ +int hidma_ll_setup(struct hidma_lldev *lldev) +{ + int rc; + u64 addr; + u32 val; + u32 nr_tres = lldev->nr_tres; + + lldev->pending_tre_count = 0; + lldev->tre_processed_off = 0; + lldev->evre_processed_off = 0; + lldev->tre_write_offset = 0; + + /* disable interrupts */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + /* clear all pending interrupts */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + rc = hidma_ll_reset(lldev); + if (rc) + return rc; + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + + /* disable interrupts again after reset */ + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + addr = lldev->tre_dma; + writel(lower_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_HIGH_REG); + writel(lldev->tre_ring_size, lldev->trca + HIDMA_TRCA_RING_LEN_REG); + + addr = lldev->evre_dma; + writel(lower_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_LOW_REG); + writel(upper_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_HIGH_REG); + writel(HIDMA_EVRE_SIZE * nr_tres, + lldev->evca + HIDMA_EVCA_RING_LEN_REG); + + /* support IRQ only for now */ + val = readl(lldev->evca + HIDMA_EVCA_INTCTRL_REG); + val &= ~0xF; + val |= 0x1; + writel(val, lldev->evca + HIDMA_EVCA_INTCTRL_REG); + + /* clear all pending interrupts and enable them */ + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + + return hidma_ll_enable(lldev); +} + +struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres, + void __iomem *trca, void __iomem *evca, + u8 chidx) +{ + u32 required_bytes; + struct hidma_lldev *lldev; + int rc; + size_t sz; + + if (!trca || !evca || !dev || !nr_tres) + return NULL; + + /* need at least four TREs */ + if (nr_tres < 4) + return NULL; + + /* need an extra space */ + nr_tres += 1; + + lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL); + if (!lldev) + return NULL; + + lldev->evca = evca; + lldev->trca = trca; + lldev->dev = dev; + sz = sizeof(struct hidma_tre); + lldev->trepool = devm_kcalloc(lldev->dev, nr_tres, sz, GFP_KERNEL); + if (!lldev->trepool) + return NULL; + + required_bytes = sizeof(lldev->pending_tre_list[0]); + lldev->pending_tre_list = devm_kcalloc(dev, nr_tres, required_bytes, + GFP_KERNEL); + if (!lldev->pending_tre_list) + return NULL; + + sz = (HIDMA_TRE_SIZE + 1) * nr_tres; + lldev->tre_ring = dmam_alloc_coherent(dev, sz, &lldev->tre_dma, + GFP_KERNEL); + if (!lldev->tre_ring) + return NULL; + + memset(lldev->tre_ring, 0, (HIDMA_TRE_SIZE + 1) * nr_tres); + lldev->tre_ring_size = HIDMA_TRE_SIZE * nr_tres; + lldev->nr_tres = nr_tres; + + /* the TRE ring has to be TRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->tre_dma, HIDMA_TRE_SIZE)) { + u8 tre_ring_shift; + + tre_ring_shift = lldev->tre_dma % HIDMA_TRE_SIZE; + tre_ring_shift = HIDMA_TRE_SIZE - tre_ring_shift; + lldev->tre_dma += tre_ring_shift; + lldev->tre_ring += tre_ring_shift; + } + + sz = (HIDMA_EVRE_SIZE + 1) * nr_tres; + lldev->evre_ring = dmam_alloc_coherent(dev, sz, &lldev->evre_dma, + GFP_KERNEL); + if (!lldev->evre_ring) + return NULL; + + memset(lldev->evre_ring, 0, (HIDMA_EVRE_SIZE + 1) * nr_tres); + lldev->evre_ring_size = HIDMA_EVRE_SIZE * nr_tres; + + /* the EVRE ring has to be EVRE_SIZE aligned */ + if (!IS_ALIGNED(lldev->evre_dma, HIDMA_EVRE_SIZE)) { + u8 evre_ring_shift; + + evre_ring_shift = lldev->evre_dma % HIDMA_EVRE_SIZE; + evre_ring_shift = HIDMA_EVRE_SIZE - evre_ring_shift; + lldev->evre_dma += evre_ring_shift; + lldev->evre_ring += evre_ring_shift; + } + lldev->nr_tres = nr_tres; + lldev->chidx = chidx; + + sz = nr_tres * sizeof(struct hidma_tre *); + rc = kfifo_alloc(&lldev->handoff_fifo, sz, GFP_KERNEL); + if (rc) + return NULL; + + rc = hidma_ll_setup(lldev); + if (rc) + return NULL; + + spin_lock_init(&lldev->lock); + tasklet_init(&lldev->rst_task, hidma_ll_abort, (unsigned long)lldev); + tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev); + lldev->initialized = 1; + writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return lldev; +} + +int hidma_ll_uninit(struct hidma_lldev *lldev) +{ + u32 required_bytes; + int rc = 0; + u32 val; + + if (!lldev) + return -ENODEV; + + if (!lldev->initialized) + return 0; + + lldev->initialized = 0; + + required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres; + tasklet_kill(&lldev->task); + memset(lldev->trepool, 0, required_bytes); + lldev->trepool = NULL; + lldev->pending_tre_count = 0; + lldev->tre_write_offset = 0; + + rc = hidma_ll_reset(lldev); + + /* + * Clear all pending interrupts again. + * Otherwise, we observe reset complete interrupts. + */ + val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG); + writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG); + writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG); + return rc; +} + +enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch) +{ + enum dma_status ret = DMA_ERROR; + struct hidma_tre *tre; + unsigned long flags; + u8 err_code; + + spin_lock_irqsave(&lldev->lock, flags); + + tre = &lldev->trepool[tre_ch]; + err_code = tre->err_code; + + if (err_code & HIDMA_EVRE_STATUS_COMPLETE) + ret = DMA_COMPLETE; + else if (err_code & HIDMA_EVRE_STATUS_ERROR) + ret = DMA_ERROR; + else + ret = DMA_IN_PROGRESS; + spin_unlock_irqrestore(&lldev->lock, flags); + + return ret; +} diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index ef491b893..c0e365321 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine Management interface * - * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,13 +17,14 @@ #include <linux/acpi.h> #include <linux/of.h> #include <linux/property.h> -#include <linux/interrupt.h> -#include <linux/platform_device.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/bitops.h> +#include <linux/dma-mapping.h> #include "hidma_mgmt.h" @@ -298,5 +299,109 @@ static struct platform_driver hidma_mgmt_driver = { }, }; -module_platform_driver(hidma_mgmt_driver); +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) +static int object_counter; + +static int __init hidma_mgmt_of_populate_channels(struct device_node *np) +{ + struct platform_device *pdev_parent = of_find_device_by_node(np); + struct platform_device_info pdevinfo; + struct of_phandle_args out_irq; + struct device_node *child; + struct resource *res; + const __be32 *cell; + int ret = 0, size, i, num; + u64 addr, addr_size; + + for_each_available_child_of_node(np, child) { + struct resource *res_iter; + struct platform_device *new_pdev; + + cell = of_get_property(child, "reg", &size); + if (!cell) { + ret = -EINVAL; + goto out; + } + + size /= sizeof(*cell); + num = size / + (of_n_addr_cells(child) + of_n_size_cells(child)) + 1; + + /* allocate a resource array */ + res = kcalloc(num, sizeof(*res), GFP_KERNEL); + if (!res) { + ret = -ENOMEM; + goto out; + } + + /* read each reg value */ + i = 0; + res_iter = res; + while (i < size) { + addr = of_read_number(&cell[i], + of_n_addr_cells(child)); + i += of_n_addr_cells(child); + + addr_size = of_read_number(&cell[i], + of_n_size_cells(child)); + i += of_n_size_cells(child); + + res_iter->start = addr; + res_iter->end = res_iter->start + addr_size - 1; + res_iter->flags = IORESOURCE_MEM; + res_iter++; + } + + ret = of_irq_parse_one(child, 0, &out_irq); + if (ret) + goto out; + + res_iter->start = irq_create_of_mapping(&out_irq); + res_iter->name = "hidma event irq"; + res_iter->flags = IORESOURCE_IRQ; + + memset(&pdevinfo, 0, sizeof(pdevinfo)); + pdevinfo.fwnode = &child->fwnode; + pdevinfo.parent = pdev_parent ? &pdev_parent->dev : NULL; + pdevinfo.name = child->name; + pdevinfo.id = object_counter++; + pdevinfo.res = res; + pdevinfo.num_res = num; + pdevinfo.data = NULL; + pdevinfo.size_data = 0; + pdevinfo.dma_mask = DMA_BIT_MASK(64); + new_pdev = platform_device_register_full(&pdevinfo); + if (!new_pdev) { + ret = -ENODEV; + goto out; + } + of_dma_configure(&new_pdev->dev, child); + + kfree(res); + res = NULL; + } +out: + kfree(res); + + return ret; +} +#endif + +static int __init hidma_mgmt_init(void) +{ +#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ) + struct device_node *child; + + for (child = of_find_matching_node(NULL, hidma_mgmt_match); child; + child = of_find_matching_node(child, hidma_mgmt_match)) { + /* device tree based firmware here */ + hidma_mgmt_of_populate_channels(child); + of_node_put(child); + } +#endif + platform_driver_register(&hidma_mgmt_driver); + + return 0; +} +module_init(hidma_mgmt_init); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c index e0df233dd..57aa227bf 100644 --- a/drivers/dma/sun4i-dma.c +++ b/drivers/dma/sun4i-dma.c @@ -461,25 +461,25 @@ generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, /* Source burst */ ret = convert_burst(sconfig->src_maxburst); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); /* Destination burst */ ret = convert_burst(sconfig->dst_maxburst); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); /* Source bus width */ ret = convert_buswidth(sconfig->src_addr_width); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); /* Destination bus width */ ret = convert_buswidth(sconfig->dst_addr_width); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); @@ -518,25 +518,25 @@ generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, /* Source burst */ ret = convert_burst(sconfig->src_maxburst); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); /* Destination burst */ ret = convert_burst(sconfig->dst_maxburst); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); /* Source bus width */ ret = convert_buswidth(sconfig->src_addr_width); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); /* Destination bus width */ ret = convert_buswidth(sconfig->dst_addr_width); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto fail; promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 2db12e493..5065ca43f 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -146,6 +146,8 @@ struct sun6i_vchan { struct dma_slave_config cfg; struct sun6i_pchan *phy; u8 port; + u8 irq_type; + bool cyclic; }; struct sun6i_dma_dev { @@ -254,6 +256,30 @@ static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width) return addr_width >> 1; } +static size_t sun6i_get_chan_size(struct sun6i_pchan *pchan) +{ + struct sun6i_desc *txd = pchan->desc; + struct sun6i_dma_lli *lli; + size_t bytes; + dma_addr_t pos; + + pos = readl(pchan->base + DMA_CHAN_LLI_ADDR); + bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + + if (pos == LLI_LAST_ITEM) + return bytes; + + for (lli = txd->v_lli; lli; lli = lli->v_lli_next) { + if (lli->p_lli_next == pos) { + for (lli = lli->v_lli_next; lli; lli = lli->v_lli_next) + bytes += lli->len; + break; + } + } + + return bytes; +} + static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, struct sun6i_dma_lli *next, dma_addr_t next_phy, @@ -276,45 +302,6 @@ static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, return next; } -static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli, - dma_addr_t src, - dma_addr_t dst, u32 len, - struct dma_slave_config *config) -{ - u8 src_width, dst_width, src_burst, dst_burst; - - if (!config) - return -EINVAL; - - src_burst = convert_burst(config->src_maxburst); - if (src_burst) - return src_burst; - - dst_burst = convert_burst(config->dst_maxburst); - if (dst_burst) - return dst_burst; - - src_width = convert_buswidth(config->src_addr_width); - if (src_width) - return src_width; - - dst_width = convert_buswidth(config->dst_addr_width); - if (dst_width) - return dst_width; - - lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | - DMA_CHAN_CFG_SRC_WIDTH(src_width) | - DMA_CHAN_CFG_DST_BURST(dst_burst) | - DMA_CHAN_CFG_DST_WIDTH(dst_width); - - lli->src = src; - lli->dst = dst; - lli->len = len; - lli->para = NORMAL_WAIT; - - return 0; -} - static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan, struct sun6i_dma_lli *lli) { @@ -381,9 +368,13 @@ static int sun6i_dma_start_desc(struct sun6i_vchan *vchan) irq_reg = pchan->idx / DMA_IRQ_CHAN_NR; irq_offset = pchan->idx % DMA_IRQ_CHAN_NR; - irq_val = readl(sdev->base + DMA_IRQ_EN(irq_offset)); - irq_val |= DMA_IRQ_QUEUE << (irq_offset * DMA_IRQ_CHAN_WIDTH); - writel(irq_val, sdev->base + DMA_IRQ_EN(irq_offset)); + vchan->irq_type = vchan->cyclic ? DMA_IRQ_PKG : DMA_IRQ_QUEUE; + + irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg)); + irq_val &= ~((DMA_IRQ_HALF | DMA_IRQ_PKG | DMA_IRQ_QUEUE) << + (irq_offset * DMA_IRQ_CHAN_WIDTH)); + irq_val |= vchan->irq_type << (irq_offset * DMA_IRQ_CHAN_WIDTH); + writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg)); writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR); writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE); @@ -479,11 +470,12 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) writel(status, sdev->base + DMA_IRQ_STAT(i)); for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) { - if (status & DMA_IRQ_QUEUE) { - pchan = sdev->pchans + j; - vchan = pchan->vchan; - - if (vchan) { + pchan = sdev->pchans + j; + vchan = pchan->vchan; + if (vchan && (status & vchan->irq_type)) { + if (vchan->cyclic) { + vchan_cyclic_callback(&pchan->desc->vd); + } else { spin_lock(&vchan->vc.lock); vchan_cookie_complete(&pchan->desc->vd); pchan->done = pchan->desc; @@ -502,6 +494,55 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) return ret; } +static int set_config(struct sun6i_dma_dev *sdev, + struct dma_slave_config *sconfig, + enum dma_transfer_direction direction, + u32 *p_cfg) +{ + s8 src_width, dst_width, src_burst, dst_burst; + + switch (direction) { + case DMA_MEM_TO_DEV: + src_burst = convert_burst(sconfig->src_maxburst ? + sconfig->src_maxburst : 8); + src_width = convert_buswidth(sconfig->src_addr_width != + DMA_SLAVE_BUSWIDTH_UNDEFINED ? + sconfig->src_addr_width : + DMA_SLAVE_BUSWIDTH_4_BYTES); + dst_burst = convert_burst(sconfig->dst_maxburst); + dst_width = convert_buswidth(sconfig->dst_addr_width); + break; + case DMA_DEV_TO_MEM: + src_burst = convert_burst(sconfig->src_maxburst); + src_width = convert_buswidth(sconfig->src_addr_width); + dst_burst = convert_burst(sconfig->dst_maxburst ? + sconfig->dst_maxburst : 8); + dst_width = convert_buswidth(sconfig->dst_addr_width != + DMA_SLAVE_BUSWIDTH_UNDEFINED ? + sconfig->dst_addr_width : + DMA_SLAVE_BUSWIDTH_4_BYTES); + break; + default: + return -EINVAL; + } + + if (src_burst < 0) + return src_burst; + if (src_width < 0) + return src_width; + if (dst_burst < 0) + return dst_burst; + if (dst_width < 0) + return dst_width; + + *p_cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | + DMA_CHAN_CFG_SRC_WIDTH(src_width) | + DMA_CHAN_CFG_DST_BURST(dst_burst) | + DMA_CHAN_CFG_DST_WIDTH(dst_width); + + return 0; +} + static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -569,13 +610,15 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( struct sun6i_desc *txd; struct scatterlist *sg; dma_addr_t p_lli; + u32 lli_cfg; int i, ret; if (!sgl) return NULL; - if (!is_slave_direction(dir)) { - dev_err(chan2dev(chan), "Invalid DMA direction\n"); + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); return NULL; } @@ -588,14 +631,14 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( if (!v_lli) goto err_lli_free; - if (dir == DMA_MEM_TO_DEV) { - ret = sun6i_dma_cfg_lli(v_lli, sg_dma_address(sg), - sconfig->dst_addr, sg_dma_len(sg), - sconfig); - if (ret) - goto err_cur_lli_free; + v_lli->len = sg_dma_len(sg); + v_lli->para = NORMAL_WAIT; - v_lli->cfg |= DMA_CHAN_CFG_DST_IO_MODE | + if (dir == DMA_MEM_TO_DEV) { + v_lli->src = sg_dma_address(sg); + v_lli->dst = sconfig->dst_addr; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_IO_MODE | DMA_CHAN_CFG_SRC_LINEAR_MODE | DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(vchan->port); @@ -607,13 +650,10 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( sg_dma_len(sg), flags); } else { - ret = sun6i_dma_cfg_lli(v_lli, sconfig->src_addr, - sg_dma_address(sg), sg_dma_len(sg), - sconfig); - if (ret) - goto err_cur_lli_free; - - v_lli->cfg |= DMA_CHAN_CFG_DST_LINEAR_MODE | + v_lli->src = sconfig->src_addr; + v_lli->dst = sg_dma_address(sg); + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_LINEAR_MODE | DMA_CHAN_CFG_SRC_IO_MODE | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_SRC_DRQ(vchan->port); @@ -634,8 +674,78 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( return vchan_tx_prep(&vchan->vc, &txd->vd, flags); -err_cur_lli_free: - dma_pool_free(sdev->pool, v_lli, p_lli); +err_lli_free: + for (prev = txd->v_lli; prev; prev = prev->v_lli_next) + dma_pool_free(sdev->pool, prev, virt_to_phys(prev)); + kfree(txd); + return NULL; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic( + struct dma_chan *chan, + dma_addr_t buf_addr, + size_t buf_len, + size_t period_len, + enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli, *prev = NULL; + struct sun6i_desc *txd; + dma_addr_t p_lli; + u32 lli_cfg; + unsigned int i, periods = buf_len / period_len; + int ret; + + ret = set_config(sdev, sconfig, dir, &lli_cfg); + if (ret) { + dev_err(chan2dev(chan), "Invalid DMA configuration\n"); + return NULL; + } + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + for (i = 0; i < periods; i++) { + v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli); + if (!v_lli) { + dev_err(sdev->slave.dev, "Failed to alloc lli memory\n"); + goto err_lli_free; + } + + v_lli->len = period_len; + v_lli->para = NORMAL_WAIT; + + if (dir == DMA_MEM_TO_DEV) { + v_lli->src = buf_addr + period_len * i; + v_lli->dst = sconfig->dst_addr; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_IO_MODE | + DMA_CHAN_CFG_SRC_LINEAR_MODE | + DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_DRQ(vchan->port); + } else { + v_lli->src = sconfig->src_addr; + v_lli->dst = buf_addr + period_len * i; + v_lli->cfg = lli_cfg | + DMA_CHAN_CFG_DST_LINEAR_MODE | + DMA_CHAN_CFG_SRC_IO_MODE | + DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_SRC_DRQ(vchan->port); + } + + prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd); + } + + prev->p_lli_next = txd->p_lli; /* cyclic list */ + + vchan->cyclic = true; + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + err_lli_free: for (prev = txd->v_lli; prev; prev = prev->v_lli_next) dma_pool_free(sdev->pool, prev, virt_to_phys(prev)); @@ -712,6 +822,16 @@ static int sun6i_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&vchan->vc.lock, flags); + if (vchan->cyclic) { + vchan->cyclic = false; + if (pchan && pchan->desc) { + struct virt_dma_desc *vd = &pchan->desc->vd; + struct virt_dma_chan *vc = &vchan->vc; + + list_add_tail(&vd->node, &vc->desc_completed); + } + } + vchan_get_all_descriptors(&vchan->vc, &head); if (pchan) { @@ -759,7 +879,7 @@ static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan, } else if (!pchan || !pchan->desc) { bytes = 0; } else { - bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + bytes = sun6i_get_chan_size(pchan); } spin_unlock_irqrestore(&vchan->vc.lock, flags); @@ -963,6 +1083,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask); dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask); dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, sdc->slave.cap_mask); INIT_LIST_HEAD(&sdc->slave.channels); sdc->slave.device_free_chan_resources = sun6i_dma_free_chan_resources; @@ -970,6 +1091,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; + sdc->slave.device_prep_dma_cyclic = sun6i_dma_prep_dma_cyclic; sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 3871f29e5..01e316f73 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -54,6 +54,7 @@ #define TEGRA_APBDMA_CSR_ONCE BIT(27) #define TEGRA_APBDMA_CSR_FLOW BIT(21) #define TEGRA_APBDMA_CSR_REQ_SEL_SHIFT 16 +#define TEGRA_APBDMA_CSR_REQ_SEL_MASK 0x1F #define TEGRA_APBDMA_CSR_WCOUNT_MASK 0xFFFC /* STATUS register */ @@ -114,6 +115,8 @@ /* Channel base address offset from APBDMA base address */ #define TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET 0x1000 +#define TEGRA_APBDMA_SLAVE_ID_INVALID (TEGRA_APBDMA_CSR_REQ_SEL_MASK + 1) + struct tegra_dma; /* @@ -353,8 +356,11 @@ static int tegra_dma_slave_config(struct dma_chan *dc, } memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig)); - if (!tdc->slave_id) + if (tdc->slave_id == TEGRA_APBDMA_SLAVE_ID_INVALID) { + if (sconfig->slave_id > TEGRA_APBDMA_CSR_REQ_SEL_MASK) + return -EINVAL; tdc->slave_id = sconfig->slave_id; + } tdc->config_init = true; return 0; } @@ -1236,7 +1242,7 @@ static void tegra_dma_free_chan_resources(struct dma_chan *dc) } pm_runtime_put(tdma->dev); - tdc->slave_id = 0; + tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID; } static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, @@ -1246,6 +1252,11 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, struct dma_chan *chan; struct tegra_dma_channel *tdc; + if (dma_spec->args[0] > TEGRA_APBDMA_CSR_REQ_SEL_MASK) { + dev_err(tdma->dev, "Invalid slave id: %d\n", dma_spec->args[0]); + return NULL; + } + chan = dma_get_any_slave_channel(&tdma->dma_dev); if (!chan) return NULL; @@ -1389,6 +1400,7 @@ static int tegra_dma_probe(struct platform_device *pdev) &tdma->dma_dev.channels); tdc->tdma = tdma; tdc->id = i; + tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID; tasklet_init(&tdc->tasklet, tegra_dma_tasklet, (unsigned long)tdc); diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c new file mode 100644 index 000000000..c4b121c45 --- /dev/null +++ b/drivers/dma/tegra210-adma.c @@ -0,0 +1,840 @@ +/* + * ADMA driver for Nvidia's Tegra210 ADMA controller. + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of_irq.h> +#include <linux/pm_clock.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> + +#include "virt-dma.h" + +#define ADMA_CH_CMD 0x00 +#define ADMA_CH_STATUS 0x0c +#define ADMA_CH_STATUS_XFER_EN BIT(0) + +#define ADMA_CH_INT_STATUS 0x10 +#define ADMA_CH_INT_STATUS_XFER_DONE BIT(0) + +#define ADMA_CH_INT_CLEAR 0x1c +#define ADMA_CH_CTRL 0x24 +#define ADMA_CH_CTRL_TX_REQ(val) (((val) & 0xf) << 28) +#define ADMA_CH_CTRL_TX_REQ_MAX 10 +#define ADMA_CH_CTRL_RX_REQ(val) (((val) & 0xf) << 24) +#define ADMA_CH_CTRL_RX_REQ_MAX 10 +#define ADMA_CH_CTRL_DIR(val) (((val) & 0xf) << 12) +#define ADMA_CH_CTRL_DIR_AHUB2MEM 2 +#define ADMA_CH_CTRL_DIR_MEM2AHUB 4 +#define ADMA_CH_CTRL_MODE_CONTINUOUS (2 << 8) +#define ADMA_CH_CTRL_FLOWCTRL_EN BIT(1) + +#define ADMA_CH_CONFIG 0x28 +#define ADMA_CH_CONFIG_SRC_BUF(val) (((val) & 0x7) << 28) +#define ADMA_CH_CONFIG_TRG_BUF(val) (((val) & 0x7) << 24) +#define ADMA_CH_CONFIG_BURST_SIZE(val) (((val) & 0x7) << 20) +#define ADMA_CH_CONFIG_BURST_16 5 +#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0xf) +#define ADMA_CH_CONFIG_MAX_BUFS 8 + +#define ADMA_CH_FIFO_CTRL 0x2c +#define ADMA_CH_FIFO_CTRL_OVRFW_THRES(val) (((val) & 0xf) << 24) +#define ADMA_CH_FIFO_CTRL_STARV_THRES(val) (((val) & 0xf) << 16) +#define ADMA_CH_FIFO_CTRL_TX_SIZE(val) (((val) & 0xf) << 8) +#define ADMA_CH_FIFO_CTRL_RX_SIZE(val) ((val) & 0xf) + +#define ADMA_CH_LOWER_SRC_ADDR 0x34 +#define ADMA_CH_LOWER_TRG_ADDR 0x3c +#define ADMA_CH_TC 0x44 +#define ADMA_CH_TC_COUNT_MASK 0x3ffffffc + +#define ADMA_CH_XFER_STATUS 0x54 +#define ADMA_CH_XFER_STATUS_COUNT_MASK 0xffff + +#define ADMA_GLOBAL_CMD 0xc00 +#define ADMA_GLOBAL_SOFT_RESET 0xc04 +#define ADMA_GLOBAL_INT_CLEAR 0xc20 +#define ADMA_GLOBAL_CTRL 0xc24 + +#define ADMA_CH_REG_OFFSET(a) (a * 0x80) + +#define ADMA_CH_FIFO_CTRL_DEFAULT (ADMA_CH_FIFO_CTRL_OVRFW_THRES(1) | \ + ADMA_CH_FIFO_CTRL_STARV_THRES(1) | \ + ADMA_CH_FIFO_CTRL_TX_SIZE(3) | \ + ADMA_CH_FIFO_CTRL_RX_SIZE(3)) +struct tegra_adma; + +/* + * struct tegra_adma_chip_data - Tegra chip specific data + * @nr_channels: Number of DMA channels available. + */ +struct tegra_adma_chip_data { + int nr_channels; +}; + +/* + * struct tegra_adma_chan_regs - Tegra ADMA channel registers + */ +struct tegra_adma_chan_regs { + unsigned int ctrl; + unsigned int config; + unsigned int src_addr; + unsigned int trg_addr; + unsigned int fifo_ctrl; + unsigned int tc; +}; + +/* + * struct tegra_adma_desc - Tegra ADMA descriptor to manage transfer requests. + */ +struct tegra_adma_desc { + struct virt_dma_desc vd; + struct tegra_adma_chan_regs ch_regs; + size_t buf_len; + size_t period_len; + size_t num_periods; +}; + +/* + * struct tegra_adma_chan - Tegra ADMA channel information + */ +struct tegra_adma_chan { + struct virt_dma_chan vc; + struct tegra_adma_desc *desc; + struct tegra_adma *tdma; + int irq; + void __iomem *chan_addr; + + /* Slave channel configuration info */ + struct dma_slave_config sconfig; + enum dma_transfer_direction sreq_dir; + unsigned int sreq_index; + bool sreq_reserved; + + /* Transfer count and position info */ + unsigned int tx_buf_count; + unsigned int tx_buf_pos; +}; + +/* + * struct tegra_adma - Tegra ADMA controller information + */ +struct tegra_adma { + struct dma_device dma_dev; + struct device *dev; + void __iomem *base_addr; + unsigned int nr_channels; + unsigned long rx_requests_reserved; + unsigned long tx_requests_reserved; + + /* Used to store global command register state when suspending */ + unsigned int global_cmd; + + /* Last member of the structure */ + struct tegra_adma_chan channels[0]; +}; + +static inline void tdma_write(struct tegra_adma *tdma, u32 reg, u32 val) +{ + writel(val, tdma->base_addr + reg); +} + +static inline u32 tdma_read(struct tegra_adma *tdma, u32 reg) +{ + return readl(tdma->base_addr + reg); +} + +static inline void tdma_ch_write(struct tegra_adma_chan *tdc, u32 reg, u32 val) +{ + writel(val, tdc->chan_addr + reg); +} + +static inline u32 tdma_ch_read(struct tegra_adma_chan *tdc, u32 reg) +{ + return readl(tdc->chan_addr + reg); +} + +static inline struct tegra_adma_chan *to_tegra_adma_chan(struct dma_chan *dc) +{ + return container_of(dc, struct tegra_adma_chan, vc.chan); +} + +static inline struct tegra_adma_desc *to_tegra_adma_desc( + struct dma_async_tx_descriptor *td) +{ + return container_of(td, struct tegra_adma_desc, vd.tx); +} + +static inline struct device *tdc2dev(struct tegra_adma_chan *tdc) +{ + return tdc->tdma->dev; +} + +static void tegra_adma_desc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct tegra_adma_desc, vd)); +} + +static int tegra_adma_slave_config(struct dma_chan *dc, + struct dma_slave_config *sconfig) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + + memcpy(&tdc->sconfig, sconfig, sizeof(*sconfig)); + + return 0; +} + +static int tegra_adma_init(struct tegra_adma *tdma) +{ + u32 status; + int ret; + + /* Clear any interrupts */ + tdma_write(tdma, ADMA_GLOBAL_INT_CLEAR, 0x1); + + /* Assert soft reset */ + tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1); + + /* Wait for reset to clear */ + ret = readx_poll_timeout(readl, + tdma->base_addr + ADMA_GLOBAL_SOFT_RESET, + status, status == 0, 20, 10000); + if (ret) + return ret; + + /* Enable global ADMA registers */ + tdma_write(tdma, ADMA_GLOBAL_CMD, 1); + + return 0; +} + +static int tegra_adma_request_alloc(struct tegra_adma_chan *tdc, + enum dma_transfer_direction direction) +{ + struct tegra_adma *tdma = tdc->tdma; + unsigned int sreq_index = tdc->sreq_index; + + if (tdc->sreq_reserved) + return tdc->sreq_dir == direction ? 0 : -EINVAL; + + switch (direction) { + case DMA_MEM_TO_DEV: + if (sreq_index > ADMA_CH_CTRL_TX_REQ_MAX) { + dev_err(tdma->dev, "invalid DMA request\n"); + return -EINVAL; + } + + if (test_and_set_bit(sreq_index, &tdma->tx_requests_reserved)) { + dev_err(tdma->dev, "DMA request reserved\n"); + return -EINVAL; + } + break; + + case DMA_DEV_TO_MEM: + if (sreq_index > ADMA_CH_CTRL_RX_REQ_MAX) { + dev_err(tdma->dev, "invalid DMA request\n"); + return -EINVAL; + } + + if (test_and_set_bit(sreq_index, &tdma->rx_requests_reserved)) { + dev_err(tdma->dev, "DMA request reserved\n"); + return -EINVAL; + } + break; + + default: + dev_WARN(tdma->dev, "channel %s has invalid transfer type\n", + dma_chan_name(&tdc->vc.chan)); + return -EINVAL; + } + + tdc->sreq_dir = direction; + tdc->sreq_reserved = true; + + return 0; +} + +static void tegra_adma_request_free(struct tegra_adma_chan *tdc) +{ + struct tegra_adma *tdma = tdc->tdma; + + if (!tdc->sreq_reserved) + return; + + switch (tdc->sreq_dir) { + case DMA_MEM_TO_DEV: + clear_bit(tdc->sreq_index, &tdma->tx_requests_reserved); + break; + + case DMA_DEV_TO_MEM: + clear_bit(tdc->sreq_index, &tdma->rx_requests_reserved); + break; + + default: + dev_WARN(tdma->dev, "channel %s has invalid transfer type\n", + dma_chan_name(&tdc->vc.chan)); + return; + } + + tdc->sreq_reserved = false; +} + +static u32 tegra_adma_irq_status(struct tegra_adma_chan *tdc) +{ + u32 status = tdma_ch_read(tdc, ADMA_CH_INT_STATUS); + + return status & ADMA_CH_INT_STATUS_XFER_DONE; +} + +static u32 tegra_adma_irq_clear(struct tegra_adma_chan *tdc) +{ + u32 status = tegra_adma_irq_status(tdc); + + if (status) + tdma_ch_write(tdc, ADMA_CH_INT_CLEAR, status); + + return status; +} + +static void tegra_adma_stop(struct tegra_adma_chan *tdc) +{ + unsigned int status; + + /* Disable ADMA */ + tdma_ch_write(tdc, ADMA_CH_CMD, 0); + + /* Clear interrupt status */ + tegra_adma_irq_clear(tdc); + + if (readx_poll_timeout_atomic(readl, tdc->chan_addr + ADMA_CH_STATUS, + status, !(status & ADMA_CH_STATUS_XFER_EN), + 20, 10000)) { + dev_err(tdc2dev(tdc), "unable to stop DMA channel\n"); + return; + } + + kfree(tdc->desc); + tdc->desc = NULL; +} + +static void tegra_adma_start(struct tegra_adma_chan *tdc) +{ + struct virt_dma_desc *vd = vchan_next_desc(&tdc->vc); + struct tegra_adma_chan_regs *ch_regs; + struct tegra_adma_desc *desc; + + if (!vd) + return; + + list_del(&vd->node); + + desc = to_tegra_adma_desc(&vd->tx); + + if (!desc) { + dev_warn(tdc2dev(tdc), "unable to start DMA, no descriptor\n"); + return; + } + + ch_regs = &desc->ch_regs; + + tdc->tx_buf_pos = 0; + tdc->tx_buf_count = 0; + tdma_ch_write(tdc, ADMA_CH_TC, ch_regs->tc); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl); + tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_regs->src_addr); + tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_regs->trg_addr); + tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl); + tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_regs->config); + + /* Start ADMA */ + tdma_ch_write(tdc, ADMA_CH_CMD, 1); + + tdc->desc = desc; +} + +static unsigned int tegra_adma_get_residue(struct tegra_adma_chan *tdc) +{ + struct tegra_adma_desc *desc = tdc->desc; + unsigned int max = ADMA_CH_XFER_STATUS_COUNT_MASK + 1; + unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS); + unsigned int periods_remaining; + + /* + * Handle wrap around of buffer count register + */ + if (pos < tdc->tx_buf_pos) + tdc->tx_buf_count += pos + (max - tdc->tx_buf_pos); + else + tdc->tx_buf_count += pos - tdc->tx_buf_pos; + + periods_remaining = tdc->tx_buf_count % desc->num_periods; + tdc->tx_buf_pos = pos; + + return desc->buf_len - (periods_remaining * desc->period_len); +} + +static irqreturn_t tegra_adma_isr(int irq, void *dev_id) +{ + struct tegra_adma_chan *tdc = dev_id; + unsigned long status; + unsigned long flags; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + status = tegra_adma_irq_clear(tdc); + if (status == 0 || !tdc->desc) { + spin_unlock_irqrestore(&tdc->vc.lock, flags); + return IRQ_NONE; + } + + vchan_cyclic_callback(&tdc->desc->vd); + + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + return IRQ_HANDLED; +} + +static void tegra_adma_issue_pending(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + unsigned long flags; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + if (vchan_issue_pending(&tdc->vc)) { + if (!tdc->desc) + tegra_adma_start(tdc); + } + + spin_unlock_irqrestore(&tdc->vc.lock, flags); +} + +static int tegra_adma_terminate_all(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&tdc->vc.lock, flags); + + if (tdc->desc) + tegra_adma_stop(tdc); + + tegra_adma_request_free(tdc); + vchan_get_all_descriptors(&tdc->vc, &head); + spin_unlock_irqrestore(&tdc->vc.lock, flags); + vchan_dma_desc_free_list(&tdc->vc, &head); + + return 0; +} + +static enum dma_status tegra_adma_tx_status(struct dma_chan *dc, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc; + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + unsigned int residual; + + ret = dma_cookie_status(dc, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&tdc->vc.lock, flags); + + vd = vchan_find_desc(&tdc->vc, cookie); + if (vd) { + desc = to_tegra_adma_desc(&vd->tx); + residual = desc->ch_regs.tc; + } else if (tdc->desc && tdc->desc->vd.tx.cookie == cookie) { + residual = tegra_adma_get_residue(tdc); + } else { + residual = 0; + } + + spin_unlock_irqrestore(&tdc->vc.lock, flags); + + dma_set_residue(txstate, residual); + + return ret; +} + +static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc, + struct tegra_adma_desc *desc, + dma_addr_t buf_addr, + enum dma_transfer_direction direction) +{ + struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs; + unsigned int burst_size, adma_dir; + + if (desc->num_periods > ADMA_CH_CONFIG_MAX_BUFS) + return -EINVAL; + + switch (direction) { + case DMA_MEM_TO_DEV: + adma_dir = ADMA_CH_CTRL_DIR_MEM2AHUB; + burst_size = fls(tdc->sconfig.dst_maxburst); + ch_regs->config = ADMA_CH_CONFIG_SRC_BUF(desc->num_periods - 1); + ch_regs->ctrl = ADMA_CH_CTRL_TX_REQ(tdc->sreq_index); + ch_regs->src_addr = buf_addr; + break; + + case DMA_DEV_TO_MEM: + adma_dir = ADMA_CH_CTRL_DIR_AHUB2MEM; + burst_size = fls(tdc->sconfig.src_maxburst); + ch_regs->config = ADMA_CH_CONFIG_TRG_BUF(desc->num_periods - 1); + ch_regs->ctrl = ADMA_CH_CTRL_RX_REQ(tdc->sreq_index); + ch_regs->trg_addr = buf_addr; + break; + + default: + dev_err(tdc2dev(tdc), "DMA direction is not supported\n"); + return -EINVAL; + } + + if (!burst_size || burst_size > ADMA_CH_CONFIG_BURST_16) + burst_size = ADMA_CH_CONFIG_BURST_16; + + ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir) | + ADMA_CH_CTRL_MODE_CONTINUOUS | + ADMA_CH_CTRL_FLOWCTRL_EN; + ch_regs->config |= ADMA_CH_CONFIG_BURST_SIZE(burst_size); + ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1); + ch_regs->fifo_ctrl = ADMA_CH_FIFO_CTRL_DEFAULT; + ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK; + + return tegra_adma_request_alloc(tdc, direction); +} + +static struct dma_async_tx_descriptor *tegra_adma_prep_dma_cyclic( + struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + struct tegra_adma_desc *desc = NULL; + + if (!buf_len || !period_len || period_len > ADMA_CH_TC_COUNT_MASK) { + dev_err(tdc2dev(tdc), "invalid buffer/period len\n"); + return NULL; + } + + if (buf_len % period_len) { + dev_err(tdc2dev(tdc), "buf_len not a multiple of period_len\n"); + return NULL; + } + + if (!IS_ALIGNED(buf_addr, 4)) { + dev_err(tdc2dev(tdc), "invalid buffer alignment\n"); + return NULL; + } + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->buf_len = buf_len; + desc->period_len = period_len; + desc->num_periods = buf_len / period_len; + + if (tegra_adma_set_xfer_params(tdc, desc, buf_addr, direction)) { + kfree(desc); + return NULL; + } + + return vchan_tx_prep(&tdc->vc, &desc->vd, flags); +} + +static int tegra_adma_alloc_chan_resources(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + int ret; + + ret = request_irq(tdc->irq, tegra_adma_isr, 0, dma_chan_name(dc), tdc); + if (ret) { + dev_err(tdc2dev(tdc), "failed to get interrupt for %s\n", + dma_chan_name(dc)); + return ret; + } + + ret = pm_runtime_get_sync(tdc2dev(tdc)); + if (ret < 0) { + free_irq(tdc->irq, tdc); + return ret; + } + + dma_cookie_init(&tdc->vc.chan); + + return 0; +} + +static void tegra_adma_free_chan_resources(struct dma_chan *dc) +{ + struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc); + + tegra_adma_terminate_all(dc); + vchan_free_chan_resources(&tdc->vc); + tasklet_kill(&tdc->vc.task); + free_irq(tdc->irq, tdc); + pm_runtime_put(tdc2dev(tdc)); + + tdc->sreq_index = 0; + tdc->sreq_dir = DMA_TRANS_NONE; +} + +static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct tegra_adma *tdma = ofdma->of_dma_data; + struct tegra_adma_chan *tdc; + struct dma_chan *chan; + unsigned int sreq_index; + + if (dma_spec->args_count != 1) + return NULL; + + sreq_index = dma_spec->args[0]; + + if (sreq_index == 0) { + dev_err(tdma->dev, "DMA request must not be 0\n"); + return NULL; + } + + chan = dma_get_any_slave_channel(&tdma->dma_dev); + if (!chan) + return NULL; + + tdc = to_tegra_adma_chan(chan); + tdc->sreq_index = sreq_index; + + return chan; +} + +static int tegra_adma_runtime_suspend(struct device *dev) +{ + struct tegra_adma *tdma = dev_get_drvdata(dev); + + tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + + return pm_clk_suspend(dev); +} + +static int tegra_adma_runtime_resume(struct device *dev) +{ + struct tegra_adma *tdma = dev_get_drvdata(dev); + int ret; + + ret = pm_clk_resume(dev); + if (ret) + return ret; + + tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); + + return 0; +} + +static const struct tegra_adma_chip_data tegra210_chip_data = { + .nr_channels = 22, +}; + +static const struct of_device_id tegra_adma_of_match[] = { + { .compatible = "nvidia,tegra210-adma", .data = &tegra210_chip_data }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_adma_of_match); + +static int tegra_adma_probe(struct platform_device *pdev) +{ + const struct tegra_adma_chip_data *cdata; + struct tegra_adma *tdma; + struct resource *res; + struct clk *clk; + int ret, i; + + cdata = of_device_get_match_data(&pdev->dev); + if (!cdata) { + dev_err(&pdev->dev, "device match data not found\n"); + return -ENODEV; + } + + tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels * + sizeof(struct tegra_adma_chan), GFP_KERNEL); + if (!tdma) + return -ENOMEM; + + tdma->dev = &pdev->dev; + tdma->nr_channels = cdata->nr_channels; + platform_set_drvdata(pdev, tdma); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tdma->base_addr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(tdma->base_addr)) + return PTR_ERR(tdma->base_addr); + + ret = pm_clk_create(&pdev->dev); + if (ret) + return ret; + + clk = clk_get(&pdev->dev, "d_audio"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "ADMA clock not found\n"); + ret = PTR_ERR(clk); + goto clk_destroy; + } + + ret = pm_clk_add_clk(&pdev->dev, clk); + if (ret) { + clk_put(clk); + goto clk_destroy; + } + + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) + goto rpm_disable; + + ret = tegra_adma_init(tdma); + if (ret) + goto rpm_put; + + INIT_LIST_HEAD(&tdma->dma_dev.channels); + for (i = 0; i < tdma->nr_channels; i++) { + struct tegra_adma_chan *tdc = &tdma->channels[i]; + + tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i); + + tdc->irq = of_irq_get(pdev->dev.of_node, i); + if (tdc->irq < 0) { + ret = tdc->irq; + goto irq_dispose; + } + + vchan_init(&tdc->vc, &tdma->dma_dev); + tdc->vc.desc_free = tegra_adma_desc_free; + tdc->tdma = tdma; + } + + dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); + dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + + tdma->dma_dev.dev = &pdev->dev; + tdma->dma_dev.device_alloc_chan_resources = + tegra_adma_alloc_chan_resources; + tdma->dma_dev.device_free_chan_resources = + tegra_adma_free_chan_resources; + tdma->dma_dev.device_issue_pending = tegra_adma_issue_pending; + tdma->dma_dev.device_prep_dma_cyclic = tegra_adma_prep_dma_cyclic; + tdma->dma_dev.device_config = tegra_adma_slave_config; + tdma->dma_dev.device_tx_status = tegra_adma_tx_status; + tdma->dma_dev.device_terminate_all = tegra_adma_terminate_all; + tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + + ret = dma_async_device_register(&tdma->dma_dev); + if (ret < 0) { + dev_err(&pdev->dev, "ADMA registration failed: %d\n", ret); + goto irq_dispose; + } + + ret = of_dma_controller_register(pdev->dev.of_node, + tegra_dma_of_xlate, tdma); + if (ret < 0) { + dev_err(&pdev->dev, "ADMA OF registration failed %d\n", ret); + goto dma_remove; + } + + pm_runtime_put(&pdev->dev); + + dev_info(&pdev->dev, "Tegra210 ADMA driver registered %d channels\n", + tdma->nr_channels); + + return 0; + +dma_remove: + dma_async_device_unregister(&tdma->dma_dev); +irq_dispose: + while (--i >= 0) + irq_dispose_mapping(tdma->channels[i].irq); +rpm_put: + pm_runtime_put_sync(&pdev->dev); +rpm_disable: + pm_runtime_disable(&pdev->dev); +clk_destroy: + pm_clk_destroy(&pdev->dev); + + return ret; +} + +static int tegra_adma_remove(struct platform_device *pdev) +{ + struct tegra_adma *tdma = platform_get_drvdata(pdev); + int i; + + dma_async_device_unregister(&tdma->dma_dev); + + for (i = 0; i < tdma->nr_channels; ++i) + irq_dispose_mapping(tdma->channels[i].irq); + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + pm_clk_destroy(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int tegra_adma_pm_suspend(struct device *dev) +{ + return pm_runtime_suspended(dev) == false; +} +#endif + +static const struct dev_pm_ops tegra_adma_dev_pm_ops = { + SET_RUNTIME_PM_OPS(tegra_adma_runtime_suspend, + tegra_adma_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_adma_pm_suspend, NULL) +}; + +static struct platform_driver tegra_admac_driver = { + .driver = { + .name = "tegra-adma", + .pm = &tegra_adma_dev_pm_ops, + .of_match_table = tegra_adma_of_match, + }, + .probe = tegra_adma_probe, + .remove = tegra_adma_remove, +}; + +module_platform_driver(tegra_admac_driver); + +MODULE_ALIAS("platform:tegra210-adma"); +MODULE_DESCRIPTION("NVIDIA Tegra ADMA driver"); +MODULE_AUTHOR("Dara Ramesh <dramesh@nvidia.com>"); +MODULE_AUTHOR("Jon Hunter <jonathanh@nvidia.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c index ef67f278e..df9118540 100644 --- a/drivers/dma/xilinx/xilinx_vdma.c +++ b/drivers/dma/xilinx/xilinx_vdma.c @@ -16,6 +16,15 @@ * video device (S2MM). Initialization, status, interrupt and management * registers are accessed through an AXI4-Lite slave interface. * + * The AXI Direct Memory Access (AXI DMA) core is a soft Xilinx IP core that + * provides high-bandwidth one dimensional direct memory access between memory + * and AXI4-Stream target peripherals. It supports one receive and one + * transmit channel, both of them optional at synthesis time. + * + * The AXI CDMA, is a soft IP, which provides high-bandwidth Direct Memory + * Access (DMA) between a memory-mapped source address and a memory-mapped + * destination address. + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or @@ -35,116 +44,138 @@ #include <linux/of_platform.h> #include <linux/of_irq.h> #include <linux/slab.h> +#include <linux/clk.h> #include "../dmaengine.h" /* Register/Descriptor Offsets */ -#define XILINX_VDMA_MM2S_CTRL_OFFSET 0x0000 -#define XILINX_VDMA_S2MM_CTRL_OFFSET 0x0030 +#define XILINX_DMA_MM2S_CTRL_OFFSET 0x0000 +#define XILINX_DMA_S2MM_CTRL_OFFSET 0x0030 #define XILINX_VDMA_MM2S_DESC_OFFSET 0x0050 #define XILINX_VDMA_S2MM_DESC_OFFSET 0x00a0 /* Control Registers */ -#define XILINX_VDMA_REG_DMACR 0x0000 -#define XILINX_VDMA_DMACR_DELAY_MAX 0xff -#define XILINX_VDMA_DMACR_DELAY_SHIFT 24 -#define XILINX_VDMA_DMACR_FRAME_COUNT_MAX 0xff -#define XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT 16 -#define XILINX_VDMA_DMACR_ERR_IRQ BIT(14) -#define XILINX_VDMA_DMACR_DLY_CNT_IRQ BIT(13) -#define XILINX_VDMA_DMACR_FRM_CNT_IRQ BIT(12) -#define XILINX_VDMA_DMACR_MASTER_SHIFT 8 -#define XILINX_VDMA_DMACR_FSYNCSRC_SHIFT 5 -#define XILINX_VDMA_DMACR_FRAMECNT_EN BIT(4) -#define XILINX_VDMA_DMACR_GENLOCK_EN BIT(3) -#define XILINX_VDMA_DMACR_RESET BIT(2) -#define XILINX_VDMA_DMACR_CIRC_EN BIT(1) -#define XILINX_VDMA_DMACR_RUNSTOP BIT(0) -#define XILINX_VDMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) - -#define XILINX_VDMA_REG_DMASR 0x0004 -#define XILINX_VDMA_DMASR_EOL_LATE_ERR BIT(15) -#define XILINX_VDMA_DMASR_ERR_IRQ BIT(14) -#define XILINX_VDMA_DMASR_DLY_CNT_IRQ BIT(13) -#define XILINX_VDMA_DMASR_FRM_CNT_IRQ BIT(12) -#define XILINX_VDMA_DMASR_SOF_LATE_ERR BIT(11) -#define XILINX_VDMA_DMASR_SG_DEC_ERR BIT(10) -#define XILINX_VDMA_DMASR_SG_SLV_ERR BIT(9) -#define XILINX_VDMA_DMASR_EOF_EARLY_ERR BIT(8) -#define XILINX_VDMA_DMASR_SOF_EARLY_ERR BIT(7) -#define XILINX_VDMA_DMASR_DMA_DEC_ERR BIT(6) -#define XILINX_VDMA_DMASR_DMA_SLAVE_ERR BIT(5) -#define XILINX_VDMA_DMASR_DMA_INT_ERR BIT(4) -#define XILINX_VDMA_DMASR_IDLE BIT(1) -#define XILINX_VDMA_DMASR_HALTED BIT(0) -#define XILINX_VDMA_DMASR_DELAY_MASK GENMASK(31, 24) -#define XILINX_VDMA_DMASR_FRAME_COUNT_MASK GENMASK(23, 16) - -#define XILINX_VDMA_REG_CURDESC 0x0008 -#define XILINX_VDMA_REG_TAILDESC 0x0010 -#define XILINX_VDMA_REG_REG_INDEX 0x0014 -#define XILINX_VDMA_REG_FRMSTORE 0x0018 -#define XILINX_VDMA_REG_THRESHOLD 0x001c -#define XILINX_VDMA_REG_FRMPTR_STS 0x0024 -#define XILINX_VDMA_REG_PARK_PTR 0x0028 -#define XILINX_VDMA_PARK_PTR_WR_REF_SHIFT 8 -#define XILINX_VDMA_PARK_PTR_RD_REF_SHIFT 0 -#define XILINX_VDMA_REG_VDMA_VERSION 0x002c +#define XILINX_DMA_REG_DMACR 0x0000 +#define XILINX_DMA_DMACR_DELAY_MAX 0xff +#define XILINX_DMA_DMACR_DELAY_SHIFT 24 +#define XILINX_DMA_DMACR_FRAME_COUNT_MAX 0xff +#define XILINX_DMA_DMACR_FRAME_COUNT_SHIFT 16 +#define XILINX_DMA_DMACR_ERR_IRQ BIT(14) +#define XILINX_DMA_DMACR_DLY_CNT_IRQ BIT(13) +#define XILINX_DMA_DMACR_FRM_CNT_IRQ BIT(12) +#define XILINX_DMA_DMACR_MASTER_SHIFT 8 +#define XILINX_DMA_DMACR_FSYNCSRC_SHIFT 5 +#define XILINX_DMA_DMACR_FRAMECNT_EN BIT(4) +#define XILINX_DMA_DMACR_GENLOCK_EN BIT(3) +#define XILINX_DMA_DMACR_RESET BIT(2) +#define XILINX_DMA_DMACR_CIRC_EN BIT(1) +#define XILINX_DMA_DMACR_RUNSTOP BIT(0) +#define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) + +#define XILINX_DMA_REG_DMASR 0x0004 +#define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15) +#define XILINX_DMA_DMASR_ERR_IRQ BIT(14) +#define XILINX_DMA_DMASR_DLY_CNT_IRQ BIT(13) +#define XILINX_DMA_DMASR_FRM_CNT_IRQ BIT(12) +#define XILINX_DMA_DMASR_SOF_LATE_ERR BIT(11) +#define XILINX_DMA_DMASR_SG_DEC_ERR BIT(10) +#define XILINX_DMA_DMASR_SG_SLV_ERR BIT(9) +#define XILINX_DMA_DMASR_EOF_EARLY_ERR BIT(8) +#define XILINX_DMA_DMASR_SOF_EARLY_ERR BIT(7) +#define XILINX_DMA_DMASR_DMA_DEC_ERR BIT(6) +#define XILINX_DMA_DMASR_DMA_SLAVE_ERR BIT(5) +#define XILINX_DMA_DMASR_DMA_INT_ERR BIT(4) +#define XILINX_DMA_DMASR_IDLE BIT(1) +#define XILINX_DMA_DMASR_HALTED BIT(0) +#define XILINX_DMA_DMASR_DELAY_MASK GENMASK(31, 24) +#define XILINX_DMA_DMASR_FRAME_COUNT_MASK GENMASK(23, 16) + +#define XILINX_DMA_REG_CURDESC 0x0008 +#define XILINX_DMA_REG_TAILDESC 0x0010 +#define XILINX_DMA_REG_REG_INDEX 0x0014 +#define XILINX_DMA_REG_FRMSTORE 0x0018 +#define XILINX_DMA_REG_THRESHOLD 0x001c +#define XILINX_DMA_REG_FRMPTR_STS 0x0024 +#define XILINX_DMA_REG_PARK_PTR 0x0028 +#define XILINX_DMA_PARK_PTR_WR_REF_SHIFT 8 +#define XILINX_DMA_PARK_PTR_RD_REF_SHIFT 0 +#define XILINX_DMA_REG_VDMA_VERSION 0x002c /* Register Direct Mode Registers */ -#define XILINX_VDMA_REG_VSIZE 0x0000 -#define XILINX_VDMA_REG_HSIZE 0x0004 +#define XILINX_DMA_REG_VSIZE 0x0000 +#define XILINX_DMA_REG_HSIZE 0x0004 -#define XILINX_VDMA_REG_FRMDLY_STRIDE 0x0008 -#define XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT 24 -#define XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT 0 +#define XILINX_DMA_REG_FRMDLY_STRIDE 0x0008 +#define XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT 24 +#define XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT 0 #define XILINX_VDMA_REG_START_ADDRESS(n) (0x000c + 4 * (n)) +#define XILINX_VDMA_REG_START_ADDRESS_64(n) (0x000c + 8 * (n)) /* HW specific definitions */ -#define XILINX_VDMA_MAX_CHANS_PER_DEVICE 0x2 - -#define XILINX_VDMA_DMAXR_ALL_IRQ_MASK \ - (XILINX_VDMA_DMASR_FRM_CNT_IRQ | \ - XILINX_VDMA_DMASR_DLY_CNT_IRQ | \ - XILINX_VDMA_DMASR_ERR_IRQ) - -#define XILINX_VDMA_DMASR_ALL_ERR_MASK \ - (XILINX_VDMA_DMASR_EOL_LATE_ERR | \ - XILINX_VDMA_DMASR_SOF_LATE_ERR | \ - XILINX_VDMA_DMASR_SG_DEC_ERR | \ - XILINX_VDMA_DMASR_SG_SLV_ERR | \ - XILINX_VDMA_DMASR_EOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_SOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_DMA_DEC_ERR | \ - XILINX_VDMA_DMASR_DMA_SLAVE_ERR | \ - XILINX_VDMA_DMASR_DMA_INT_ERR) +#define XILINX_DMA_MAX_CHANS_PER_DEVICE 0x2 + +#define XILINX_DMA_DMAXR_ALL_IRQ_MASK \ + (XILINX_DMA_DMASR_FRM_CNT_IRQ | \ + XILINX_DMA_DMASR_DLY_CNT_IRQ | \ + XILINX_DMA_DMASR_ERR_IRQ) + +#define XILINX_DMA_DMASR_ALL_ERR_MASK \ + (XILINX_DMA_DMASR_EOL_LATE_ERR | \ + XILINX_DMA_DMASR_SOF_LATE_ERR | \ + XILINX_DMA_DMASR_SG_DEC_ERR | \ + XILINX_DMA_DMASR_SG_SLV_ERR | \ + XILINX_DMA_DMASR_EOF_EARLY_ERR | \ + XILINX_DMA_DMASR_SOF_EARLY_ERR | \ + XILINX_DMA_DMASR_DMA_DEC_ERR | \ + XILINX_DMA_DMASR_DMA_SLAVE_ERR | \ + XILINX_DMA_DMASR_DMA_INT_ERR) /* * Recoverable errors are DMA Internal error, SOF Early, EOF Early * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC * is enabled in the h/w system. */ -#define XILINX_VDMA_DMASR_ERR_RECOVER_MASK \ - (XILINX_VDMA_DMASR_SOF_LATE_ERR | \ - XILINX_VDMA_DMASR_EOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_SOF_EARLY_ERR | \ - XILINX_VDMA_DMASR_DMA_INT_ERR) +#define XILINX_DMA_DMASR_ERR_RECOVER_MASK \ + (XILINX_DMA_DMASR_SOF_LATE_ERR | \ + XILINX_DMA_DMASR_EOF_EARLY_ERR | \ + XILINX_DMA_DMASR_SOF_EARLY_ERR | \ + XILINX_DMA_DMASR_DMA_INT_ERR) /* Axi VDMA Flush on Fsync bits */ -#define XILINX_VDMA_FLUSH_S2MM 3 -#define XILINX_VDMA_FLUSH_MM2S 2 -#define XILINX_VDMA_FLUSH_BOTH 1 +#define XILINX_DMA_FLUSH_S2MM 3 +#define XILINX_DMA_FLUSH_MM2S 2 +#define XILINX_DMA_FLUSH_BOTH 1 /* Delay loop counter to prevent hardware failure */ -#define XILINX_VDMA_LOOP_COUNT 1000000 +#define XILINX_DMA_LOOP_COUNT 1000000 + +/* AXI DMA Specific Registers/Offsets */ +#define XILINX_DMA_REG_SRCDSTADDR 0x18 +#define XILINX_DMA_REG_BTT 0x28 + +/* AXI DMA Specific Masks/Bit fields */ +#define XILINX_DMA_MAX_TRANS_LEN GENMASK(22, 0) +#define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16) +#define XILINX_DMA_CR_COALESCE_SHIFT 16 +#define XILINX_DMA_BD_SOP BIT(27) +#define XILINX_DMA_BD_EOP BIT(26) +#define XILINX_DMA_COALESCE_MAX 255 +#define XILINX_DMA_NUM_APP_WORDS 5 + +/* AXI CDMA Specific Registers/Offsets */ +#define XILINX_CDMA_REG_SRCADDR 0x18 +#define XILINX_CDMA_REG_DSTADDR 0x20 + +/* AXI CDMA Specific Masks */ +#define XILINX_CDMA_CR_SGMODE BIT(3) /** * struct xilinx_vdma_desc_hw - Hardware Descriptor * @next_desc: Next Descriptor Pointer @0x00 * @pad1: Reserved @0x04 * @buf_addr: Buffer address @0x08 - * @pad2: Reserved @0x0C + * @buf_addr_msb: MSB of Buffer address @0x0C * @vsize: Vertical Size @0x10 * @hsize: Horizontal Size @0x14 * @stride: Number of bytes between the first @@ -154,13 +185,59 @@ struct xilinx_vdma_desc_hw { u32 next_desc; u32 pad1; u32 buf_addr; - u32 pad2; + u32 buf_addr_msb; u32 vsize; u32 hsize; u32 stride; } __aligned(64); /** + * struct xilinx_axidma_desc_hw - Hardware Descriptor for AXI DMA + * @next_desc: Next Descriptor Pointer @0x00 + * @pad1: Reserved @0x04 + * @buf_addr: Buffer address @0x08 + * @pad2: Reserved @0x0C + * @pad3: Reserved @0x10 + * @pad4: Reserved @0x14 + * @control: Control field @0x18 + * @status: Status field @0x1C + * @app: APP Fields @0x20 - 0x30 + */ +struct xilinx_axidma_desc_hw { + u32 next_desc; + u32 pad1; + u32 buf_addr; + u32 pad2; + u32 pad3; + u32 pad4; + u32 control; + u32 status; + u32 app[XILINX_DMA_NUM_APP_WORDS]; +} __aligned(64); + +/** + * struct xilinx_cdma_desc_hw - Hardware Descriptor + * @next_desc: Next Descriptor Pointer @0x00 + * @pad1: Reserved @0x04 + * @src_addr: Source address @0x08 + * @pad2: Reserved @0x0C + * @dest_addr: Destination address @0x10 + * @pad3: Reserved @0x14 + * @control: Control field @0x18 + * @status: Status field @0x1C + */ +struct xilinx_cdma_desc_hw { + u32 next_desc; + u32 pad1; + u32 src_addr; + u32 pad2; + u32 dest_addr; + u32 pad3; + u32 control; + u32 status; +} __aligned(64); + +/** * struct xilinx_vdma_tx_segment - Descriptor segment * @hw: Hardware descriptor * @node: Node in the descriptor segments list @@ -173,19 +250,43 @@ struct xilinx_vdma_tx_segment { } __aligned(64); /** - * struct xilinx_vdma_tx_descriptor - Per Transaction structure + * struct xilinx_axidma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_axidma_tx_segment { + struct xilinx_axidma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_cdma_tx_segment - Descriptor segment + * @hw: Hardware descriptor + * @node: Node in the descriptor segments list + * @phys: Physical address of segment + */ +struct xilinx_cdma_tx_segment { + struct xilinx_cdma_desc_hw hw; + struct list_head node; + dma_addr_t phys; +} __aligned(64); + +/** + * struct xilinx_dma_tx_descriptor - Per Transaction structure * @async_tx: Async transaction descriptor * @segments: TX segments list * @node: Node in the channel descriptors list */ -struct xilinx_vdma_tx_descriptor { +struct xilinx_dma_tx_descriptor { struct dma_async_tx_descriptor async_tx; struct list_head segments; struct list_head node; }; /** - * struct xilinx_vdma_chan - Driver specific VDMA channel structure + * struct xilinx_dma_chan - Driver specific DMA channel structure * @xdev: Driver specific device structure * @ctrl_offset: Control registers offset * @desc_offset: TX descriptor registers offset @@ -207,9 +308,14 @@ struct xilinx_vdma_tx_descriptor { * @config: Device configuration info * @flush_on_fsync: Flush on Frame sync * @desc_pendingcount: Descriptor pending count + * @ext_addr: Indicates 64 bit addressing is supported by dma channel + * @desc_submitcount: Descriptor h/w submitted count + * @residue: Residue for AXI DMA + * @seg_v: Statically allocated segments base + * @start_transfer: Differentiate b/w DMA IP's transfer */ -struct xilinx_vdma_chan { - struct xilinx_vdma_device *xdev; +struct xilinx_dma_chan { + struct xilinx_dma_device *xdev; u32 ctrl_offset; u32 desc_offset; spinlock_t lock; @@ -230,73 +336,122 @@ struct xilinx_vdma_chan { struct xilinx_vdma_config config; bool flush_on_fsync; u32 desc_pendingcount; + bool ext_addr; + u32 desc_submitcount; + u32 residue; + struct xilinx_axidma_tx_segment *seg_v; + void (*start_transfer)(struct xilinx_dma_chan *chan); +}; + +struct xilinx_dma_config { + enum xdma_ip_type dmatype; + int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **txs_clk, + struct clk **rx_clk, struct clk **rxs_clk); }; /** - * struct xilinx_vdma_device - VDMA device structure + * struct xilinx_dma_device - DMA device structure * @regs: I/O mapped base address * @dev: Device Structure * @common: DMA device structure - * @chan: Driver specific VDMA channel + * @chan: Driver specific DMA channel * @has_sg: Specifies whether Scatter-Gather is present or not * @flush_on_fsync: Flush on frame sync + * @ext_addr: Indicates 64 bit addressing is supported by dma device + * @pdev: Platform device structure pointer + * @dma_config: DMA config structure + * @axi_clk: DMA Axi4-lite interace clock + * @tx_clk: DMA mm2s clock + * @txs_clk: DMA mm2s stream clock + * @rx_clk: DMA s2mm clock + * @rxs_clk: DMA s2mm stream clock */ -struct xilinx_vdma_device { +struct xilinx_dma_device { void __iomem *regs; struct device *dev; struct dma_device common; - struct xilinx_vdma_chan *chan[XILINX_VDMA_MAX_CHANS_PER_DEVICE]; + struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE]; bool has_sg; u32 flush_on_fsync; + bool ext_addr; + struct platform_device *pdev; + const struct xilinx_dma_config *dma_config; + struct clk *axi_clk; + struct clk *tx_clk; + struct clk *txs_clk; + struct clk *rx_clk; + struct clk *rxs_clk; }; /* Macros */ #define to_xilinx_chan(chan) \ - container_of(chan, struct xilinx_vdma_chan, common) -#define to_vdma_tx_descriptor(tx) \ - container_of(tx, struct xilinx_vdma_tx_descriptor, async_tx) -#define xilinx_vdma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ + container_of(chan, struct xilinx_dma_chan, common) +#define to_dma_tx_descriptor(tx) \ + container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) +#define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \ cond, delay_us, timeout_us) /* IO accessors */ -static inline u32 vdma_read(struct xilinx_vdma_chan *chan, u32 reg) +static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) { return ioread32(chan->xdev->regs + reg); } -static inline void vdma_write(struct xilinx_vdma_chan *chan, u32 reg, u32 value) +static inline void dma_write(struct xilinx_dma_chan *chan, u32 reg, u32 value) { iowrite32(value, chan->xdev->regs + reg); } -static inline void vdma_desc_write(struct xilinx_vdma_chan *chan, u32 reg, +static inline void vdma_desc_write(struct xilinx_dma_chan *chan, u32 reg, u32 value) { - vdma_write(chan, chan->desc_offset + reg, value); + dma_write(chan, chan->desc_offset + reg, value); } -static inline u32 vdma_ctrl_read(struct xilinx_vdma_chan *chan, u32 reg) +static inline u32 dma_ctrl_read(struct xilinx_dma_chan *chan, u32 reg) { - return vdma_read(chan, chan->ctrl_offset + reg); + return dma_read(chan, chan->ctrl_offset + reg); } -static inline void vdma_ctrl_write(struct xilinx_vdma_chan *chan, u32 reg, +static inline void dma_ctrl_write(struct xilinx_dma_chan *chan, u32 reg, u32 value) { - vdma_write(chan, chan->ctrl_offset + reg, value); + dma_write(chan, chan->ctrl_offset + reg, value); } -static inline void vdma_ctrl_clr(struct xilinx_vdma_chan *chan, u32 reg, +static inline void dma_ctrl_clr(struct xilinx_dma_chan *chan, u32 reg, u32 clr) { - vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) & ~clr); + dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) & ~clr); } -static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg, +static inline void dma_ctrl_set(struct xilinx_dma_chan *chan, u32 reg, u32 set) { - vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) | set); + dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) | set); +} + +/** + * vdma_desc_write_64 - 64-bit descriptor write + * @chan: Driver specific VDMA channel + * @reg: Register to write + * @value_lsb: lower address of the descriptor. + * @value_msb: upper address of the descriptor. + * + * Since vdma driver is trying to write to a register offset which is not a + * multiple of 64 bits(ex : 0x5c), we are writing as two separate 32 bits + * instead of a single 64 bit register write. + */ +static inline void vdma_desc_write_64(struct xilinx_dma_chan *chan, u32 reg, + u32 value_lsb, u32 value_msb) +{ + /* Write the lsb 32 bits*/ + writel(value_lsb, chan->xdev->regs + chan->desc_offset + reg); + + /* Write the msb 32 bits */ + writel(value_msb, chan->xdev->regs + chan->desc_offset + reg + 4); } /* ----------------------------------------------------------------------------- @@ -305,16 +460,59 @@ static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg, /** * xilinx_vdma_alloc_tx_segment - Allocate transaction segment - * @chan: Driver specific VDMA channel + * @chan: Driver specific DMA channel * * Return: The allocated segment on success and NULL on failure. */ static struct xilinx_vdma_tx_segment * -xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan) +xilinx_vdma_alloc_tx_segment(struct xilinx_dma_chan *chan) { struct xilinx_vdma_tx_segment *segment; dma_addr_t phys; + segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys); + if (!segment) + return NULL; + + segment->phys = phys; + + return segment; +} + +/** + * xilinx_cdma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_cdma_tx_segment * +xilinx_cdma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_cdma_tx_segment *segment; + dma_addr_t phys; + + segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys); + if (!segment) + return NULL; + + memset(segment, 0, sizeof(*segment)); + segment->phys = phys; + + return segment; +} + +/** + * xilinx_axidma_alloc_tx_segment - Allocate transaction segment + * @chan: Driver specific DMA channel + * + * Return: The allocated segment on success and NULL on failure. + */ +static struct xilinx_axidma_tx_segment * +xilinx_axidma_alloc_tx_segment(struct xilinx_dma_chan *chan) +{ + struct xilinx_axidma_tx_segment *segment; + dma_addr_t phys; + segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys); if (!segment) return NULL; @@ -326,26 +524,48 @@ xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan) } /** + * xilinx_dma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_dma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_axidma_tx_segment *segment) +{ + dma_pool_free(chan->desc_pool, segment, segment->phys); +} + +/** + * xilinx_cdma_free_tx_segment - Free transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment + */ +static void xilinx_cdma_free_tx_segment(struct xilinx_dma_chan *chan, + struct xilinx_cdma_tx_segment *segment) +{ + dma_pool_free(chan->desc_pool, segment, segment->phys); +} + +/** * xilinx_vdma_free_tx_segment - Free transaction segment - * @chan: Driver specific VDMA channel - * @segment: VDMA transaction segment + * @chan: Driver specific DMA channel + * @segment: DMA transaction segment */ -static void xilinx_vdma_free_tx_segment(struct xilinx_vdma_chan *chan, +static void xilinx_vdma_free_tx_segment(struct xilinx_dma_chan *chan, struct xilinx_vdma_tx_segment *segment) { dma_pool_free(chan->desc_pool, segment, segment->phys); } /** - * xilinx_vdma_tx_descriptor - Allocate transaction descriptor - * @chan: Driver specific VDMA channel + * xilinx_dma_tx_descriptor - Allocate transaction descriptor + * @chan: Driver specific DMA channel * * Return: The allocated descriptor on success and NULL on failure. */ -static struct xilinx_vdma_tx_descriptor * -xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan) +static struct xilinx_dma_tx_descriptor * +xilinx_dma_alloc_tx_descriptor(struct xilinx_dma_chan *chan) { - struct xilinx_vdma_tx_descriptor *desc; + struct xilinx_dma_tx_descriptor *desc; desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) @@ -357,22 +577,38 @@ xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_free_tx_descriptor - Free transaction descriptor - * @chan: Driver specific VDMA channel - * @desc: VDMA transaction descriptor + * xilinx_dma_free_tx_descriptor - Free transaction descriptor + * @chan: Driver specific DMA channel + * @desc: DMA transaction descriptor */ static void -xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan, - struct xilinx_vdma_tx_descriptor *desc) +xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc) { struct xilinx_vdma_tx_segment *segment, *next; + struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next; + struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next; if (!desc) return; - list_for_each_entry_safe(segment, next, &desc->segments, node) { - list_del(&segment->node); - xilinx_vdma_free_tx_segment(chan, segment); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + list_for_each_entry_safe(segment, next, &desc->segments, node) { + list_del(&segment->node); + xilinx_vdma_free_tx_segment(chan, segment); + } + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + list_for_each_entry_safe(cdma_segment, cdma_next, + &desc->segments, node) { + list_del(&cdma_segment->node); + xilinx_cdma_free_tx_segment(chan, cdma_segment); + } + } else { + list_for_each_entry_safe(axidma_segment, axidma_next, + &desc->segments, node) { + list_del(&axidma_segment->node); + xilinx_dma_free_tx_segment(chan, axidma_segment); + } } kfree(desc); @@ -381,60 +617,62 @@ xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan, /* Required functions */ /** - * xilinx_vdma_free_desc_list - Free descriptors list - * @chan: Driver specific VDMA channel + * xilinx_dma_free_desc_list - Free descriptors list + * @chan: Driver specific DMA channel * @list: List to parse and delete the descriptor */ -static void xilinx_vdma_free_desc_list(struct xilinx_vdma_chan *chan, +static void xilinx_dma_free_desc_list(struct xilinx_dma_chan *chan, struct list_head *list) { - struct xilinx_vdma_tx_descriptor *desc, *next; + struct xilinx_dma_tx_descriptor *desc, *next; list_for_each_entry_safe(desc, next, list, node) { list_del(&desc->node); - xilinx_vdma_free_tx_descriptor(chan, desc); + xilinx_dma_free_tx_descriptor(chan, desc); } } /** - * xilinx_vdma_free_descriptors - Free channel descriptors - * @chan: Driver specific VDMA channel + * xilinx_dma_free_descriptors - Free channel descriptors + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_free_descriptors(struct xilinx_vdma_chan *chan) +static void xilinx_dma_free_descriptors(struct xilinx_dma_chan *chan) { unsigned long flags; spin_lock_irqsave(&chan->lock, flags); - xilinx_vdma_free_desc_list(chan, &chan->pending_list); - xilinx_vdma_free_desc_list(chan, &chan->done_list); - xilinx_vdma_free_desc_list(chan, &chan->active_list); + xilinx_dma_free_desc_list(chan, &chan->pending_list); + xilinx_dma_free_desc_list(chan, &chan->done_list); + xilinx_dma_free_desc_list(chan, &chan->active_list); spin_unlock_irqrestore(&chan->lock, flags); } /** - * xilinx_vdma_free_chan_resources - Free channel resources + * xilinx_dma_free_chan_resources - Free channel resources * @dchan: DMA channel */ -static void xilinx_vdma_free_chan_resources(struct dma_chan *dchan) +static void xilinx_dma_free_chan_resources(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); dev_dbg(chan->dev, "Free all channel resources.\n"); - xilinx_vdma_free_descriptors(chan); + xilinx_dma_free_descriptors(chan); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + xilinx_dma_free_tx_segment(chan, chan->seg_v); dma_pool_destroy(chan->desc_pool); chan->desc_pool = NULL; } /** - * xilinx_vdma_chan_desc_cleanup - Clean channel descriptors - * @chan: Driver specific VDMA channel + * xilinx_dma_chan_desc_cleanup - Clean channel descriptors + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan) +static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan) { - struct xilinx_vdma_tx_descriptor *desc, *next; + struct xilinx_dma_tx_descriptor *desc, *next; unsigned long flags; spin_lock_irqsave(&chan->lock, flags); @@ -457,32 +695,32 @@ static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan) /* Run any dependencies, then free the descriptor */ dma_run_dependencies(&desc->async_tx); - xilinx_vdma_free_tx_descriptor(chan, desc); + xilinx_dma_free_tx_descriptor(chan, desc); } spin_unlock_irqrestore(&chan->lock, flags); } /** - * xilinx_vdma_do_tasklet - Schedule completion tasklet - * @data: Pointer to the Xilinx VDMA channel structure + * xilinx_dma_do_tasklet - Schedule completion tasklet + * @data: Pointer to the Xilinx DMA channel structure */ -static void xilinx_vdma_do_tasklet(unsigned long data) +static void xilinx_dma_do_tasklet(unsigned long data) { - struct xilinx_vdma_chan *chan = (struct xilinx_vdma_chan *)data; + struct xilinx_dma_chan *chan = (struct xilinx_dma_chan *)data; - xilinx_vdma_chan_desc_cleanup(chan); + xilinx_dma_chan_desc_cleanup(chan); } /** - * xilinx_vdma_alloc_chan_resources - Allocate channel resources + * xilinx_dma_alloc_chan_resources - Allocate channel resources * @dchan: DMA channel * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan) +static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); /* Has this channel already been allocated? */ if (chan->desc_pool) @@ -492,10 +730,26 @@ static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan) * We need the descriptor to be aligned to 64bytes * for meeting Xilinx VDMA specification requirement. */ - chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool", - chan->dev, - sizeof(struct xilinx_vdma_tx_segment), - __alignof__(struct xilinx_vdma_tx_segment), 0); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + chan->desc_pool = dma_pool_create("xilinx_dma_desc_pool", + chan->dev, + sizeof(struct xilinx_axidma_tx_segment), + __alignof__(struct xilinx_axidma_tx_segment), + 0); + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool", + chan->dev, + sizeof(struct xilinx_cdma_tx_segment), + __alignof__(struct xilinx_cdma_tx_segment), + 0); + } else { + chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool", + chan->dev, + sizeof(struct xilinx_vdma_tx_segment), + __alignof__(struct xilinx_vdma_tx_segment), + 0); + } + if (!chan->desc_pool) { dev_err(chan->dev, "unable to allocate channel %d descriptor pool\n", @@ -503,110 +757,160 @@ static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan) return -ENOMEM; } + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + /* + * For AXI DMA case after submitting a pending_list, keep + * an extra segment allocated so that the "next descriptor" + * pointer on the tail descriptor always points to a + * valid descriptor, even when paused after reaching taildesc. + * This way, it is possible to issue additional + * transfers without halting and restarting the channel. + */ + chan->seg_v = xilinx_axidma_alloc_tx_segment(chan); + dma_cookie_init(dchan); + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + /* For AXI DMA resetting once channel will reset the + * other channel as well so enable the interrupts here. + */ + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); + } + + if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg) + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_CDMA_CR_SGMODE); + return 0; } /** - * xilinx_vdma_tx_status - Get VDMA transaction status + * xilinx_dma_tx_status - Get DMA transaction status * @dchan: DMA channel * @cookie: Transaction identifier * @txstate: Transaction state * * Return: DMA transaction status */ -static enum dma_status xilinx_vdma_tx_status(struct dma_chan *dchan, +static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, struct dma_tx_state *txstate) { - return dma_cookie_status(dchan, cookie, txstate); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_axidma_tx_segment *segment; + struct xilinx_axidma_desc_hw *hw; + enum dma_status ret; + unsigned long flags; + u32 residue = 0; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + spin_lock_irqsave(&chan->lock, flags); + + desc = list_last_entry(&chan->active_list, + struct xilinx_dma_tx_descriptor, node); + if (chan->has_sg) { + list_for_each_entry(segment, &desc->segments, node) { + hw = &segment->hw; + residue += (hw->control - hw->status) & + XILINX_DMA_MAX_TRANS_LEN; + } + } + spin_unlock_irqrestore(&chan->lock, flags); + + chan->residue = residue; + dma_set_residue(txstate, chan->residue); + } + + return ret; } /** - * xilinx_vdma_is_running - Check if VDMA channel is running - * @chan: Driver specific VDMA channel + * xilinx_dma_is_running - Check if DMA channel is running + * @chan: Driver specific DMA channel * * Return: '1' if running, '0' if not. */ -static bool xilinx_vdma_is_running(struct xilinx_vdma_chan *chan) +static bool xilinx_dma_is_running(struct xilinx_dma_chan *chan) { - return !(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & - XILINX_VDMA_DMASR_HALTED) && - (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) & - XILINX_VDMA_DMACR_RUNSTOP); + return !(dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_HALTED) && + (dma_ctrl_read(chan, XILINX_DMA_REG_DMACR) & + XILINX_DMA_DMACR_RUNSTOP); } /** - * xilinx_vdma_is_idle - Check if VDMA channel is idle - * @chan: Driver specific VDMA channel + * xilinx_dma_is_idle - Check if DMA channel is idle + * @chan: Driver specific DMA channel * * Return: '1' if idle, '0' if not. */ -static bool xilinx_vdma_is_idle(struct xilinx_vdma_chan *chan) +static bool xilinx_dma_is_idle(struct xilinx_dma_chan *chan) { - return vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) & - XILINX_VDMA_DMASR_IDLE; + return dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) & + XILINX_DMA_DMASR_IDLE; } /** - * xilinx_vdma_halt - Halt VDMA channel - * @chan: Driver specific VDMA channel + * xilinx_dma_halt - Halt DMA channel + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_halt(struct xilinx_vdma_chan *chan) +static void xilinx_dma_halt(struct xilinx_dma_chan *chan) { int err; u32 val; - vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP); + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP); /* Wait for the hardware to halt */ - err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMASR, val, - (val & XILINX_VDMA_DMASR_HALTED), 0, - XILINX_VDMA_LOOP_COUNT); + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val, + (val & XILINX_DMA_DMASR_HALTED), 0, + XILINX_DMA_LOOP_COUNT); if (err) { dev_err(chan->dev, "Cannot stop channel %p: %x\n", - chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); chan->err = true; } - - return; } /** - * xilinx_vdma_start - Start VDMA channel - * @chan: Driver specific VDMA channel + * xilinx_dma_start - Start DMA channel + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_start(struct xilinx_vdma_chan *chan) +static void xilinx_dma_start(struct xilinx_dma_chan *chan) { int err; u32 val; - vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP); + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP); /* Wait for the hardware to start */ - err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMASR, val, - !(val & XILINX_VDMA_DMASR_HALTED), 0, - XILINX_VDMA_LOOP_COUNT); + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val, + !(val & XILINX_DMA_DMASR_HALTED), 0, + XILINX_DMA_LOOP_COUNT); if (err) { dev_err(chan->dev, "Cannot start channel %p: %x\n", - chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); chan->err = true; } - - return; } /** * xilinx_vdma_start_transfer - Starts VDMA transfer * @chan: Driver specific channel struct pointer */ -static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) +static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan) { struct xilinx_vdma_config *config = &chan->config; - struct xilinx_vdma_tx_descriptor *desc, *tail_desc; + struct xilinx_dma_tx_descriptor *desc, *tail_desc; u32 reg; struct xilinx_vdma_tx_segment *tail_segment; @@ -618,16 +922,16 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) return; desc = list_first_entry(&chan->pending_list, - struct xilinx_vdma_tx_descriptor, node); + struct xilinx_dma_tx_descriptor, node); tail_desc = list_last_entry(&chan->pending_list, - struct xilinx_vdma_tx_descriptor, node); + struct xilinx_dma_tx_descriptor, node); tail_segment = list_last_entry(&tail_desc->segments, struct xilinx_vdma_tx_segment, node); /* If it is SG mode and hardware is busy, cannot submit */ - if (chan->has_sg && xilinx_vdma_is_running(chan) && - !xilinx_vdma_is_idle(chan)) { + if (chan->has_sg && xilinx_dma_is_running(chan) && + !xilinx_dma_is_idle(chan)) { dev_dbg(chan->dev, "DMA controller still busy\n"); return; } @@ -637,19 +941,19 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) * done, start new transfers */ if (chan->has_sg) - vdma_ctrl_write(chan, XILINX_VDMA_REG_CURDESC, + dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC, desc->async_tx.phys); /* Configure the hardware using info in the config structure */ - reg = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR); + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); if (config->frm_cnt_en) - reg |= XILINX_VDMA_DMACR_FRAMECNT_EN; + reg |= XILINX_DMA_DMACR_FRAMECNT_EN; else - reg &= ~XILINX_VDMA_DMACR_FRAMECNT_EN; + reg &= ~XILINX_DMA_DMACR_FRAMECNT_EN; /* Configure channel to allow number frame buffers */ - vdma_ctrl_write(chan, XILINX_VDMA_REG_FRMSTORE, + dma_ctrl_write(chan, XILINX_DMA_REG_FRMSTORE, chan->desc_pendingcount); /* @@ -657,45 +961,53 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) * In direct register mode, if not parking, enable circular mode */ if (chan->has_sg || !config->park) - reg |= XILINX_VDMA_DMACR_CIRC_EN; + reg |= XILINX_DMA_DMACR_CIRC_EN; if (config->park) - reg &= ~XILINX_VDMA_DMACR_CIRC_EN; + reg &= ~XILINX_DMA_DMACR_CIRC_EN; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, reg); + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); if (config->park && (config->park_frm >= 0) && (config->park_frm < chan->num_frms)) { if (chan->direction == DMA_MEM_TO_DEV) - vdma_write(chan, XILINX_VDMA_REG_PARK_PTR, + dma_write(chan, XILINX_DMA_REG_PARK_PTR, config->park_frm << - XILINX_VDMA_PARK_PTR_RD_REF_SHIFT); + XILINX_DMA_PARK_PTR_RD_REF_SHIFT); else - vdma_write(chan, XILINX_VDMA_REG_PARK_PTR, + dma_write(chan, XILINX_DMA_REG_PARK_PTR, config->park_frm << - XILINX_VDMA_PARK_PTR_WR_REF_SHIFT); + XILINX_DMA_PARK_PTR_WR_REF_SHIFT); } /* Start the hardware */ - xilinx_vdma_start(chan); + xilinx_dma_start(chan); if (chan->err) return; /* Start the transfer */ if (chan->has_sg) { - vdma_ctrl_write(chan, XILINX_VDMA_REG_TAILDESC, + dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC, tail_segment->phys); } else { struct xilinx_vdma_tx_segment *segment, *last = NULL; int i = 0; - list_for_each_entry(desc, &chan->pending_list, node) { - segment = list_first_entry(&desc->segments, - struct xilinx_vdma_tx_segment, node); - vdma_desc_write(chan, + if (chan->desc_submitcount < chan->num_frms) + i = chan->desc_submitcount; + + list_for_each_entry(segment, &desc->segments, node) { + if (chan->ext_addr) + vdma_desc_write_64(chan, + XILINX_VDMA_REG_START_ADDRESS_64(i++), + segment->hw.buf_addr, + segment->hw.buf_addr_msb); + else + vdma_desc_write(chan, XILINX_VDMA_REG_START_ADDRESS(i++), segment->hw.buf_addr); + last = segment; } @@ -703,10 +1015,164 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) return; /* HW expects these parameters to be same for one transaction */ - vdma_desc_write(chan, XILINX_VDMA_REG_HSIZE, last->hw.hsize); - vdma_desc_write(chan, XILINX_VDMA_REG_FRMDLY_STRIDE, + vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize); + vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE, last->hw.stride); - vdma_desc_write(chan, XILINX_VDMA_REG_VSIZE, last->hw.vsize); + vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize); + } + + if (!chan->has_sg) { + list_del(&desc->node); + list_add_tail(&desc->node, &chan->active_list); + chan->desc_submitcount++; + chan->desc_pendingcount--; + if (chan->desc_submitcount == chan->num_frms) + chan->desc_submitcount = 0; + } else { + list_splice_tail_init(&chan->pending_list, &chan->active_list); + chan->desc_pendingcount = 0; + } +} + +/** + * xilinx_cdma_start_transfer - Starts cdma transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; + struct xilinx_cdma_tx_segment *tail_segment; + u32 ctrl_reg = dma_read(chan, XILINX_DMA_REG_DMACR); + + if (chan->err) + return; + + if (list_empty(&chan->pending_list)) + return; + + head_desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_cdma_tx_segment, node); + + if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) { + ctrl_reg &= ~XILINX_DMA_CR_COALESCE_MAX; + ctrl_reg |= chan->desc_pendingcount << + XILINX_DMA_CR_COALESCE_SHIFT; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, ctrl_reg); + } + + if (chan->has_sg) { + dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC, + head_desc->async_tx.phys); + + /* Update tail ptr register which will start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC, + tail_segment->phys); + } else { + /* In simple mode */ + struct xilinx_cdma_tx_segment *segment; + struct xilinx_cdma_desc_hw *hw; + + segment = list_first_entry(&head_desc->segments, + struct xilinx_cdma_tx_segment, + node); + + hw = &segment->hw; + + dma_ctrl_write(chan, XILINX_CDMA_REG_SRCADDR, hw->src_addr); + dma_ctrl_write(chan, XILINX_CDMA_REG_DSTADDR, hw->dest_addr); + + /* Start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_BTT, + hw->control & XILINX_DMA_MAX_TRANS_LEN); + } + + list_splice_tail_init(&chan->pending_list, &chan->active_list); + chan->desc_pendingcount = 0; +} + +/** + * xilinx_dma_start_transfer - Starts DMA transfer + * @chan: Driver specific channel struct pointer + */ +static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan) +{ + struct xilinx_dma_tx_descriptor *head_desc, *tail_desc; + struct xilinx_axidma_tx_segment *tail_segment, *old_head, *new_head; + u32 reg; + + if (chan->err) + return; + + if (list_empty(&chan->pending_list)) + return; + + /* If it is SG mode and hardware is busy, cannot submit */ + if (chan->has_sg && xilinx_dma_is_running(chan) && + !xilinx_dma_is_idle(chan)) { + dev_dbg(chan->dev, "DMA controller still busy\n"); + return; + } + + head_desc = list_first_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_desc = list_last_entry(&chan->pending_list, + struct xilinx_dma_tx_descriptor, node); + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_axidma_tx_segment, node); + + old_head = list_first_entry(&head_desc->segments, + struct xilinx_axidma_tx_segment, node); + new_head = chan->seg_v; + /* Copy Buffer Descriptor fields. */ + new_head->hw = old_head->hw; + + /* Swap and save new reserve */ + list_replace_init(&old_head->node, &new_head->node); + chan->seg_v = old_head; + + tail_segment->hw.next_desc = chan->seg_v->phys; + head_desc->async_tx.phys = new_head->phys; + + reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); + + if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) { + reg &= ~XILINX_DMA_CR_COALESCE_MAX; + reg |= chan->desc_pendingcount << + XILINX_DMA_CR_COALESCE_SHIFT; + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg); + } + + if (chan->has_sg) + dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC, + head_desc->async_tx.phys); + + xilinx_dma_start(chan); + + if (chan->err) + return; + + /* Start the transfer */ + if (chan->has_sg) { + dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC, + tail_segment->phys); + } else { + struct xilinx_axidma_tx_segment *segment; + struct xilinx_axidma_desc_hw *hw; + + segment = list_first_entry(&head_desc->segments, + struct xilinx_axidma_tx_segment, + node); + hw = &segment->hw; + + dma_ctrl_write(chan, XILINX_DMA_REG_SRCDSTADDR, hw->buf_addr); + + /* Start the transfer */ + dma_ctrl_write(chan, XILINX_DMA_REG_BTT, + hw->control & XILINX_DMA_MAX_TRANS_LEN); } list_splice_tail_init(&chan->pending_list, &chan->active_list); @@ -714,28 +1180,28 @@ static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_issue_pending - Issue pending transactions + * xilinx_dma_issue_pending - Issue pending transactions * @dchan: DMA channel */ -static void xilinx_vdma_issue_pending(struct dma_chan *dchan) +static void xilinx_dma_issue_pending(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); unsigned long flags; spin_lock_irqsave(&chan->lock, flags); - xilinx_vdma_start_transfer(chan); + chan->start_transfer(chan); spin_unlock_irqrestore(&chan->lock, flags); } /** - * xilinx_vdma_complete_descriptor - Mark the active descriptor as complete + * xilinx_dma_complete_descriptor - Mark the active descriptor as complete * @chan : xilinx DMA channel * * CONTEXT: hardirq */ -static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan) +static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan) { - struct xilinx_vdma_tx_descriptor *desc, *next; + struct xilinx_dma_tx_descriptor *desc, *next; /* This function was invoked with lock held */ if (list_empty(&chan->active_list)) @@ -749,27 +1215,27 @@ static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_reset - Reset VDMA channel - * @chan: Driver specific VDMA channel + * xilinx_dma_reset - Reset DMA channel + * @chan: Driver specific DMA channel * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan) +static int xilinx_dma_reset(struct xilinx_dma_chan *chan) { int err; u32 tmp; - vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RESET); + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RESET); /* Wait for the hardware to finish reset */ - err = xilinx_vdma_poll_timeout(chan, XILINX_VDMA_REG_DMACR, tmp, - !(tmp & XILINX_VDMA_DMACR_RESET), 0, - XILINX_VDMA_LOOP_COUNT); + err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp, + !(tmp & XILINX_DMA_DMACR_RESET), 0, + XILINX_DMA_LOOP_COUNT); if (err) { dev_err(chan->dev, "reset timeout, cr %x, sr %x\n", - vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR), - vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR)); + dma_ctrl_read(chan, XILINX_DMA_REG_DMACR), + dma_ctrl_read(chan, XILINX_DMA_REG_DMASR)); return -ETIMEDOUT; } @@ -779,48 +1245,48 @@ static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan) } /** - * xilinx_vdma_chan_reset - Reset VDMA channel and enable interrupts - * @chan: Driver specific VDMA channel + * xilinx_dma_chan_reset - Reset DMA channel and enable interrupts + * @chan: Driver specific DMA channel * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_chan_reset(struct xilinx_vdma_chan *chan) +static int xilinx_dma_chan_reset(struct xilinx_dma_chan *chan) { int err; /* Reset VDMA */ - err = xilinx_vdma_reset(chan); + err = xilinx_dma_reset(chan); if (err) return err; /* Enable interrupts */ - vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, - XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); return 0; } /** - * xilinx_vdma_irq_handler - VDMA Interrupt handler + * xilinx_dma_irq_handler - DMA Interrupt handler * @irq: IRQ number - * @data: Pointer to the Xilinx VDMA channel structure + * @data: Pointer to the Xilinx DMA channel structure * * Return: IRQ_HANDLED/IRQ_NONE */ -static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) +static irqreturn_t xilinx_dma_irq_handler(int irq, void *data) { - struct xilinx_vdma_chan *chan = data; + struct xilinx_dma_chan *chan = data; u32 status; /* Read the status and ack the interrupts. */ - status = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR); - if (!(status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK)) + status = dma_ctrl_read(chan, XILINX_DMA_REG_DMASR); + if (!(status & XILINX_DMA_DMAXR_ALL_IRQ_MASK)) return IRQ_NONE; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR, - status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + dma_ctrl_write(chan, XILINX_DMA_REG_DMASR, + status & XILINX_DMA_DMAXR_ALL_IRQ_MASK); - if (status & XILINX_VDMA_DMASR_ERR_IRQ) { + if (status & XILINX_DMA_DMASR_ERR_IRQ) { /* * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the * error is recoverable, ignore it. Otherwise flag the error. @@ -828,22 +1294,23 @@ static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) * Only recoverable errors can be cleared in the DMASR register, * make sure not to write to other error bits to 1. */ - u32 errors = status & XILINX_VDMA_DMASR_ALL_ERR_MASK; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR, - errors & XILINX_VDMA_DMASR_ERR_RECOVER_MASK); + u32 errors = status & XILINX_DMA_DMASR_ALL_ERR_MASK; + + dma_ctrl_write(chan, XILINX_DMA_REG_DMASR, + errors & XILINX_DMA_DMASR_ERR_RECOVER_MASK); if (!chan->flush_on_fsync || - (errors & ~XILINX_VDMA_DMASR_ERR_RECOVER_MASK)) { + (errors & ~XILINX_DMA_DMASR_ERR_RECOVER_MASK)) { dev_err(chan->dev, "Channel %p has errors %x, cdr %x tdr %x\n", chan, errors, - vdma_ctrl_read(chan, XILINX_VDMA_REG_CURDESC), - vdma_ctrl_read(chan, XILINX_VDMA_REG_TAILDESC)); + dma_ctrl_read(chan, XILINX_DMA_REG_CURDESC), + dma_ctrl_read(chan, XILINX_DMA_REG_TAILDESC)); chan->err = true; } } - if (status & XILINX_VDMA_DMASR_DLY_CNT_IRQ) { + if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) { /* * Device takes too long to do the transfer when user requires * responsiveness. @@ -851,10 +1318,10 @@ static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) dev_dbg(chan->dev, "Inter-packet latency too long\n"); } - if (status & XILINX_VDMA_DMASR_FRM_CNT_IRQ) { + if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) { spin_lock(&chan->lock); - xilinx_vdma_complete_descriptor(chan); - xilinx_vdma_start_transfer(chan); + xilinx_dma_complete_descriptor(chan); + chan->start_transfer(chan); spin_unlock(&chan->lock); } @@ -867,11 +1334,13 @@ static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data) * @chan: Driver specific dma channel * @desc: dma transaction descriptor */ -static void append_desc_queue(struct xilinx_vdma_chan *chan, - struct xilinx_vdma_tx_descriptor *desc) +static void append_desc_queue(struct xilinx_dma_chan *chan, + struct xilinx_dma_tx_descriptor *desc) { struct xilinx_vdma_tx_segment *tail_segment; - struct xilinx_vdma_tx_descriptor *tail_desc; + struct xilinx_dma_tx_descriptor *tail_desc; + struct xilinx_axidma_tx_segment *axidma_tail_segment; + struct xilinx_cdma_tx_segment *cdma_tail_segment; if (list_empty(&chan->pending_list)) goto append; @@ -881,10 +1350,23 @@ static void append_desc_queue(struct xilinx_vdma_chan *chan, * that already exists in memory. */ tail_desc = list_last_entry(&chan->pending_list, - struct xilinx_vdma_tx_descriptor, node); - tail_segment = list_last_entry(&tail_desc->segments, - struct xilinx_vdma_tx_segment, node); - tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + struct xilinx_dma_tx_descriptor, node); + if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_vdma_tx_segment, + node); + tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + cdma_tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_cdma_tx_segment, + node); + cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } else { + axidma_tail_segment = list_last_entry(&tail_desc->segments, + struct xilinx_axidma_tx_segment, + node); + axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys; + } /* * Add the software descriptor and all children to the list @@ -894,22 +1376,23 @@ append: list_add_tail(&desc->node, &chan->pending_list); chan->desc_pendingcount++; - if (unlikely(chan->desc_pendingcount > chan->num_frms)) { + if (chan->has_sg && (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) + && unlikely(chan->desc_pendingcount > chan->num_frms)) { dev_dbg(chan->dev, "desc pendingcount is too high\n"); chan->desc_pendingcount = chan->num_frms; } } /** - * xilinx_vdma_tx_submit - Submit DMA transaction + * xilinx_dma_tx_submit - Submit DMA transaction * @tx: Async transaction descriptor * * Return: cookie value on success and failure value on error */ -static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx) +static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx) { - struct xilinx_vdma_tx_descriptor *desc = to_vdma_tx_descriptor(tx); - struct xilinx_vdma_chan *chan = to_xilinx_chan(tx->chan); + struct xilinx_dma_tx_descriptor *desc = to_dma_tx_descriptor(tx); + struct xilinx_dma_chan *chan = to_xilinx_chan(tx->chan); dma_cookie_t cookie; unsigned long flags; int err; @@ -919,7 +1402,7 @@ static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx) * If reset fails, need to hard reset the system. * Channel is no longer functional */ - err = xilinx_vdma_chan_reset(chan); + err = xilinx_dma_chan_reset(chan); if (err < 0) return err; } @@ -950,8 +1433,8 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, struct dma_interleaved_template *xt, unsigned long flags) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); - struct xilinx_vdma_tx_descriptor *desc; + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; struct xilinx_vdma_tx_segment *segment, *prev = NULL; struct xilinx_vdma_desc_hw *hw; @@ -965,12 +1448,12 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, return NULL; /* Allocate a transaction descriptor. */ - desc = xilinx_vdma_alloc_tx_descriptor(chan); + desc = xilinx_dma_alloc_tx_descriptor(chan); if (!desc) return NULL; dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); - desc->async_tx.tx_submit = xilinx_vdma_tx_submit; + desc->async_tx.tx_submit = xilinx_dma_tx_submit; async_tx_ack(&desc->async_tx); /* Allocate the link descriptor from DMA pool */ @@ -983,14 +1466,25 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, hw->vsize = xt->numf; hw->hsize = xt->sgl[0].size; hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) << - XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT; + XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT; hw->stride |= chan->config.frm_dly << - XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT; - - if (xt->dir != DMA_MEM_TO_DEV) - hw->buf_addr = xt->dst_start; - else - hw->buf_addr = xt->src_start; + XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT; + + if (xt->dir != DMA_MEM_TO_DEV) { + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(xt->dst_start); + hw->buf_addr_msb = upper_32_bits(xt->dst_start); + } else { + hw->buf_addr = xt->dst_start; + } + } else { + if (chan->ext_addr) { + hw->buf_addr = lower_32_bits(xt->src_start); + hw->buf_addr_msb = upper_32_bits(xt->src_start); + } else { + hw->buf_addr = xt->src_start; + } + } /* Insert the segment into the descriptor segments list. */ list_add_tail(&segment->node, &desc->segments); @@ -1005,29 +1499,194 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan, return &desc->async_tx; error: - xilinx_vdma_free_tx_descriptor(chan, desc); + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_cdma_prep_memcpy - prepare descriptors for a memcpy transaction + * @dchan: DMA channel + * @dma_dst: destination address + * @dma_src: source address + * @len: transfer length + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_cdma_tx_segment *segment, *prev; + struct xilinx_cdma_desc_hw *hw; + + if (!len || len > XILINX_DMA_MAX_TRANS_LEN) + return NULL; + + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + /* Allocate the link descriptor from DMA pool */ + segment = xilinx_cdma_alloc_tx_segment(chan); + if (!segment) + goto error; + + hw = &segment->hw; + hw->control = len; + hw->src_addr = dma_src; + hw->dest_addr = dma_dst; + + /* Fill the previous next descriptor with current */ + prev = list_last_entry(&desc->segments, + struct xilinx_cdma_tx_segment, node); + prev->hw.next_desc = segment->phys; + + /* Insert the segment into the descriptor segments list. */ + list_add_tail(&segment->node, &desc->segments); + + prev = segment; + + /* Link the last hardware descriptor with the first. */ + segment = list_first_entry(&desc->segments, + struct xilinx_cdma_tx_segment, node); + desc->async_tx.phys = segment->phys; + prev->hw.next_desc = segment->phys; + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); + return NULL; +} + +/** + * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @dchan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: transfer ack flags + * @context: APP words of the descriptor + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, + void *context) +{ + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_tx_descriptor *desc; + struct xilinx_axidma_tx_segment *segment = NULL, *prev = NULL; + u32 *app_w = (u32 *)context; + struct scatterlist *sg; + size_t copy; + size_t sg_used; + unsigned int i; + + if (!is_slave_direction(direction)) + return NULL; + + /* Allocate a transaction descriptor. */ + desc = xilinx_dma_alloc_tx_descriptor(chan); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->async_tx, &chan->common); + desc->async_tx.tx_submit = xilinx_dma_tx_submit; + + /* Build transactions using information in the scatter gather list */ + for_each_sg(sgl, sg, sg_len, i) { + sg_used = 0; + + /* Loop until the entire scatterlist entry is used */ + while (sg_used < sg_dma_len(sg)) { + struct xilinx_axidma_desc_hw *hw; + + /* Get a free segment */ + segment = xilinx_axidma_alloc_tx_segment(chan); + if (!segment) + goto error; + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the hw limit + */ + copy = min_t(size_t, sg_dma_len(sg) - sg_used, + XILINX_DMA_MAX_TRANS_LEN); + hw = &segment->hw; + + /* Fill in the descriptor */ + hw->buf_addr = sg_dma_address(sg) + sg_used; + + hw->control = copy; + + if (chan->direction == DMA_MEM_TO_DEV) { + if (app_w) + memcpy(hw->app, app_w, sizeof(u32) * + XILINX_DMA_NUM_APP_WORDS); + } + + if (prev) + prev->hw.next_desc = segment->phys; + + prev = segment; + sg_used += copy; + + /* + * Insert the segment into the descriptor segments + * list. + */ + list_add_tail(&segment->node, &desc->segments); + } + } + + segment = list_first_entry(&desc->segments, + struct xilinx_axidma_tx_segment, node); + desc->async_tx.phys = segment->phys; + prev->hw.next_desc = segment->phys; + + /* For the last DMA_MEM_TO_DEV transfer, set EOP */ + if (chan->direction == DMA_MEM_TO_DEV) { + segment->hw.control |= XILINX_DMA_BD_SOP; + segment = list_last_entry(&desc->segments, + struct xilinx_axidma_tx_segment, + node); + segment->hw.control |= XILINX_DMA_BD_EOP; + } + + return &desc->async_tx; + +error: + xilinx_dma_free_tx_descriptor(chan, desc); return NULL; } /** - * xilinx_vdma_terminate_all - Halt the channel and free descriptors - * @chan: Driver specific VDMA Channel pointer + * xilinx_dma_terminate_all - Halt the channel and free descriptors + * @chan: Driver specific DMA Channel pointer */ -static int xilinx_vdma_terminate_all(struct dma_chan *dchan) +static int xilinx_dma_terminate_all(struct dma_chan *dchan) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); /* Halt the DMA engine */ - xilinx_vdma_halt(chan); + xilinx_dma_halt(chan); /* Remove and free all of the descriptors in the lists */ - xilinx_vdma_free_descriptors(chan); + xilinx_dma_free_descriptors(chan); return 0; } /** - * xilinx_vdma_channel_set_config - Configure VDMA channel + * xilinx_dma_channel_set_config - Configure VDMA channel * Run-time configuration for Axi VDMA, supports: * . halt the channel * . configure interrupt coalescing and inter-packet delay threshold @@ -1042,13 +1701,13 @@ static int xilinx_vdma_terminate_all(struct dma_chan *dchan) int xilinx_vdma_channel_set_config(struct dma_chan *dchan, struct xilinx_vdma_config *cfg) { - struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan); + struct xilinx_dma_chan *chan = to_xilinx_chan(dchan); u32 dmacr; if (cfg->reset) - return xilinx_vdma_chan_reset(chan); + return xilinx_dma_chan_reset(chan); - dmacr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR); + dmacr = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR); chan->config.frm_dly = cfg->frm_dly; chan->config.park = cfg->park; @@ -1058,8 +1717,8 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.master = cfg->master; if (cfg->gen_lock && chan->genlock) { - dmacr |= XILINX_VDMA_DMACR_GENLOCK_EN; - dmacr |= cfg->master << XILINX_VDMA_DMACR_MASTER_SHIFT; + dmacr |= XILINX_DMA_DMACR_GENLOCK_EN; + dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT; } chan->config.frm_cnt_en = cfg->frm_cnt_en; @@ -1071,21 +1730,21 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.coalesc = cfg->coalesc; chan->config.delay = cfg->delay; - if (cfg->coalesc <= XILINX_VDMA_DMACR_FRAME_COUNT_MAX) { - dmacr |= cfg->coalesc << XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT; + if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) { + dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT; chan->config.coalesc = cfg->coalesc; } - if (cfg->delay <= XILINX_VDMA_DMACR_DELAY_MAX) { - dmacr |= cfg->delay << XILINX_VDMA_DMACR_DELAY_SHIFT; + if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) { + dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT; chan->config.delay = cfg->delay; } /* FSync Source selection */ - dmacr &= ~XILINX_VDMA_DMACR_FSYNCSRC_MASK; - dmacr |= cfg->ext_fsync << XILINX_VDMA_DMACR_FSYNCSRC_SHIFT; + dmacr &= ~XILINX_DMA_DMACR_FSYNCSRC_MASK; + dmacr |= cfg->ext_fsync << XILINX_DMA_DMACR_FSYNCSRC_SHIFT; - vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, dmacr); + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, dmacr); return 0; } @@ -1096,14 +1755,14 @@ EXPORT_SYMBOL(xilinx_vdma_channel_set_config); */ /** - * xilinx_vdma_chan_remove - Per Channel remove function - * @chan: Driver specific VDMA channel + * xilinx_dma_chan_remove - Per Channel remove function + * @chan: Driver specific DMA channel */ -static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan) +static void xilinx_dma_chan_remove(struct xilinx_dma_chan *chan) { /* Disable all interrupts */ - vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, - XILINX_VDMA_DMAXR_ALL_IRQ_MASK); + dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, + XILINX_DMA_DMAXR_ALL_IRQ_MASK); if (chan->irq > 0) free_irq(chan->irq, chan); @@ -1113,8 +1772,197 @@ static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan) list_del(&chan->common.device_node); } +static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **rx_clk, + struct clk **sg_clk, struct clk **tmp_clk) +{ + int err; + + *tmp_clk = NULL; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) { + err = PTR_ERR(*axi_clk); + dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err); + return err; + } + + *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk"); + if (IS_ERR(*tx_clk)) + *tx_clk = NULL; + + *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + + *sg_clk = devm_clk_get(&pdev->dev, "m_axi_sg_aclk"); + if (IS_ERR(*sg_clk)) + *sg_clk = NULL; + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*tx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + goto err_disable_axiclk; + } + + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + goto err_disable_txclk; + } + + err = clk_prepare_enable(*sg_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable sg_clk (%u)\n", err); + goto err_disable_rxclk; + } + + return 0; + +err_disable_rxclk: + clk_disable_unprepare(*rx_clk); +err_disable_txclk: + clk_disable_unprepare(*tx_clk); +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **dev_clk, struct clk **tmp_clk, + struct clk **tmp1_clk, struct clk **tmp2_clk) +{ + int err; + + *tmp_clk = NULL; + *tmp1_clk = NULL; + *tmp2_clk = NULL; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) { + err = PTR_ERR(*axi_clk); + dev_err(&pdev->dev, "failed to get axi_clk (%u)\n", err); + return err; + } + + *dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk"); + if (IS_ERR(*dev_clk)) { + err = PTR_ERR(*dev_clk); + dev_err(&pdev->dev, "failed to get dev_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*dev_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable dev_clk (%u)\n", err); + goto err_disable_axiclk; + } + + return 0; + +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, + struct clk **tx_clk, struct clk **txs_clk, + struct clk **rx_clk, struct clk **rxs_clk) +{ + int err; + + *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); + if (IS_ERR(*axi_clk)) { + err = PTR_ERR(*axi_clk); + dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err); + return err; + } + + *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk"); + if (IS_ERR(*tx_clk)) + *tx_clk = NULL; + + *txs_clk = devm_clk_get(&pdev->dev, "m_axis_mm2s_aclk"); + if (IS_ERR(*txs_clk)) + *txs_clk = NULL; + + *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + + *rxs_clk = devm_clk_get(&pdev->dev, "s_axis_s2mm_aclk"); + if (IS_ERR(*rxs_clk)) + *rxs_clk = NULL; + + err = clk_prepare_enable(*axi_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + return err; + } + + err = clk_prepare_enable(*tx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + goto err_disable_axiclk; + } + + err = clk_prepare_enable(*txs_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable txs_clk (%u)\n", err); + goto err_disable_txclk; + } + + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + goto err_disable_txsclk; + } + + err = clk_prepare_enable(*rxs_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rxs_clk (%u)\n", err); + goto err_disable_rxclk; + } + + return 0; + +err_disable_rxclk: + clk_disable_unprepare(*rx_clk); +err_disable_txsclk: + clk_disable_unprepare(*txs_clk); +err_disable_txclk: + clk_disable_unprepare(*tx_clk); +err_disable_axiclk: + clk_disable_unprepare(*axi_clk); + + return err; +} + +static void xdma_disable_allclks(struct xilinx_dma_device *xdev) +{ + clk_disable_unprepare(xdev->rxs_clk); + clk_disable_unprepare(xdev->rx_clk); + clk_disable_unprepare(xdev->txs_clk); + clk_disable_unprepare(xdev->tx_clk); + clk_disable_unprepare(xdev->axi_clk); +} + /** - * xilinx_vdma_chan_probe - Per Channel Probing + * xilinx_dma_chan_probe - Per Channel Probing * It get channel features from the device tree entry and * initialize special channel handling routines * @@ -1123,10 +1971,10 @@ static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan) * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, +static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, struct device_node *node) { - struct xilinx_vdma_chan *chan; + struct xilinx_dma_chan *chan; bool has_dre = false; u32 value, width; int err; @@ -1140,6 +1988,7 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, chan->xdev = xdev; chan->has_sg = xdev->has_sg; chan->desc_pendingcount = 0x0; + chan->ext_addr = xdev->ext_addr; spin_lock_init(&chan->lock); INIT_LIST_HEAD(&chan->pending_list); @@ -1169,23 +2018,27 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, chan->direction = DMA_MEM_TO_DEV; chan->id = 0; - chan->ctrl_offset = XILINX_VDMA_MM2S_CTRL_OFFSET; - chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET; + chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET; - if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH || - xdev->flush_on_fsync == XILINX_VDMA_FLUSH_MM2S) - chan->flush_on_fsync = true; + if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_DMA_FLUSH_MM2S) + chan->flush_on_fsync = true; + } } else if (of_device_is_compatible(node, "xlnx,axi-vdma-s2mm-channel")) { chan->direction = DMA_DEV_TO_MEM; chan->id = 1; - chan->ctrl_offset = XILINX_VDMA_S2MM_CTRL_OFFSET; - chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET; + chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET; - if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH || - xdev->flush_on_fsync == XILINX_VDMA_FLUSH_S2MM) - chan->flush_on_fsync = true; + if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH || + xdev->flush_on_fsync == XILINX_DMA_FLUSH_S2MM) + chan->flush_on_fsync = true; + } } else { dev_err(xdev->dev, "Invalid channel compatible node\n"); return -EINVAL; @@ -1193,15 +2046,22 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, /* Request the interrupt */ chan->irq = irq_of_parse_and_map(node, 0); - err = request_irq(chan->irq, xilinx_vdma_irq_handler, IRQF_SHARED, - "xilinx-vdma-controller", chan); + err = request_irq(chan->irq, xilinx_dma_irq_handler, IRQF_SHARED, + "xilinx-dma-controller", chan); if (err) { dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq); return err; } + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) + chan->start_transfer = xilinx_dma_start_transfer; + else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) + chan->start_transfer = xilinx_cdma_start_transfer; + else + chan->start_transfer = xilinx_vdma_start_transfer; + /* Initialize the tasklet */ - tasklet_init(&chan->tasklet, xilinx_vdma_do_tasklet, + tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet, (unsigned long)chan); /* @@ -1214,7 +2074,7 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, xdev->chan[chan->id] = chan; /* Reset the channel */ - err = xilinx_vdma_chan_reset(chan); + err = xilinx_dma_chan_reset(chan); if (err < 0) { dev_err(xdev->dev, "Reset channel failed\n"); return err; @@ -1233,28 +2093,54 @@ static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev, static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { - struct xilinx_vdma_device *xdev = ofdma->of_dma_data; + struct xilinx_dma_device *xdev = ofdma->of_dma_data; int chan_id = dma_spec->args[0]; - if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id]) + if (chan_id >= XILINX_DMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id]) return NULL; return dma_get_slave_channel(&xdev->chan[chan_id]->common); } +static const struct xilinx_dma_config axidma_config = { + .dmatype = XDMA_TYPE_AXIDMA, + .clk_init = axidma_clk_init, +}; + +static const struct xilinx_dma_config axicdma_config = { + .dmatype = XDMA_TYPE_CDMA, + .clk_init = axicdma_clk_init, +}; + +static const struct xilinx_dma_config axivdma_config = { + .dmatype = XDMA_TYPE_VDMA, + .clk_init = axivdma_clk_init, +}; + +static const struct of_device_id xilinx_dma_of_ids[] = { + { .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config }, + { .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config }, + { .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config }, + {} +}; +MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids); + /** - * xilinx_vdma_probe - Driver probe function + * xilinx_dma_probe - Driver probe function * @pdev: Pointer to the platform_device structure * * Return: '0' on success and failure value on error */ -static int xilinx_vdma_probe(struct platform_device *pdev) +static int xilinx_dma_probe(struct platform_device *pdev) { + int (*clk_init)(struct platform_device *, struct clk **, struct clk **, + struct clk **, struct clk **, struct clk **) + = axivdma_clk_init; struct device_node *node = pdev->dev.of_node; - struct xilinx_vdma_device *xdev; - struct device_node *child; + struct xilinx_dma_device *xdev; + struct device_node *child, *np = pdev->dev.of_node; struct resource *io; - u32 num_frames; + u32 num_frames, addr_width; int i, err; /* Allocate and initialize the DMA engine structure */ @@ -1263,6 +2149,20 @@ static int xilinx_vdma_probe(struct platform_device *pdev) return -ENOMEM; xdev->dev = &pdev->dev; + if (np) { + const struct of_device_id *match; + + match = of_match_node(xilinx_dma_of_ids, np); + if (match && match->data) { + xdev->dma_config = match->data; + clk_init = xdev->dma_config->clk_init; + } + } + + err = clk_init(pdev, &xdev->axi_clk, &xdev->tx_clk, &xdev->txs_clk, + &xdev->rx_clk, &xdev->rxs_clk); + if (err) + return err; /* Request and map I/O memory */ io = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1273,46 +2173,77 @@ static int xilinx_vdma_probe(struct platform_device *pdev) /* Retrieve the DMA engine properties from the device tree */ xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg"); - err = of_property_read_u32(node, "xlnx,num-fstores", &num_frames); - if (err < 0) { - dev_err(xdev->dev, "missing xlnx,num-fstores property\n"); - return err; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + err = of_property_read_u32(node, "xlnx,num-fstores", + &num_frames); + if (err < 0) { + dev_err(xdev->dev, + "missing xlnx,num-fstores property\n"); + return err; + } + + err = of_property_read_u32(node, "xlnx,flush-fsync", + &xdev->flush_on_fsync); + if (err < 0) + dev_warn(xdev->dev, + "missing xlnx,flush-fsync property\n"); } - err = of_property_read_u32(node, "xlnx,flush-fsync", - &xdev->flush_on_fsync); + err = of_property_read_u32(node, "xlnx,addrwidth", &addr_width); if (err < 0) - dev_warn(xdev->dev, "missing xlnx,flush-fsync property\n"); + dev_warn(xdev->dev, "missing xlnx,addrwidth property\n"); + + if (addr_width > 32) + xdev->ext_addr = true; + else + xdev->ext_addr = false; + + /* Set the dma mask bits */ + dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width)); /* Initialize the DMA engine */ xdev->common.dev = &pdev->dev; INIT_LIST_HEAD(&xdev->common.channels); - dma_cap_set(DMA_SLAVE, xdev->common.cap_mask); - dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask); + if (!(xdev->dma_config->dmatype == XDMA_TYPE_CDMA)) { + dma_cap_set(DMA_SLAVE, xdev->common.cap_mask); + dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask); + } xdev->common.device_alloc_chan_resources = - xilinx_vdma_alloc_chan_resources; + xilinx_dma_alloc_chan_resources; xdev->common.device_free_chan_resources = - xilinx_vdma_free_chan_resources; - xdev->common.device_prep_interleaved_dma = + xilinx_dma_free_chan_resources; + xdev->common.device_terminate_all = xilinx_dma_terminate_all; + xdev->common.device_tx_status = xilinx_dma_tx_status; + xdev->common.device_issue_pending = xilinx_dma_issue_pending; + if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) { + xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg; + /* Residue calculation is supported by only AXI DMA */ + xdev->common.residue_granularity = + DMA_RESIDUE_GRANULARITY_SEGMENT; + } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) { + dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask); + xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy; + } else { + xdev->common.device_prep_interleaved_dma = xilinx_vdma_dma_prep_interleaved; - xdev->common.device_terminate_all = xilinx_vdma_terminate_all; - xdev->common.device_tx_status = xilinx_vdma_tx_status; - xdev->common.device_issue_pending = xilinx_vdma_issue_pending; + } platform_set_drvdata(pdev, xdev); /* Initialize the channels */ for_each_child_of_node(node, child) { - err = xilinx_vdma_chan_probe(xdev, child); + err = xilinx_dma_chan_probe(xdev, child); if (err < 0) - goto error; + goto disable_clks; } - for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) - if (xdev->chan[i]) - xdev->chan[i]->num_frms = num_frames; + if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { + for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++) + if (xdev->chan[i]) + xdev->chan[i]->num_frms = num_frames; + } /* Register the DMA engine with the core */ dma_async_device_register(&xdev->common); @@ -1329,49 +2260,47 @@ static int xilinx_vdma_probe(struct platform_device *pdev) return 0; +disable_clks: + xdma_disable_allclks(xdev); error: - for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) + for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++) if (xdev->chan[i]) - xilinx_vdma_chan_remove(xdev->chan[i]); + xilinx_dma_chan_remove(xdev->chan[i]); return err; } /** - * xilinx_vdma_remove - Driver remove function + * xilinx_dma_remove - Driver remove function * @pdev: Pointer to the platform_device structure * * Return: Always '0' */ -static int xilinx_vdma_remove(struct platform_device *pdev) +static int xilinx_dma_remove(struct platform_device *pdev) { - struct xilinx_vdma_device *xdev = platform_get_drvdata(pdev); + struct xilinx_dma_device *xdev = platform_get_drvdata(pdev); int i; of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&xdev->common); - for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++) + for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++) if (xdev->chan[i]) - xilinx_vdma_chan_remove(xdev->chan[i]); + xilinx_dma_chan_remove(xdev->chan[i]); + + xdma_disable_allclks(xdev); return 0; } -static const struct of_device_id xilinx_vdma_of_ids[] = { - { .compatible = "xlnx,axi-vdma-1.00.a",}, - {} -}; -MODULE_DEVICE_TABLE(of, xilinx_vdma_of_ids); - static struct platform_driver xilinx_vdma_driver = { .driver = { .name = "xilinx-vdma", - .of_match_table = xilinx_vdma_of_ids, + .of_match_table = xilinx_dma_of_ids, }, - .probe = xilinx_vdma_probe, - .remove = xilinx_vdma_remove, + .probe = xilinx_dma_probe, + .remove = xilinx_dma_remove, }; module_platform_driver(xilinx_vdma_driver); |