Diffstat (limited to 'arch/arc/mm')
-rw-r--r--  arch/arc/mm/cache.c | 184
-rw-r--r--  arch/arc/mm/dma.c   |  18
2 files changed, 163 insertions(+), 39 deletions(-)
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 1cd6695b6..0d1a6e968 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -22,15 +22,22 @@
#include <asm/setup.h>
static int l2_line_sz;
+int ioc_exists;
+volatile int slc_enable = 1, ioc_enable = 1;
void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
unsigned long sz, const int cacheop);
+void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_wback)(unsigned long start, unsigned long sz);
+
char *arc_cache_mumbojumbo(int c, char *buf, int len)
{
int n = 0;
struct cpuinfo_arc_cache *p;
+#define IS_USED_RUN(v) ((v) ? "" : "(disabled) ")
#define PR_CACHE(p, cfg, str) \
if (!(p)->ver) \
n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \
@@ -45,10 +52,18 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
+ if (!is_isa_arcv2())
+ return buf;
+
p = &cpuinfo_arc700[c].slc;
if (p->ver)
n += scnprintf(buf + n, len - n,
- "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len);
+ "SLC\t\t: %uK, %uB Line%s\n",
+ p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
+
+ if (ioc_exists)
+ n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
+ IS_USED_RUN(ioc_enable));
return buf;
}
@@ -58,18 +73,9 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
* the cpuinfo structure for later use.
* No Validation done here, simply read/convert the BCRs
*/
-void read_decode_cache_bcr(void)
+static void read_decode_cache_bcr_arcv2(int cpu)
{
- struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc;
- unsigned int cpu = smp_processor_id();
- struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
- unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
- } ibcr, dbcr;
-
+ struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
struct bcr_generic sbcr;
struct bcr_slc_cfg {
@@ -80,6 +86,39 @@ void read_decode_cache_bcr(void)
#endif
} slc_cfg;
+ struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+ unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+ } cbcr;
+
+ READ_BCR(ARC_REG_SLC_BCR, sbcr);
+ if (sbcr.ver) {
+ READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+ p_slc->ver = sbcr.ver;
+ p_slc->sz_k = 128 << slc_cfg.sz;
+ l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+ }
+
+ READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
+ if (cbcr.c && ioc_enable)
+ ioc_exists = 1;
+}
+
+void read_decode_cache_bcr(void)
+{
+ struct cpuinfo_arc_cache *p_ic, *p_dc;
+ unsigned int cpu = smp_processor_id();
+ struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+ unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+ } ibcr, dbcr;
+
p_ic = &cpuinfo_arc700[cpu].icache;
READ_BCR(ARC_REG_IC_BCR, ibcr);
@@ -122,17 +161,8 @@ dc_chk:
p_dc->ver = dbcr.ver;
slc_chk:
- if (!is_isa_arcv2())
- return;
-
- p_slc = &cpuinfo_arc700[cpu].slc;
- READ_BCR(ARC_REG_SLC_BCR, sbcr);
- if (sbcr.ver) {
- READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
- p_slc->ver = sbcr.ver;
- p_slc->sz_k = 128 << slc_cfg.sz;
- l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
- }
+ if (is_isa_arcv2())
+ read_decode_cache_bcr_arcv2(cpu);
}
/*
@@ -516,11 +546,6 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
#endif
}
-static inline int need_slc_flush(void)
-{
- return is_isa_arcv2() && l2_line_sz;
-}
-
/***********************************************************
* Exported APIs
*/
@@ -569,30 +594,74 @@ void flush_dcache_page(struct page *page)
}
EXPORT_SYMBOL(flush_dcache_page);
-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+/*
+ * DMA ops for systems with L1 cache only
+ * Make memory coherent with L1 cache by flushing/invalidating L1 lines
+ */
+static void __dma_cache_wback_inv_l1(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+}
- if (need_slc_flush())
- slc_op(start, sz, OP_FLUSH_N_INV);
+static void __dma_cache_inv_l1(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_INV);
}
-EXPORT_SYMBOL(dma_cache_wback_inv);
-void dma_cache_inv(unsigned long start, unsigned long sz)
+static void __dma_cache_wback_l1(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with both L1 and L2 caches, but without IOC
+ * Both L1 and L2 lines need to be explicitly flushed/invalidated
+ */
+static void __dma_cache_wback_inv_slc(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+ slc_op(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_slc(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_INV);
+ slc_op(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_slc(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH);
+ slc_op(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with IOC
+ * IOC hardware snoops all DMA traffic keeping the caches consistent with
+ * memory - eliding need for any explicit cache maintenance of DMA buffers
+ */
+static void __dma_cache_wback_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_wback_ioc(unsigned long start, unsigned long sz) {}
+
+/*
+ * Exported DMA API
+ */
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+ __dma_cache_wback_inv(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
- if (need_slc_flush())
- slc_op(start, sz, OP_INV);
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+ __dma_cache_inv(start, sz);
}
EXPORT_SYMBOL(dma_cache_inv);
void dma_cache_wback(unsigned long start, unsigned long sz)
{
- __dc_line_op_k(start, sz, OP_FLUSH);
-
- if (need_slc_flush())
- slc_op(start, sz, OP_FLUSH);
+ __dma_cache_wback(start, sz);
}
EXPORT_SYMBOL(dma_cache_wback);
@@ -848,4 +917,41 @@ void arc_cache_init(void)
panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
}
}
+
+ if (is_isa_arcv2() && l2_line_sz && !slc_enable) {
+
+ /* IM set : flush before invalidate */
+ write_aux_reg(ARC_REG_SLC_CTRL,
+ read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_IM);
+
+ write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
+
+ /* Important to wait for flush to complete */
+ while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+ write_aux_reg(ARC_REG_SLC_CTRL,
+ read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
+ }
+
+ if (is_isa_arcv2() && ioc_exists) {
+ /* IO coherency base - 0x8z */
+ write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
+ /* IO coherency aperture size - 512Mb: 0x8z-0xAz */
+ write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
+ /* Enable partial writes */
+ write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
+ /* Enable IO coherency */
+ write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
+
+ __dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
+ __dma_cache_inv = __dma_cache_inv_ioc;
+ __dma_cache_wback = __dma_cache_wback_ioc;
+ } else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
+ __dma_cache_wback_inv = __dma_cache_wback_inv_slc;
+ __dma_cache_inv = __dma_cache_inv_slc;
+ __dma_cache_wback = __dma_cache_wback_slc;
+ } else {
+ __dma_cache_wback_inv = __dma_cache_wback_inv_l1;
+ __dma_cache_inv = __dma_cache_inv_l1;
+ __dma_cache_wback = __dma_cache_wback_l1;
+ }
}
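
Illustrative sketch (not part of the patch): the cache.c changes above replace the old need_slc_flush() runtime check with boot-time selection. arc_cache_init() inspects IOC/SLC presence once and points __dma_cache_wback_inv / __dma_cache_inv / __dma_cache_wback at the matching _ioc/_slc/_l1 backend, so the exported dma_cache_*() helpers become thin trampolines. A minimal standalone C model of that dispatch pattern follows; every name in it is invented for illustration and is not kernel code.

#include <stdio.h>

/* one function pointer per DMA cache op, resolved once at init */
static void (*dma_wback_inv)(unsigned long start, unsigned long sz);

static void wback_inv_ioc(unsigned long start, unsigned long sz)
{
	/* IOC snoops DMA traffic: nothing to flush */
}

static void wback_inv_slc(unsigned long start, unsigned long sz)
{
	printf("flush+inv L1 and SLC: %#lx..%#lx\n", start, start + sz);
}

static void wback_inv_l1(unsigned long start, unsigned long sz)
{
	printf("flush+inv L1 only:    %#lx..%#lx\n", start, start + sz);
}

static void cache_init(int ioc_present, int slc_present)
{
	if (ioc_present)
		dma_wback_inv = wback_inv_ioc;
	else if (slc_present)
		dma_wback_inv = wback_inv_slc;
	else
		dma_wback_inv = wback_inv_l1;
}

int main(void)
{
	cache_init(0, 1);                  /* e.g. no IOC, SLC present */
	dma_wback_inv(0x80000000UL, 4096); /* per-buffer call stays branch-free */
	return 0;
}
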
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 57706a9c6..29a46bb19 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -19,6 +19,7 @@
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/export.h>
+#include <asm/cache.h>
#include <asm/cacheflush.h>
/*
@@ -53,6 +54,20 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
{
void *paddr, *kvaddr;
+ /*
+ * IOC relies on all data (even coherent DMA data) being in cache
+ * Thus allocate normal cached memory
+ *
+ * The gains with IOC are two pronged:
+ * -For streaming data, elides the need for cache maintenance, saving
+ * cycles in flush code, and bus bandwidth as all the lines of a
+ * buffer need to be flushed out to memory
+ * -For coherent data, Read/Write to buffers terminate early in cache
+ * (vs. always going to memory - thus are faster)
+ */
+ if (is_isa_arcv2() && ioc_exists)
+ return dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+
/* This is linear addr (0x8000_0000 based) */
paddr = alloc_pages_exact(size, gfp);
if (!paddr)
@@ -85,6 +100,9 @@ EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
dma_addr_t dma_handle)
{
+ if (is_isa_arcv2() && ioc_exists)
+ return dma_free_noncoherent(dev, size, kvaddr, dma_handle);
+
iounmap((void __force __iomem *)kvaddr);
free_pages_exact((void *)dma_handle, size);
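
Illustrative sketch (not part of the patch): on the dma.c side, the only behavioural change is that with IOC present dma_alloc_coherent()/dma_free_coherent() fall back to the noncoherent (ordinary cached) allocation path, since the IOC keeps DMA traffic coherent in hardware. Drivers are unaffected either way; below is a hedged sketch of typical consumer code, where the function name and buffer size are placeholders, not anything defined by this patch.

/* illustrative driver fragment, not part of the patch */
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/gfp.h>

#define EXAMPLE_RING_BYTES	4096	/* placeholder buffer size */

static int example_alloc_ring(struct device *dev)
{
	dma_addr_t ring_dma;
	void *ring;

	/*
	 * With IOC: backed by normal cached pages (noncoherent path).
	 * Without IOC: an uncached mapping, as before this patch.
	 * Either way the driver sees a CPU pointer plus a bus address.
	 */
	ring = dma_alloc_coherent(dev, EXAMPLE_RING_BYTES, &ring_dma, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;

	/* ... program ring_dma into the device, access 'ring' from the CPU ... */

	dma_free_coherent(dev, EXAMPLE_RING_BYTES, ring, ring_dma);
	return 0;
}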