summaryrefslogtreecommitdiff
path: root/arch/arm/mm/dma-mapping.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/mm/dma-mapping.c')
-rw-r--r--arch/arm/mm/dma-mapping.c249
1 files changed, 199 insertions, 50 deletions
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 0eca38125..c941e9304 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -42,6 +42,55 @@
#include "dma.h"
#include "mm.h"
+struct arm_dma_alloc_args {
+ struct device *dev;
+ size_t size;
+ gfp_t gfp;
+ pgprot_t prot;
+ const void *caller;
+ bool want_vaddr;
+};
+
+struct arm_dma_free_args {
+ struct device *dev;
+ size_t size;
+ void *cpu_addr;
+ struct page *page;
+ bool want_vaddr;
+};
+
+struct arm_dma_allocator {
+ void *(*alloc)(struct arm_dma_alloc_args *args,
+ struct page **ret_page);
+ void (*free)(struct arm_dma_free_args *args);
+};
+
+struct arm_dma_buffer {
+ struct list_head list;
+ void *virt;
+ struct arm_dma_allocator *allocator;
+};
+
+static LIST_HEAD(arm_dma_bufs);
+static DEFINE_SPINLOCK(arm_dma_bufs_lock);
+
+static struct arm_dma_buffer *arm_dma_buffer_find(void *virt)
+{
+ struct arm_dma_buffer *buf, *found = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+ list_for_each_entry(buf, &arm_dma_bufs, list) {
+ if (buf->virt == virt) {
+ list_del(&buf->list);
+ found = buf;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+ return found;
+}
+
/*
* The DMA API is built upon the notion of "buffer ownership". A buffer
* is either exclusively owned by the CPU (and therefore may be accessed
@@ -592,7 +641,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL
#define __alloc_from_pool(size, ret_page) NULL
#define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL
-#define __free_from_pool(cpu_addr, size) 0
+#define __free_from_pool(cpu_addr, size) do { } while (0)
#define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0)
#define __dma_free_remap(cpu_addr, size) do { } while (0)
@@ -610,7 +659,78 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
return page_address(page);
}
+static void *simple_allocator_alloc(struct arm_dma_alloc_args *args,
+ struct page **ret_page)
+{
+ return __alloc_simple_buffer(args->dev, args->size, args->gfp,
+ ret_page);
+}
+
+static void simple_allocator_free(struct arm_dma_free_args *args)
+{
+ __dma_free_buffer(args->page, args->size);
+}
+static struct arm_dma_allocator simple_allocator = {
+ .alloc = simple_allocator_alloc,
+ .free = simple_allocator_free,
+};
+
+static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
+ struct page **ret_page)
+{
+ return __alloc_from_contiguous(args->dev, args->size, args->prot,
+ ret_page, args->caller,
+ args->want_vaddr);
+}
+
+static void cma_allocator_free(struct arm_dma_free_args *args)
+{
+ __free_from_contiguous(args->dev, args->page, args->cpu_addr,
+ args->size, args->want_vaddr);
+}
+
+static struct arm_dma_allocator cma_allocator = {
+ .alloc = cma_allocator_alloc,
+ .free = cma_allocator_free,
+};
+
+static void *pool_allocator_alloc(struct arm_dma_alloc_args *args,
+ struct page **ret_page)
+{
+ return __alloc_from_pool(args->size, ret_page);
+}
+
+static void pool_allocator_free(struct arm_dma_free_args *args)
+{
+ __free_from_pool(args->cpu_addr, args->size);
+}
+
+static struct arm_dma_allocator pool_allocator = {
+ .alloc = pool_allocator_alloc,
+ .free = pool_allocator_free,
+};
+
+static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
+ struct page **ret_page)
+{
+ return __alloc_remap_buffer(args->dev, args->size, args->gfp,
+ args->prot, ret_page, args->caller,
+ args->want_vaddr);
+}
+
+static void remap_allocator_free(struct arm_dma_free_args *args)
+{
+ if (args->want_vaddr)
+ __dma_free_remap(args->cpu_addr, args->size);
+
+ __dma_free_buffer(args->page, args->size);
+}
+
+static struct arm_dma_allocator remap_allocator = {
+ .alloc = remap_allocator_alloc,
+ .free = remap_allocator_free,
+};
static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
gfp_t gfp, pgprot_t prot, bool is_coherent,
@@ -619,7 +739,16 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
u64 mask = get_coherent_dma_mask(dev);
struct page *page = NULL;
void *addr;
- bool want_vaddr;
+ bool allowblock, cma;
+ struct arm_dma_buffer *buf;
+ struct arm_dma_alloc_args args = {
+ .dev = dev,
+ .size = PAGE_ALIGN(size),
+ .gfp = gfp,
+ .prot = prot,
+ .caller = caller,
+ .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+ };
#ifdef CONFIG_DMA_API_DEBUG
u64 limit = (mask + 1) & ~mask;
@@ -633,6 +762,11 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (!mask)
return NULL;
+ buf = kzalloc(sizeof(*buf),
+ gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM));
+ if (!buf)
+ return NULL;
+
if (mask < 0xffffffffULL)
gfp |= GFP_DMA;
@@ -644,28 +778,37 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
* platform; see CONFIG_HUGETLBFS.
*/
gfp &= ~(__GFP_COMP);
+ args.gfp = gfp;
*handle = DMA_ERROR_CODE;
- size = PAGE_ALIGN(size);
- want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
-
- if (nommu())
- addr = __alloc_simple_buffer(dev, size, gfp, &page);
- else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
- addr = __alloc_from_contiguous(dev, size, prot, &page,
- caller, want_vaddr);
- else if (is_coherent)
- addr = __alloc_simple_buffer(dev, size, gfp, &page);
- else if (!gfpflags_allow_blocking(gfp))
- addr = __alloc_from_pool(size, &page);
+ allowblock = gfpflags_allow_blocking(gfp);
+ cma = allowblock ? dev_get_cma_area(dev) : false;
+
+ if (cma)
+ buf->allocator = &cma_allocator;
+ else if (nommu() || is_coherent)
+ buf->allocator = &simple_allocator;
+ else if (allowblock)
+ buf->allocator = &remap_allocator;
else
- addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
- caller, want_vaddr);
+ buf->allocator = &pool_allocator;
+
+ addr = buf->allocator->alloc(&args, &page);
+
+ if (page) {
+ unsigned long flags;
- if (page)
*handle = pfn_to_dma(dev, page_to_pfn(page));
+ buf->virt = args.want_vaddr ? addr : page;
+
+ spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+ list_add(&buf->list, &arm_dma_bufs);
+ spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+ } else {
+ kfree(buf);
+ }
- return want_vaddr ? addr : page;
+ return args.want_vaddr ? addr : page;
}
/*
@@ -741,25 +884,21 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
bool is_coherent)
{
struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
- bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
-
- size = PAGE_ALIGN(size);
-
- if (nommu()) {
- __dma_free_buffer(page, size);
- } else if (!is_coherent && __free_from_pool(cpu_addr, size)) {
+ struct arm_dma_buffer *buf;
+ struct arm_dma_free_args args = {
+ .dev = dev,
+ .size = PAGE_ALIGN(size),
+ .cpu_addr = cpu_addr,
+ .page = page,
+ .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+ };
+
+ buf = arm_dma_buffer_find(cpu_addr);
+ if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr))
return;
- } else if (!dev_get_cma_area(dev)) {
- if (want_vaddr && !is_coherent)
- __dma_free_remap(cpu_addr, size);
- __dma_free_buffer(page, size);
- } else {
- /*
- * Non-atomic allocations cannot be freed with IRQs disabled
- */
- WARN_ON(irqs_disabled());
- __free_from_contiguous(dev, page, cpu_addr, size, want_vaddr);
- }
+
+ buf->allocator->free(&args);
+ kfree(buf);
}
void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
@@ -1122,6 +1261,9 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping,
spin_unlock_irqrestore(&mapping->lock, flags);
}
+/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */
+static const int iommu_order_array[] = { 9, 8, 4, 0 };
+
static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
gfp_t gfp, struct dma_attrs *attrs)
{
@@ -1129,6 +1271,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
int count = size >> PAGE_SHIFT;
int array_size = count * sizeof(struct page *);
int i = 0;
+ int order_idx = 0;
if (array_size <= PAGE_SIZE)
pages = kzalloc(array_size, GFP_KERNEL);
@@ -1154,6 +1297,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
return pages;
}
+ /* Go straight to 4K chunks if caller says it's OK. */
+ if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+ order_idx = ARRAY_SIZE(iommu_order_array) - 1;
+
/*
* IOMMU can map any pages, so himem can also be used here
*/
@@ -1162,22 +1309,24 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
while (count) {
int j, order;
- for (order = __fls(count); order > 0; --order) {
- /*
- * We do not want OOM killer to be invoked as long
- * as we can fall back to single pages, so we force
- * __GFP_NORETRY for orders higher than zero.
- */
- pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
- if (pages[i])
- break;
+ order = iommu_order_array[order_idx];
+
+ /* Drop down when we get small */
+ if (__fls(count) < order) {
+ order_idx++;
+ continue;
}
- if (!pages[i]) {
- /*
- * Fall back to single page allocation.
- * Might invoke OOM killer as last resort.
- */
+ if (order) {
+ /* See if it's easy to allocate a high-order chunk */
+ pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
+
+ /* Go down a notch at first sign of pressure */
+ if (!pages[i]) {
+ order_idx++;
+ continue;
+ }
+ } else {
pages[i] = alloc_pages(gfp, 0);
if (!pages[i])
goto error;