From c81e2fcdbe53d1a8b77130447763096e66a4dfb1 Mon Sep 17 00:00:00 2001
From: Mitchel Humpherys
Date: Tue, 7 Jun 2016 10:57:43 -0700
Subject: arm64: dma-mapping: Respect device coherency in IOMMU mapper

Coherent devices can make use of the CPU cache, so they should get
coherent IOMMU mappings and should have their cache maintenance
operations skipped. Implement this for the vanilla arm64 IOMMU mapper.

Change-Id: Iab6715a071a5fa5556934d7771a6d07d678e9703
Signed-off-by: Mitchel Humpherys
---
 arch/arm64/mm/dma-mapping.c | 75 +++++++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index b132174d5658..eff70892dada 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -49,6 +49,17 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 	return prot;
 }
 
+static int __get_iommu_pgprot(struct dma_attrs *attrs, int prot,
+			      bool coherent)
+{
+	if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
+		prot |= IOMMU_NOEXEC;
+	if (coherent)
+		prot |= IOMMU_CACHE;
+
+	return prot;
+}
+
 static struct gen_pool *atomic_pool;
 #define NO_KERNEL_MAPPING_DUMMY	0x2222
 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
@@ -1153,7 +1164,7 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
 /* IOMMU */
 
 static void __dma_clear_buffer(struct page *page, size_t size,
-			       struct dma_attrs *attrs)
+			       struct dma_attrs *attrs, bool is_coherent)
 {
 	/*
 	 * Ensure that the allocated pages are zeroed, and that any data
@@ -1162,7 +1173,8 @@ static void __dma_clear_buffer(struct page *page, size_t size,
 	void *ptr = page_address(page);
 	if (!dma_get_attr(DMA_ATTR_SKIP_ZEROING, attrs))
 		memset(ptr, 0, size);
-	dmac_flush_range(ptr, ptr + size);
+	if (!is_coherent)
+		dmac_flush_range(ptr, ptr + size);
 }
 
 static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
@@ -1212,6 +1224,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 	size_t count = size >> PAGE_SHIFT;
 	size_t array_size = count * sizeof(struct page *);
 	int i = 0;
+	bool is_coherent = is_device_dma_coherent(dev);
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, gfp);
@@ -1228,7 +1241,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 		if (!page)
 			goto error;
 
-		__dma_clear_buffer(page, size, attrs);
+		__dma_clear_buffer(page, size, attrs, is_coherent);
 
 		for (i = 0; i < count; i++)
 			pages[i] = page + i;
@@ -1257,7 +1270,8 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 				pages[i + j] = pages[i] + j;
 		}
 
-		__dma_clear_buffer(pages[i], PAGE_SIZE << order, attrs);
+		__dma_clear_buffer(pages[i], PAGE_SIZE << order, attrs,
+				   is_coherent);
 		i += 1 << order;
 		count -= 1 << order;
 	}
@@ -1322,9 +1336,8 @@ static dma_addr_t __iommu_create_mapping(struct device *dev,
 	dma_addr = __alloc_iova(mapping, size);
 	if (dma_addr == DMA_ERROR_CODE)
 		return dma_addr;
-
-	if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
-		prot |= IOMMU_NOEXEC;
+	prot = __get_iommu_pgprot(attrs, prot,
+				  is_device_dma_coherent(dev));
 
 	iova = dma_addr;
 	for (i = 0; i < count; ) {
@@ -1404,6 +1417,7 @@ static void *__iommu_alloc_atomic(struct device *dev, size_t size,
 	size_t array_size = count * sizeof(struct page *);
 	int i;
 	void *addr;
+	bool coherent = is_device_dma_coherent(dev);
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, gfp);
@@ -1413,7 +1427,13 @@ static void *__iommu_alloc_atomic(struct device *dev, size_t size,
 	if (!pages)
 		return NULL;
 
-	addr = __alloc_from_pool(size, &page, gfp);
+	if (coherent) {
+		page = alloc_pages(gfp, get_order(size));
+		addr = page ? page_address(page) : NULL;
+	} else {
+		addr = __alloc_from_pool(size, &page, gfp);
+	}
+
 	if (!addr)
 		goto err_free;
 
@@ -1428,7 +1448,10 @@ static void *__iommu_alloc_atomic(struct device *dev, size_t size,
 	return addr;
 
 err_mapping:
-	__free_from_pool(addr, size);
+	if (coherent)
+		__free_pages(page, get_order(size));
+	else
+		__free_from_pool(addr, size);
 err_free:
 	kvfree(pages);
 	return NULL;
@@ -1444,7 +1467,8 @@ static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
 {
-	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
+	bool coherent = is_device_dma_coherent(dev);
+	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
 	struct page **pages;
 	void *addr = NULL;
 
@@ -1495,8 +1519,10 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	unsigned long uaddr = vma->vm_start;
 	unsigned long usize = vma->vm_end - vma->vm_start;
 	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+	bool coherent = is_device_dma_coherent(dev);
 
-	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false);
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+					     coherent);
 
 	if (!pages)
 		return -ENXIO;
@@ -1607,9 +1633,8 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
 		dev_err(dev, "Couldn't allocate iova for sg %p\n", sg);
 		return 0;
 	}
-
-	if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
-		prot |= IOMMU_NOEXEC;
+	prot = __get_iommu_pgprot(attrs, prot,
+				  is_device_dma_coherent(dev));
 
 	ret = iommu_map_sg(mapping->domain, iova, sg, nents, prot);
 	if (ret != total_length) {
@@ -1663,6 +1688,9 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 	struct scatterlist *s;
 	int i;
 
+	if (is_device_dma_coherent(dev))
+		return;
+
 	for_each_sg(sg, s, nents, i)
 		__dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
 
@@ -1681,6 +1709,9 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 	struct scatterlist *s;
 	int i;
 
+	if (is_device_dma_coherent(dev))
+		return;
+
 	for_each_sg(sg, s, nents, i)
 		__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
 }
@@ -1709,8 +1740,8 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev,
 		return dma_addr;
 
 	prot = __dma_direction_to_prot(dir);
-	if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs))
-		prot |= IOMMU_NOEXEC;
+	prot = __get_iommu_pgprot(attrs, prot,
+				  is_device_dma_coherent(dev));
 
 	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len,
 			prot);
@@ -1737,7 +1768,8 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
 	     unsigned long offset, size_t size, enum dma_data_direction dir,
 	     struct dma_attrs *attrs)
 {
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if (!is_device_dma_coherent(dev) &&
+	    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
 		__dma_page_cpu_to_dev(page, offset, size, dir);
 
 	return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
@@ -1766,7 +1798,8 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 	if (!iova)
 		return;
 
-	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+	if (!(is_device_dma_coherent(dev) ||
+	      dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)))
 		__dma_page_dev_to_cpu(page, offset, size, dir);
 
 	iommu_unmap(mapping->domain, iova, len);
@@ -1785,7 +1818,8 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev,
 	if (!iova)
 		return;
 
-	__dma_page_dev_to_cpu(page, offset, size, dir);
+	if (!is_device_dma_coherent(dev))
+		__dma_page_dev_to_cpu(page, offset, size, dir);
 }
 
 static void arm_iommu_sync_single_for_device(struct device *dev,
@@ -1800,7 +1834,8 @@ static void arm_iommu_sync_single_for_device(struct device *dev,
 	if (!iova)
 		return;
 
-	__dma_page_cpu_to_dev(page, offset, size, dir);
+	if (!is_device_dma_coherent(dev))
+		__dma_page_cpu_to_dev(page, offset, size, dir);
 }
 
 static int arm_iommu_dma_supported(struct device *dev, u64 mask)
-- 
cgit v1.2.3
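
A note on the flag plumbing above: every IOMMU mapping path in this patch now
derives its protection bits through the new __get_iommu_pgprot() helper, which
makes executable mappings opt-in via DMA_ATTR_EXEC_MAPPING and ORs in
IOMMU_CACHE for coherent devices. The standalone sketch below models that
composition outside the kernel so it can be compiled and run directly; the
flag values, the struct dma_attrs stub, the dma_attr_exec_mapping() helper,
and the main() harness are illustrative stand-ins, not the kernel's
definitions (in-tree, the IOMMU_* flags come from include/linux/iommu.h and
coherency from is_device_dma_coherent()).

/*
 * Standalone model of the prot-flag composition introduced by this
 * patch. Flag values and helpers are stand-ins for illustration only.
 */
#include <stdbool.h>
#include <stdio.h>

#define IOMMU_READ   (1 << 0)
#define IOMMU_WRITE  (1 << 1)
#define IOMMU_CACHE  (1 << 2)
#define IOMMU_NOEXEC (1 << 3)

/* Stand-in for dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs). */
struct dma_attrs { bool exec_mapping; };

static bool dma_attr_exec_mapping(const struct dma_attrs *attrs)
{
	return attrs && attrs->exec_mapping;
}

/*
 * Mirrors the patch's __get_iommu_pgprot(): executable mappings are
 * opt-in, and coherent devices get cacheable IOMMU mappings.
 */
static int get_iommu_pgprot(const struct dma_attrs *attrs, int prot,
			    bool coherent)
{
	if (!dma_attr_exec_mapping(attrs))
		prot |= IOMMU_NOEXEC;
	if (coherent)
		prot |= IOMMU_CACHE;
	return prot;
}

int main(void)
{
	struct dma_attrs attrs = { .exec_mapping = false };
	int base = IOMMU_READ | IOMMU_WRITE;

	/* Non-coherent device: no IOMMU_CACHE, so the CPU would still
	 * need explicit cache maintenance around DMA. */
	printf("non-coherent prot = %#x\n",
	       get_iommu_pgprot(&attrs, base, false));

	/* Coherent device: IOMMU_CACHE set, cache maintenance skipped. */
	printf("coherent prot     = %#x\n",
	       get_iommu_pgprot(&attrs, base, true));
	return 0;
}

Built with any C compiler, the non-coherent case prints 0xb (no IOMMU_CACHE)
and the coherent case prints 0xf (IOMMU_CACHE set), matching the behavior
change described in the commit message: coherent devices get cacheable IOMMU
mappings while the sync/map paths skip CPU cache maintenance for them.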