From 5b432907fedfa6413bfe5971068853a786c4be16 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Nov 2015 13:26:19 +0000 Subject: UPSTREAM: arm64: mm: detect bad __create_mapping uses If a caller of __create_mapping provides a PA and VA which have different sub-page offsets, it is not clear which offset they expect to apply to the mapping, and is indicative of a bad caller. In some cases, the region we wish to map may validly have a sub-page offset in the physical and virtual addresses. For example, EFI runtime regions have 4K granularity, yet may be mapped by a 64K page kernel. So long as the physical and virtual offsets are the same, the region will be mapped at the expected VAs. Disallow calls with differing sub-page offsets, and WARN when they are encountered, so that we can detect and fix such cases. Cc: Laura Abbott Acked-by: Ard Biesheuvel Acked-by: Catalin Marinas Reviewed-by: Steve Capper Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Bug: 30369029 Patchset: __create_mapping-fixes (cherry picked from commit cc5d2b3b95cdbb3fed4e38e667d17b9ac7250f7a) Signed-off-by: Jeff Vander Stoep Change-Id: I114a1265b10ff76daff385728d2125e618c313a1 --- arch/arm64/mm/mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9a8dc4a79d8b..52f03bb07429 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -251,6 +251,13 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, { unsigned long addr, length, end, next; + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. + */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); -- cgit v1.2.3 From ab8158d22085047097653a799fc9bae6e9920edf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Nov 2015 13:26:20 +0000 Subject: UPSTREAM: arm64: mm: allow sections for unaligned bases Callees of __create_mapping may decide to create section mappings if sufficient low bits of the physical and virtual addresses they were passed are zero. While __create_mapping rounds the virtual base address down, it does not similarly round the physical base address down, and hence non-zero bits in the physical address can prevent use of a section mapping, even where a whole next-level table would be used instead. Round down the physical base address in __create_mapping to enable all callees to always create section mappings when such a mapping is possible. 
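For illustration, the check added by the previous patch and the rounding added here boil down to the following standalone C sketch (the constants assume a 4 KiB granule; the kernel derives them from the configured page size):

#include <stdint.h>
#include <stdio.h>

/* Illustration only: assumes a 4 KiB granule. */
#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static int map_region(uint64_t phys, uint64_t virt)
{
	/* Differing sub-page offsets: no mapping can satisfy the caller. */
	if ((phys ^ virt) & ~PAGE_MASK)
		return -1;

	/*
	 * Matching offsets: round both bases down so that the next levels
	 * can still use section (block) mappings where alignment allows.
	 */
	phys &= PAGE_MASK;
	virt &= PAGE_MASK;
	printf("map 0x%llx -> 0x%llx\n",
	       (unsigned long long)virt, (unsigned long long)phys);
	return 0;
}

int main(void)
{
	map_region(0x0000000080001200ULL, 0xffff000000001200ULL); /* accepted */
	map_region(0x0000000080001200ULL, 0xffff000000001800ULL); /* rejected */
	return 0;
}
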
Cc: Laura Abbott Acked-by: Ard Biesheuvel Acked-by: Catalin Marinas Reviewed-by: Steve Capper Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Bug: 30369029 Patchset: __create_mapping-fixes (cherry picked from commit 9c4e08a3022b6df90d31ef4007291faabfce5431) Signed-off-by: Jeff Vander Stoep Change-Id: Ic447350efdba3cd9f9e101c72183e04e39dd28d2 --- arch/arm64/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 52f03bb07429..4c381f0e77a7 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -258,6 +258,7 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) return; + phys &= PAGE_MASK; addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); -- cgit v1.2.3 From 6b51f3d20c52982bd71028509ebb3a4a2389850b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 20 Nov 2015 17:59:10 +0800 Subject: UPSTREAM: arm64: add __init/__initdata section marker to some functions/variables These functions/variables are not needed after booting, so mark them as __init or __initdata. Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon Bug: 30369029 (cherry picked from commit a7c61a3452d39078919f0e1f493ff966fb64f0db) Signed-off-by: Jeff Vander Stoep Change-Id: I50a3362e186750e139d2440d2c1e1d49ace896e1 --- arch/arm64/kernel/armv8_deprecated.c | 6 +++--- arch/arm64/kernel/cpufeature.c | 9 +++++---- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/mm/dma-mapping.c | 4 ++-- arch/arm64/mm/init.c | 6 +++--- 5 files changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 937f5e58a4d3..3e01207917b1 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -62,7 +62,7 @@ struct insn_emulation { }; static LIST_HEAD(insn_emulation); -static int nr_insn_emulated; +static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); static void register_emulation_hooks(struct insn_emulation_ops *ops) @@ -173,7 +173,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static void register_insn_emulation(struct insn_emulation_ops *ops) +static void __init register_insn_emulation(struct insn_emulation_ops *ops) { unsigned long flags; struct insn_emulation *insn; @@ -237,7 +237,7 @@ static struct ctl_table ctl_abi[] = { { } }; -static void register_insn_emulation_sysctl(struct ctl_table *table) +static void __init register_insn_emulation_sysctl(struct ctl_table *table) { unsigned long flags; int i = 0; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0669c63281ea..5c90aa490a2b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -684,7 +684,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -729,7 +729,7 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void setup_cpu_hwcaps(void) +static void __init setup_cpu_hwcaps(void) { int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; @@ -758,7 +758,8 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it 
on all active * CPUs */ -static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; @@ -897,7 +898,7 @@ static inline void set_sys_caps_initialised(void) #endif /* CONFIG_HOTPLUG_CPU */ -static void setup_feature_capabilities(void) +static void __init setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); enable_cpu_capabilities(arm64_features); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4c46c54a3ad7..acc1afd5c749 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -289,7 +289,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = { .notifier_call = fpsimd_cpu_pm_notifier, }; -static void fpsimd_pm_init(void) +static void __init fpsimd_pm_init(void) { cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6ad554b01e95..7b71b7c47adb 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -40,7 +40,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, static struct gen_pool *atomic_pool; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static int __init early_coherent_pool(char *p) { @@ -896,7 +896,7 @@ static int __iommu_attach_notifier(struct notifier_block *nb, return 0; } -static int register_iommu_dma_ops_notifier(struct bus_type *bus) +static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) { struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); int ret; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 325f89ff897e..d43715a14383 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -71,7 +71,7 @@ early_param("initrd", early_initrd); * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -128,11 +128,11 @@ EXPORT_SYMBOL(pfn_valid); #endif #ifndef CONFIG_SPARSEMEM -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { } #else -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { struct memblock_region *reg; -- cgit v1.2.3 From c47e4a12f511721dab3d2407d18e0e1a6b7b0e1c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Dec 2015 12:44:36 +0000 Subject: UPSTREAM: arm64: mm: remove pointless PAGE_MASKing As pgd_offset{,_k} shift the input address by PGDIR_SHIFT, the sub-page bits will always be shifted out. There is no need to apply PAGE_MASK before this. 
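Concretely, pgd_offset_k() only consumes bits at or above PGDIR_SHIFT, so masking off the sub-page bits first cannot change which pgd entry is selected. A tiny standalone check of that claim (values assume 4 KiB pages with a 48-bit VA, i.e. PGDIR_SHIFT of 39; illustrative only):

#include <assert.h>
#include <stdint.h>

#define PAGE_MASK	(~0xfffULL)
#define PGDIR_SHIFT	39		/* assumed: 4 KiB pages, 48-bit VA */
#define PTRS_PER_PGD	512

/* The index computation performed by pgd_offset_k() on the kernel pgd. */
static unsigned int pgd_index(uint64_t addr)
{
	return (addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
	uint64_t virt = 0xffff000012345678ULL;

	/* The sub-page bits are shifted out either way. */
	assert(pgd_index(virt) == pgd_index(virt & PAGE_MASK));
	return 0;
}
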
Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit e2c30ee320eb96304896c7ab84499e5bc5e5fb6e) Signed-off-by: Jeff Vander Stoep Change-Id: I86d3438aecf7295d5895e1430c1e19fbc82c9719 --- arch/arm64/mm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4c381f0e77a7..7b6e58a58bf3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -288,7 +288,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, early_alloc); } @@ -309,7 +309,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + return __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, late_alloc); } -- cgit v1.2.3 From 8ace694d891ede66ac585fa12aa76cb9c5530427 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Dec 2015 12:44:38 +0000 Subject: UPSTREAM: arm64: mm: fold alternatives into .init Currently we treat the alternatives separately from other data that's only used during initialisation, using separate .altinstructions and .altinstr_replacement linker sections. These are freed for general allocation separately from .init*. This is problematic as: * We do not remove execute permissions, as we do for .init, leaving the memory executable. * We pad between them, making the kernel Image bianry up to PAGE_SIZE bytes larger than necessary. This patch moves the two sections into the contiguous region used for .init*. This saves some memory, ensures that we remove execute permissions, and allows us to remove some code made redundant by this reorganisation. 
Signed-off-by: Mark Rutland Cc: Andre Przywara Cc: Catalin Marinas Cc: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 9aa4ec1571da62366cfddc20f3b923609604fe63) Signed-off-by: Jeff Vander Stoep Change-Id: I3fee118ead5c73ade50ea10d436881c9424a549c --- arch/arm64/include/asm/alternative.h | 1 - arch/arm64/kernel/alternative.c | 6 ------ arch/arm64/kernel/vmlinux.lds.S | 5 ++--- arch/arm64/mm/init.c | 1 - 4 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index d56ec0715157..e4962f04201e 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -19,7 +19,6 @@ struct alt_instr { void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index ab9db0e9818c..d2ee1b21a10d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -158,9 +158,3 @@ void apply_alternatives(void *start, size_t length) __apply_alternatives(®ion); } - -void free_alternatives_memory(void) -{ - free_reserved_area(__alt_instructions, __alt_instructions_end, - 0, "alternatives"); -} diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1cc195872a1f..1a056bb26633 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -142,9 +142,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); - __init_end = .; - . = ALIGN(4); .altinstructions : { __alt_instructions = .; @@ -156,6 +153,8 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + __init_end = .; + _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d43715a14383..f13078bbf66e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -362,7 +362,6 @@ void free_initmem(void) { fixup_init(); free_initmem_default(0); - free_alternatives_memory(); } #ifdef CONFIG_BLK_DEV_INITRD -- cgit v1.2.3 From 2393897c7c7e10c2e8fe7d1c32466cbb885c9047 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 11 Dec 2015 11:04:31 +0000 Subject: UPSTREAM: arm64: mm: place __cpu_setup in .text We drop __cpu_setup in .text.init, which ends up being part of .text. The .text.init section was a legacy section name which has been unused elsewhere for a long time. The ".text.init" name is misleading if read as a synonym for ".init.text". Any CPU may execute __cpu_setup before turning the MMU on, so it should simply live in .text. Remove the pointless section assignment. This will leave __cpu_setup in the .text section. 
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit f00083cae331e5d3eecade6b4fdc35d0825e73ef) Signed-off-by: Jeff Vander Stoep Change-Id: I2e9b154cd6de92662c70c2b957479448252c661a --- arch/arm64/mm/proc.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index cacecc4ad3e5..b6f9053ab184 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -139,8 +139,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) - .section ".text.init", #alloc, #execinstr - /* * __cpu_setup * -- cgit v1.2.3 From b10551a35399a277afe9b2e28266e2a00a5f6253 Mon Sep 17 00:00:00 2001 From: Ashok Kumar Date: Thu, 17 Dec 2015 01:38:31 -0800 Subject: UPSTREAM: arm64: Defer dcache flush in __cpu_copy_user_page Defer dcache flushing to __sync_icache_dcache by calling flush_dcache_page which clears PG_dcache_clean flag. Acked-by: Catalin Marinas Signed-off-by: Ashok Kumar Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit e6b1185f77351aa154e63bd54b05d07ff99d4ffa) Signed-off-by: Jeff Vander Stoep Change-Id: I2e02ce2f43f68287337bed30e3c3455c0eee4164 --- arch/arm64/mm/copypage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 13bbc3be6f5a..22e4cb4d6f53 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -24,8 +24,9 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) { + struct page *page = virt_to_page(kto); copy_page(kto, kfrom); - __flush_dcache_area(kto, PAGE_SIZE); + flush_dcache_page(page); } EXPORT_SYMBOL_GPL(__cpu_copy_user_page); -- cgit v1.2.3 From 350a6a0fe0fe70d15a59084c8f4863d10271887f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 13 Jan 2016 14:50:03 +0000 Subject: BACKPORT: arm64: kernel: fix architected PMU registers unconditional access The Performance Monitors extension is an optional feature of the AArch64 architecture, therefore, in order to access Performance Monitors registers safely, the kernel should detect the architected PMU unit presence through the ID_AA64DFR0_EL1 register PMUVer field before accessing them. This patch implements a guard by reading the ID_AA64DFR0_EL1 register PMUVer field to detect the architected PMU presence and prevent accessing PMU system registers if the Performance Monitors extension is not implemented in the core. 
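In C terms, the guard added below (via sbfx in assembly) sign-extends the four-bit ID_AA64DFR0_EL1.PMUVer field and only touches the PMU registers when the result is at least 1; a sketch of the same test (the helper name is invented for illustration):

#include <stdbool.h>
#include <stdint.h>

/*
 * "sbfx x0, x0, #8, #4" sign-extends ID_AA64DFR0_EL1.PMUVer (bits [11:8]).
 * 0b0000 means no architected PMU, and 0b1111 (IMPLEMENTATION DEFINED)
 * sign-extends to -1, so both fail the ">= 1" test and the PMU registers
 * are left untouched.
 */
static bool have_architected_pmu(uint64_t id_aa64dfr0)
{
	int64_t pmuver = ((int64_t)(id_aa64dfr0 << 52)) >> 60;

	return pmuver >= 1;
}
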
Cc: Peter Maydell Cc: Mark Rutland Cc: Fixes: 60792ad349f3 ("arm64: kernel: enforce pmuserenr_el0 initialization and restore") Signed-off-by: Lorenzo Pieralisi Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit f436b2ac90a095746beb6729b8ee8ed87c9eaede) Signed-off-by: Jeff Vander Stoep Change-Id: Ie442b9feba5d143bd6b6c82d70190fc8bc95298d --- arch/arm64/kernel/head.S | 5 +++++ arch/arm64/mm/proc-macros.S | 12 ++++++++++++ arch/arm64/mm/proc.S | 2 ++ 3 files changed, 19 insertions(+) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b363f340f2c7..17ce7285bb12 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -515,9 +515,14 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems #endif /* EL2 debug */ + mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx x0, x0, #8, #4 + cmp x0, #1 + b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to msr mdcr_el2, x0 // all PMU counters from EL1 +4: /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 4c4d93c4bf65..d69dffffaa89 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -62,3 +62,15 @@ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH #endif .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b6f9053ab184..c164d2cb35c0 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -117,6 +117,7 @@ ENTRY(cpu_do_resume) */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 mov x0, x12 dsb nsh // Make sure local tlb invalidation completed isb @@ -153,6 +154,7 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: * -- cgit v1.2.3 From d378add20954990566e91f914105e2638564d8ba Mon Sep 17 00:00:00 2001 From: Ashok Kumar Date: Thu, 17 Dec 2015 01:38:32 -0800 Subject: BACKPORT: arm64: Use PoU cache instr for I/D coherency In systems with three levels of cache(PoU at L1 and PoC at L3), PoC cache flush instructions flushes L2 and L3 caches which could affect performance. For cache flushes for I and D coherency, PoU should suffice. So changing all I and D coherency related cache flushes to PoU. Introduced a new __clean_dcache_area_pou API for dcache flush till PoU and provided a common macro for __flush_dcache_area and __clean_dcache_area_pou. Also, now in __sync_icache_dcache, icache invalidation for non-aliasing VIPT icache is done only for that particular page instead of the earlier __flush_icache_all. 
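As a rough C transcription of the dcache_by_line_op macro introduced in the proc-macros.S hunk below (the helpers here are placeholders, not kernel functions): round the start down to a cache line, issue the requested dc operation line by line, then complete with a dsb in the requested domain.

#include <stddef.h>
#include <stdint.h>

/*
 * Readability aid only: dc_op() and dsb_domain() stand in for the
 * "dc <op>" instruction and the barrier; the real macro reads the line
 * size from CTR_EL0 via dcache_line_size.
 */
static void dcache_by_line_op_sketch(void (*dc_op)(uintptr_t),
				     void (*dsb_domain)(void),
				     uintptr_t kaddr, size_t size)
{
	const uintptr_t line = 64;	/* assumed line size for the sketch */
	uintptr_t end = kaddr + size;

	kaddr &= ~(line - 1);		/* align down to a cache line */
	do {
		dc_op(kaddr);		/* e.g. civac for PoC, cvau for PoU */
		kaddr += line;
	} while (kaddr < end);
	dsb_domain();			/* sy or ish, as the caller requires */
}
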
Reviewed-by: Catalin Marinas Reviewed-by: Mark Rutland Signed-off-by: Ashok Kumar Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 0a28714c53fd4f7aea709be7577dfbe0095c8c3e) Signed-off-by: Jeff Vander Stoep Change-Id: I64f065140d5e8783e91ed53ae9c7a2e33a3e515a --- arch/arm64/include/asm/cacheflush.h | 1 + arch/arm64/mm/cache.S | 28 +++++++++++++++++----------- arch/arm64/mm/flush.c | 33 ++++++++++++++++++--------------- arch/arm64/mm/proc-macros.S | 22 ++++++++++++++++++++++ 4 files changed, 58 insertions(+), 26 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 54efedaf331f..7fc294c3bc5b 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index cfa44a6adc0a..6df07069a025 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -81,25 +81,31 @@ ENDPROC(__flush_cache_user_range) /* * __flush_dcache_area(kaddr, size) * - * Ensure that the data held in the page kaddr is written back to the - * page in question. + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned and invalidated to the PoC. * * - kaddr - kernel address * - size - size in question */ ENTRY(__flush_dcache_area) - dcache_line_size x2, x3 - add x1, x0, x1 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D line / unified line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy + dcache_by_line_op civac, sy, x0, x1, x2, x3 ret ENDPIPROC(__flush_dcache_area) +/* + * __clean_dcache_area_pou(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoU. 
+ * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pou) + dcache_by_line_op cvau, ish, x0, x1, x2, x3 + ret +ENDPROC(__clean_dcache_area_pou) + /* * __inval_cache_range(start, end) * - start - start address of region diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index c26b804015e8..46649d6e6c5a 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, __flush_icache_all(); } +static void sync_icache_aliases(void *kaddr, unsigned long len) +{ + unsigned long addr = (unsigned long)kaddr; + + if (icache_is_aliasing()) { + __clean_dcache_area_pou(kaddr, len); + __flush_icache_all(); + } else { + flush_icache_range(addr, addr + len); + } +} + static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len) { - if (vma->vm_flags & VM_EXEC) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __flush_dcache_area(kaddr, len); - __flush_icache_all(); - } else { - flush_icache_range(addr, addr + len); - } - } + if (vma->vm_flags & VM_EXEC) + sync_icache_aliases(kaddr, len); } /* @@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!page_mapping(page)) return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), - PAGE_SIZE << compound_order(page)); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + sync_icache_aliases(page_address(page), + PAGE_SIZE << compound_order(page)); + else if (icache_is_aivivt()) __flush_icache_all(); - } else if (icache_is_aivivt()) { - __flush_icache_all(); - } } /* diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index d69dffffaa89..984edcda1850 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -74,3 +74,25 @@ msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm -- cgit v1.2.3 From 357838314d69e67ff89db49a7170fa1968638b88 Mon Sep 17 00:00:00 2001 From: David Woods Date: Thu, 17 Dec 2015 14:31:26 -0500 Subject: UPSTREAM: arm64: hugetlb: add support for PTE contiguous bit The arm64 MMU supports a Contiguous bit which is a hint that the TTE is one of a set of contiguous entries which can be cached in a single TLB entry. Supporting this bit adds new intermediate huge page sizes. The set of huge page sizes available depends on the base page size. Without using contiguous pages the huge page sizes are as follows. 4KB: 2MB 1GB 64KB: 512MB With a 4KB granule, the contiguous bit groups together sets of 16 pages and with a 64KB granule it groups sets of 32 pages. This enables two new huge page sizes in each case, so that the full set of available sizes is as follows. 4KB: 64KB 2MB 32MB 1GB 64KB: 2MB 512MB 16GB If a 16KB granule is used then the contiguous bit groups 128 pages at the PTE level and 32 pages at the PMD level. 
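Those sizes follow directly from the CONT_PTE_SHIFT/CONT_PMD_SHIFT values this patch adds to pgtable-hwdef.h; the small standalone calculation below (illustrative only) reproduces the tables above.

#include <stdio.h>

static void show(const char *granule, unsigned int page_shift,
		 unsigned int cont_pte_shift, unsigned int cont_pmd_shift)
{
	unsigned long long page = 1ULL << page_shift;
	unsigned long long pmd = page * (page / 8);	/* PTRS_PER_PTE == PAGE_SIZE / 8 */

	printf("%s: CONT_PTE %lluK  PMD %lluM  CONT_PMD %lluM\n", granule,
	       (page << cont_pte_shift) >> 10,
	       pmd >> 20,
	       (pmd << cont_pmd_shift) >> 20);
}

int main(void)
{
	show(" 4KB", 12, 4, 4);		/* 64K, 2M, 32M (plus 1G at the PUD) */
	show("16KB", 14, 7, 5);		/* 2M, 32M, 1G */
	show("64KB", 16, 5, 5);		/* 2M, 512M, 16G */
	return 0;
}
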
If the base page size is set to 64KB then 2MB pages are enabled by default. It is possible in the future to make 2MB the default huge page size for both 4KB and 64KB granules. Reviewed-by: Chris Metcalf Reviewed-by: Steve Capper Signed-off-by: David Woods Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 66b3923a1a0f77a563b43f43f6ad091354abbfe9) Signed-off-by: Jeff Vander Stoep Change-Id: I5e99c5165bc5eb966adf4d4523632fd9eedd9602 --- arch/arm64/Kconfig | 3 - arch/arm64/include/asm/hugetlb.h | 44 ++---- arch/arm64/include/asm/pgtable-hwdef.h | 18 ++- arch/arm64/include/asm/pgtable.h | 10 +- arch/arm64/mm/hugetlbpage.c | 274 ++++++++++++++++++++++++++++++++- include/linux/hugetlb.h | 2 - 6 files changed, 313 insertions(+), 38 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 92f234a5579b..ac3dbd933064 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -564,9 +564,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_WANT_GENERAL_HUGETLB - def_bool y - config ARCH_WANT_HUGE_PMD_SHARE def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index bb4052e85dba..bbc1e35aa601 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -26,36 +26,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, @@ -97,4 +68,19 @@ static inline void arch_clear_hugepage_flags(struct page *page) clear_bit(PG_dcache_clean, &page->flags); } +extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable); +#define arch_make_huge_pte arch_make_huge_pte +extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d6739e836f7b..5c25b831273d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -90,7 +90,23 @@ /* * Contiguous page definitions. 
*/ -#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +#ifdef CONFIG_ARM64_64K_PAGES +#define CONT_PTE_SHIFT 5 +#define CONT_PMD_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define CONT_PTE_SHIFT 7 +#define CONT_PMD_SHIFT 5 +#else +#define CONT_PTE_SHIFT 4 +#define CONT_PMD_SHIFT 4 +#endif + +#define CONT_PTES (1 << CONT_PTE_SHIFT) +#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) +#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) +#define CONT_PMDS (1 << CONT_PMD_SHIFT) +#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) +#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) /* the the numerical offset of the PTE within a range of CONT_PTES */ #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 824416a219e7..51f382b7e5c7 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -227,7 +227,8 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_CONT)); + pte = set_pte_bit(pte, __pgprot(PTE_CONT)); + return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -235,6 +236,11 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pmd_t pmd_mkcont(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | PMD_SECT_CONT); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -308,7 +314,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Hugetlb definitions. */ -#define HUGE_MAX_HSTATE 2 +#define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 383b03ff38f8..82d607c3614e 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,17 +41,289 @@ int pud_huge(pud_t pud) #endif } +static int find_num_contig(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, size_t *pgsize) +{ + pgd_t *pgd = pgd_offset(mm, addr); + pud_t *pud; + pmd_t *pmd; + + *pgsize = PAGE_SIZE; + if (!pte_cont(pte)) + return 1; + if (!pgd_present(*pgd)) { + VM_BUG_ON(!pgd_present(*pgd)); + return 1; + } + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) { + VM_BUG_ON(!pud_present(*pud)); + return 1; + } + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + VM_BUG_ON(!pmd_present(*pmd)); + return 1; + } + if ((pte_t *)pmd == ptep) { + *pgsize = PMD_SIZE; + return CONT_PMDS; + } + return CONT_PTES; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + size_t pgsize; + int i; + int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize); + unsigned long pfn; + pgprot_t hugeprot; + + if (ncontig == 1) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pfn = pte_pfn(pte); + hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); + for (i = 0; i < ncontig; i++) { + pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, + pte_val(pfn_pte(pfn, hugeprot))); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); + ptep++; + pfn += pgsize >> PAGE_SHIFT; + addr += pgsize; + } +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + 
return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else if (sz == (PAGE_SIZE * CONT_PTES)) { + pmd_t *pmd = pmd_alloc(mm, pud, addr); + + WARN_ON(addr & (sz - 1)); + /* + * Note that if this code were ever ported to the + * 32-bit arm platform then it will cause trouble in + * the case where CONFIG_HIGHPTE is set, since there + * will be no pte_unmap() to correspond with this + * pte_alloc_map(). + */ + pte = pte_alloc_map(mm, NULL, pmd, addr); + } else if (sz == PMD_SIZE) { + if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && + pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } else if (sz == (PMD_SIZE * CONT_PMDS)) { + pmd_t *pmd; + + pmd = pmd_alloc(mm, pud, addr); + WARN_ON(addr & (sz - 1)); + return (pte_t *)pmd; + } + + pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, + sz, pte, pte_val(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + if (pte_cont(pmd_pte(*pmd))) { + pmd = pmd_offset( + pud, (addr & CONT_PMD_MASK)); + return (pte_t *)pmd; + } + if (pmd_huge(*pmd)) + return (pte_t *)pmd; + pte = pte_offset_kernel(pmd, addr); + if (pte_present(*pte) && pte_cont(*pte)) { + pte = pte_offset_kernel( + pmd, (addr & CONT_PTE_MASK)); + return pte; + } + return NULL; +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t pagesize = huge_page_size(hstate_vma(vma)); + + if (pagesize == CONT_PTE_SIZE) { + entry = pte_mkcont(entry); + } else if (pagesize == CONT_PMD_SIZE) { + entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); + } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + } + return entry; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte; + + if (pte_cont(*ptep)) { + int ncontig, i; + size_t pgsize; + pte_t *cpte; + bool is_dirty = false; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + /* save the 1st pte to return */ + pte = ptep_get_and_clear(mm, addr, cpte); + for (i = 1; i < ncontig; ++i) { + /* + * If HW_AFDBM is enabled, then the HW could + * turn on the dirty bit for any of the page + * in the set, so check them all. 
+ */ + ++cpte; + if (pte_dirty(ptep_get_and_clear(mm, addr, cpte))) + is_dirty = true; + } + if (is_dirty) + return pte_mkdirty(pte); + else + return pte; + } else { + return ptep_get_and_clear(mm, addr, ptep); + } +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + pte_t *cpte; + + if (pte_cont(pte)) { + int ncontig, i, changed = 0; + size_t pgsize = 0; + unsigned long pfn = pte_pfn(pte); + /* Select all bits except the pfn */ + pgprot_t hugeprot = + __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ + pte_val(pte)); + + cpte = huge_pte_offset(vma->vm_mm, addr); + pfn = pte_pfn(*cpte); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) { + changed = ptep_set_access_flags(vma, addr, cpte, + pfn_pte(pfn, + hugeprot), + dirty); + pfn += pgsize >> PAGE_SHIFT; + } + return changed; + } else { + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + } +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_set_wrprotect(mm, addr, cpte); + } else { + ptep_set_wrprotect(mm, addr, ptep); + } +} + +void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(vma->vm_mm, addr); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_clear_flush(vma, addr, cpte); + } else { + ptep_clear_flush(vma, addr, ptep); + } +} + static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else if (ps == (PAGE_SIZE * CONT_PTES)) { + hugetlb_add_hstate(CONT_PTE_SHIFT); + } else if (ps == (PMD_SIZE * CONT_PMDS)) { + hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { - pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; } return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_ARM64_64K_PAGES +static __init int add_default_hugepagesz(void) +{ + if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) + hugetlb_add_hstate(CONT_PMD_SHIFT); + return 0; +} +arch_initcall(add_default_hugepagesz); +#endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 685c262e0be8..b0eb06423d5e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -96,9 +96,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, struct address_space *mapping, pgoff_t idx, unsigned long address); -#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); -#endif extern int hugepages_treat_as_movable; extern int sysctl_hugetlb_shm_group; -- cgit v1.2.3 From 606d0cab2091ee86fa8dc7c77a53a9e59e9fac05 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Jan 2016 15:36:59 +0000 Subject: UPSTREAM: arm64: mm: move pgd_cache initialisation to pgtable_cache_init Initialising the suppport for EFI runtime services requires us to allocate a pgd off the back of an early_initcall. 
On systems where the PGD_SIZE is smaller than PAGE_SIZE (e.g. 64k pages and 48-bit VA), the pgd_cache isn't initialised at this stage, and we panic with a NULL dereference during boot: Unable to handle kernel NULL pointer dereference at virtual address 00000000 __create_mapping.isra.5+0x84/0x350 create_pgd_mapping+0x20/0x28 efi_create_mapping+0x5c/0x6c arm_enable_runtime_services+0x154/0x1e4 do_one_initcall+0x8c/0x190 kernel_init_freeable+0x84/0x1ec kernel_init+0x10/0xe0 ret_from_fork+0x10/0x50 This patch fixes the problem by initialising the pgd_cache earlier, in the pgtable_cache_init callback, which sounds suspiciously like what it was intended for. Reported-by: Dennis Chen Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 39b5be9b4233a9f212b98242bddf008f379b5122) Signed-off-by: Jeff Vander Stoep Change-Id: I124f03d19299be93124af35641294bf73c13bb22 --- arch/arm64/include/asm/pgtable.h | 3 ++- arch/arm64/mm/pgd.c | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 51f382b7e5c7..69d2e2f86bce 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -680,7 +680,8 @@ extern int kern_addr_valid(unsigned long addr); #include -#define pgtable_cache_init() do { } while (0) +void pgd_cache_init(void); +#define pgtable_cache_init pgd_cache_init /* * On AArch64, the cache coherency is handled via the set_pte_at() function. diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index cb3ba1b812e7..ae11d4e03d0e 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -46,14 +46,14 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } -static int __init pgd_cache_init(void) +void __init pgd_cache_init(void) { + if (PGD_SIZE == PAGE_SIZE) + return; + /* * Naturally aligned pgds required by the architecture. */ - if (PGD_SIZE != PAGE_SIZE) - pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, - SLAB_PANIC, NULL); - return 0; + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, + SLAB_PANIC, NULL); } -core_initcall(pgd_cache_init); -- cgit v1.2.3 From fd89b55b8391791a0cd49258a8922b4982d19f47 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Jan 2016 14:50:21 +0100 Subject: UPSTREAM: arm64: kasan: ensure that the KASAN zero page is mapped read-only When switching from the early KASAN shadow region, which maps the entire shadow space read-write, to the permanent KASAN shadow region, which uses a zero page to shadow regions that are not subject to instrumentation, the lowest level table kasan_zero_pte[] may be reused unmodified, which means that the mappings of the zero page that it contains will still be read-write. So update it explicitly to map the zero page read only when we activate the permanent mapping. 
Acked-by: Andrey Ryabinin Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 7b1af9795773d745c2a8c7d4ca5f2936e8b6adfb) Signed-off-by: Jeff Vander Stoep Change-Id: I4745dc91236c2612ad3e13be1cc176ffd923f7da --- arch/arm64/mm/kasan_init.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cf038c7d9fa9..cab7a5be40aa 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1) void __init kasan_init(void) { struct memblock_region *reg; + int i; /* * We are going to perform proper setup of shadow memory. @@ -155,6 +156,14 @@ void __init kasan_init(void) pfn_to_nid(virt_to_pfn(start))); } + /* + * KAsan may reuse the contents of kasan_zero_pte directly, so we + * should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_zero_pte[i], + pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); + memset(kasan_zero_page, 0, PAGE_SIZE); cpu_set_ttbr1(__pa(swapper_pg_dir)); flush_tlb_all(); -- cgit v1.2.3 From da4712caec8748615aa17eeb9d6c3cd3b0f4f910 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 24 Jan 2016 15:24:12 +0900 Subject: UPSTREAM: arm64: Fix an enum typo in mm/dump.c This patch fixes a typo in mm/dump.c: "MODUELS_END_NR" should be "MODULES_END_NR". Signed-off-by: Masanari Iida Signed-off-by: Will Deacon Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit b3122023df935cf14bf951da98ca598d71b9f826) Signed-off-by: Jeff Vander Stoep Change-Id: I5e73d18fd70f20f200a974f5f2ba22cb7ae64952 --- arch/arm64/mm/dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 5a22a119a74c..0adbebbc2803 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -46,7 +46,7 @@ enum address_markers_idx { PCI_START_NR, PCI_END_NR, MODULES_START_NR, - MODUELS_END_NR, + MODULES_END_NR, KERNEL_SPACE_NR, }; -- cgit v1.2.3 From 0c8db8d0d4c615dd53b703fd632d4481e2bb18d6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:56 +0000 Subject: UPSTREAM: arm64: mm: specialise pagetable allocators We pass a size parameter to early_alloc and late_alloc, but these are only ever used to allocate single pages. In late_alloc we always allocate a single page. Both allocators provide us with zeroed pages (such that all entries are invalid), but we have no barriers between allocating a page and adding that page to existing (live) tables. A concurrent page table walk may see stale data, leading to a number of issues. This patch specialises the two allocators for page tables. The size parameter is removed and the necessary dsb(ishst) is folded into each. To make it clear that the functions are intended for use for page table allocation, they are renamed to {early,late}_pgtable_alloc, with the related function pointed renamed to pgtable_alloc. As the dsb(ishst) is now in the allocator, the existing barrier for the zero page is redundant and thus is removed. The previously missing include of barrier.h is added. 
Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 21ab99c289d350f4ae454bc069870009db6df20e) Signed-off-by: Jeff Vander Stoep Change-Id: Ib88fafc146506943122aa1ca6ee5a6c331ddb26c --- arch/arm64/mm/mmu.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7b6e58a58bf3..ad8c076d0aef 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -62,15 +63,18 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static void __init *early_alloc(unsigned long sz) +static void __init *early_pgtable_alloc(void) { phys_addr_t phys; void *ptr; - phys = memblock_alloc(sz, sz); + phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); BUG_ON(!phys); ptr = __va(phys); - memset(ptr, 0, sz); + memset(ptr, 0, PAGE_SIZE); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); return ptr; } @@ -95,12 +99,12 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + pte = pgtable_alloc(); if (pmd_sect(*pmd)) split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); @@ -130,7 +134,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pmd_t *pmd; unsigned long next; @@ -139,7 +143,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. 
*/ if (pud_none(*pud) || pud_sect(*pud)) { - pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + pmd = pgtable_alloc(); if (pud_sect(*pud)) { /* * need to have the 1G of mappings continue to be @@ -174,7 +178,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, alloc); + prot, pgtable_alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -195,13 +199,13 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); + pud = pgtable_alloc(); pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -234,7 +238,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, } } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); + alloc_init_pmd(mm, pud, addr, next, phys, prot, + pgtable_alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -247,7 +252,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { unsigned long addr, length, end, next; @@ -265,18 +270,18 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); + alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void *late_alloc(unsigned long size) +static void *late_pgtable_alloc(void) { - void *ptr; - - BUG_ON(size > PAGE_SIZE); - ptr = (void *)__get_free_page(PGALLOC_GFP); + void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); return ptr; } @@ -289,7 +294,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, - size, prot, early_alloc); + size, prot, early_pgtable_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -297,7 +302,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot) { __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - late_alloc); + late_pgtable_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -310,7 +315,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } return __create_mapping(&init_mm, pgd_offset_k(virt), - phys, virt, size, prot, late_alloc); + phys, virt, size, prot, late_pgtable_alloc); } #ifdef CONFIG_DEBUG_RODATA @@ -457,7 +462,7 @@ void __init paging_init(void) fixup_executable(); /* allocate the zero page. */ - zero_page = early_alloc(PAGE_SIZE); + zero_page = early_pgtable_alloc(); bootmem_init(); -- cgit v1.2.3 From 2563a08010f2a945a39c19853297ed980c8d469f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:57 +0000 Subject: UPSTREAM: arm64: mm: place empty_zero_page in bss Currently the zero page is set up in paging_init, and thus we cannot use the zero page earlier. 
We use the zero page as a reserved TTBR value from which no TLB entries may be allocated (e.g. when uninstalling the idmap). To enable such usage earlier (as may be required for invasive changes to the kernel page tables), and to minimise the time that the idmap is active, we need to be able to use the zero page before paging_init. This patch follows the example set by x86, by allocating the zero page at compile time, in .bss. This means that the zero page itself is available immediately upon entry to start_kernel (as we zero .bss before this), and also means that the zero page takes up no space in the raw Image binary. The associated struct page is allocated in bootmem_init, and remains unavailable until this time. Outside of arch code, the only users of empty_zero_page assume that the empty_zero_page symbol refers to the zeroed memory itself, and that ZERO_PAGE(x) must be used to acquire the associated struct page, following the example of x86. This patch also brings arm64 inline with these assumptions. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 5227cfa71f9e8574373f4d0e9e754942d76cdf67) Signed-off-by: Jeff Vander Stoep Change-Id: I43005dd8533d9968d2cef48bd2821d4507cae5ad --- arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/kernel/head.S | 1 + arch/arm64/mm/mmu.c | 9 +-------- 4 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 24165784b803..600eacb9f7d5 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -48,7 +48,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = page_to_phys(empty_zero_page); + unsigned long ttbr = virt_to_phys(empty_zero_page); asm( " msr ttbr0_el1, %0 // set TTBR0\n" diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 69d2e2f86bce..e11dc90fbf56 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -121,8 +121,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 917d98108b3f..53b9f9f128c2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -421,6 +421,7 @@ __mmap_switched: adr_l x2, __bss_stop sub x2, x2, x0 bl __pi_memset + dsb ishst // Make zero page visible to PTW adr_l sp, initial_sp, x4 mov x4, sp diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ad8c076d0aef..acf0442b9759 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -49,7 +49,7 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); * Empty_zero_page is a special page that is used for zero-initialized data * and COW. 
*/ -struct page *empty_zero_page; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, @@ -456,18 +456,11 @@ void fixup_init(void) */ void __init paging_init(void) { - void *zero_page; - map_mem(); fixup_executable(); - /* allocate the zero page. */ - zero_page = early_pgtable_alloc(); - bootmem_init(); - empty_zero_page = virt_to_page(zero_page); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. -- cgit v1.2.3 From c66ef5f947144ca420c7483226e3ac64ad8fd813 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:58 +0000 Subject: UPSTREAM: arm64: unify idmap removal We currently open-code the removal of the idmap and restoration of the current task's MMU state in a few places. Before introducing yet more copies of this sequence, unify these to call a new helper, cpu_uninstall_idmap. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Lorenzo Pieralisi Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 9e8e865bbe294a69666a1996bda3e87825b258c0) Signed-off-by: Jeff Vander Stoep Change-Id: I6e9cb0253a1d2d63232f8fa0b3f39f8f6987b239 --- arch/arm64/include/asm/mmu_context.h | 25 +++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 1 + arch/arm64/kernel/smp.c | 4 +--- arch/arm64/kernel/suspend.c | 20 ++++---------------- arch/arm64/mm/mmu.c | 4 +--- 5 files changed, 32 insertions(+), 22 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 600eacb9f7d5..b1b2514d8883 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) @@ -89,6 +90,30 @@ static inline void cpu_set_default_tcr_t0sz(void) : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } +/* + * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm. + * + * The idmap lives in the same VA range as userspace, but uses global entries + * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from + * speculative TLB fetches, we must temporarily install the reserved page + * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ. + * + * If current is a not a user task, the mm covers the TTBR1_EL1 page tables, + * which should not be installed in TTBR0_EL1. In this case we can leave the + * reserved page tables in place. 
+ */ +static inline void cpu_uninstall_idmap(void) +{ + struct mm_struct *mm = current->active_mm; + + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); + + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); +} + /* * It would be nice to return ASIDs back to the allocator, but unfortunately * that introduces a race with a generation rollover where we could erroneously diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7f0e7226795e..de8e187ec8c6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -62,6 +62,7 @@ #include #include #include +#include phys_addr_t __fdt_pointer __initdata; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b1adc51b2c2e..68e7f79630d4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -149,9 +149,7 @@ asmlinkage void secondary_start_kernel(void) * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + cpu_uninstall_idmap(); preempt_disable(); trace_hardirqs_off(); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1095aa483a1c..66055392f445 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - struct mm_struct *mm = current->active_mm; int ret; unsigned long flags; @@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ret = __cpu_suspend_enter(arg, fn); if (ret == 0) { /* - * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; set the TTBR0 to the reserved - * page tables to prevent speculative TLB allocations, flush - * the local tlb and set the default tcr_el1.t0sz so that - * the TTBR0 address space set-up is properly restored. - * If the current active_mm != &init_mm we entered cpu_suspend - * with mappings in TTBR0 that must be restored, so we switch - * them back to complete the address space configuration - * restoration before returning. + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); - - if (mm != &init_mm) - cpu_switch_mm(mm->pgd, mm); + cpu_uninstall_idmap(); /* * Restore per-cpu offset before any kernel diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index acf0442b9759..b00462172705 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -465,9 +465,7 @@ void __init paging_init(void) * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + cpu_uninstall_idmap(); } /* -- cgit v1.2.3 From 8ac5abfecf99f0c1c065c83a0ff7481d29173e03 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:44:59 +0000 Subject: UPSTREAM: arm64: unmap idmap earlier During boot we leave the idmap in place until paging_init, as we previously had to wait for the zero page to become allocated and accessible. 
Now that we have a statically-allocated zero page, we can uninstall the idmap much earlier in the boot process, making it far easier to spot accidental use of physical addresses. This also brings the cold boot path in line with the secondary boot path. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 86ccce896cb0aa800a7a6dcd29b41ffc4eeb1a75) Signed-off-by: Jeff Vander Stoep Change-Id: I6375bd9855e45727790697875b7cd19f84a4dd7f --- arch/arm64/kernel/setup.c | 6 ++++++ arch/arm64/mm/mmu.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index de8e187ec8c6..079db3ba21c4 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -314,6 +314,12 @@ void __init setup_arch(char **cmdline_p) */ local_async_enable(); + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_uninstall_idmap(); + efi_init(); arm64_memblock_init(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b00462172705..c0a4160331f8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -460,12 +460,6 @@ void __init paging_init(void) fixup_executable(); bootmem_init(); - - /* - * TTBR0 is only used for the identity mapping at this stage. Make it - * point to zero page to avoid speculatively fetching new entries. - */ - cpu_uninstall_idmap(); } /* -- cgit v1.2.3 From 214ae8984d113452991df67aa2820a28755219c3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:01 +0000 Subject: UPSTREAM: arm64: mm: add code to safely replace TTBR1_EL1 If page tables are modified without suitable TLB maintenance, the ARM architecture permits multiple TLB entries to be allocated for the same VA. When this occurs, it is permitted that TLB conflict aborts are raised in response to synchronous data/instruction accesses, and/or and amalgamation of the TLB entries may be used as a result of a TLB lookup. The presence of conflicting TLB entries may result in a variety of behaviours detrimental to the system (e.g. erroneous physical addresses may be used by I-cache fetches and/or page table walks). Some of these cases may result in unexpected changes of hardware state, and/or result in the (asynchronous) delivery of SError. To avoid these issues, we must avoid situations where conflicting entries may be allocated into TLBs. For user and module mappings we can follow a strict break-before-make approach, but this cannot work for modifications to the swapper page tables that cover the kernel text and data. Instead, this patch adds code which is intended to be executed from the idmap, which can safely unmap the swapper page tables as it only requires the idmap to be active. This enables us to uninstall the active TTBR1_EL1 entry, invalidate TLBs, then install a new TTBR1_EL1 entry without potentially unmapping code or data required for the sequence. This avoids the risk of conflict, but requires that updates are staged in a copy of the swapper page tables prior to being installed. 
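
[Editorial sketch] The staging discipline described above can be modelled in plain C. The code below is a userspace analogue only: a plain struct stands in for the swapper page tables, an atomic pointer stands in for TTBR1_EL1, and the idmap trampoline and TLB invalidation that the real sequence requires are deliberately not modelled. All names and values are illustrative. The point is simply that every modification lands in an inactive copy, and the walker's root is then moved in one step.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NR_ENTRIES 4

struct pgdir {
        uint64_t entries[NR_ENTRIES];
};

static struct pgdir live_tables, staged_tables;

/* Stands in for TTBR1_EL1: the walker only ever dereferences one root. */
static _Atomic(struct pgdir *) root;

static void replace_root(struct pgdir *new_root)
{
        /* The kernel performs this switch from the idmap, with TLB
         * invalidation in between; here it is just a pointer update. */
        atomic_store(&root, new_root);
}

int main(void)
{
        atomic_store(&root, &live_tables);
        live_tables.entries[0] = 0x1000;

        /* Stage every modification in the inactive copy... */
        memcpy(&staged_tables, &live_tables, sizeof(staged_tables));
        staged_tables.entries[0] = 0x2000;
        staged_tables.entries[1] = 0x3000;

        /* ...and only then make the whole set visible at once. */
        replace_root(&staged_tables);

        printf("entry 0 via active root: %#llx\n",
               (unsigned long long)atomic_load(&root)->entries[0]);
        return 0;
}

In the kernel, the "one step" is the idmap_cpu_replace_ttbr1 trampoline added by the diff below, which can run safely while TTBR1_EL1 points at the reserved tables.
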
Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 50e1881ddde2a986c7d0d2150985239e5e3d7d96) Signed-off-by: Jeff Vander Stoep Change-Id: Ibd0a7c802a1abe0ba08a99819fd62ac646186005 --- arch/arm64/include/asm/mmu_context.h | 19 +++++++++++++++++++ arch/arm64/mm/proc.S | 28 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 944f2730a940..a00f7cf35bbd 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -126,6 +126,25 @@ static inline void cpu_install_idmap(void) cpu_switch_mm(idmap_pg_dir, &init_mm); } +/* + * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, + * avoiding the possibility of conflicting TLB entries being allocated. + */ +static inline void cpu_replace_ttbr1(pgd_t *pgd) +{ + typedef void (ttbr_replace_func)(phys_addr_t); + extern ttbr_replace_func idmap_cpu_replace_ttbr1; + ttbr_replace_func *replace_phys; + + phys_addr_t pgd_phys = virt_to_phys(pgd); + + replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1); + + cpu_install_idmap(); + replace_phys(pgd_phys); + cpu_uninstall_idmap(); +} + /* * It would be nice to return ASIDs back to the allocator, but unfortunately * that introduces a race with a generation rollover where we could erroneously diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c164d2cb35c0..0c19534a901e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,6 +140,34 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) + .pushsection ".idmap.text", "ax" +/* + * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * + * This is the low-level counterpart to cpu_replace_ttbr1, and should not be + * called by anything else. It can only be executed from a TTBR0 mapping. + */ +ENTRY(idmap_cpu_replace_ttbr1) + mrs x2, daif + msr daifset, #0xf + + adrp x1, empty_zero_page + msr ttbr1_el1, x1 + isb + + tlbi vmalle1 + dsb nsh + isb + + msr ttbr1_el1, x0 + isb + + msr daif, x2 + + ret +ENDPROC(idmap_cpu_replace_ttbr1) + .popsection + /* * __cpu_setup * -- cgit v1.2.3 From 0991212c3218e24c047ce32fa462931a5a53024e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:02 +0000 Subject: UPSTREAM: arm64: kasan: avoid TLB conflicts The page table modification performed during the KASAN init risks the allocation of conflicting TLB entries, as it swaps a set of valid global entries for another without suitable TLB maintenance. The presence of conflicting TLB entries can result in the delivery of synchronous TLB conflict aborts, or may result in the use of erroneous data being returned in response to a TLB lookup. This can affect explicit data accesses from software as well as translations performed asynchronously (e.g. as part of page table walks or speculative I-cache fetches), and can therefore result in a wide variety of problems. To avoid this, use cpu_replace_ttbr1 to swap the page tables. This ensures that when the new tables are installed there are no stale entries from the old tables which may conflict. As all updates are made to the tables while they are not active, the updates themselves are safe. 
At the same time, add the missing barrier to ensure that the tmp_pg_dir entries updated via memcpy are visible to the page table walkers at the point the tmp_pg_dir is installed. All other page table updates made as part of KASAN initialisation have the requisite barriers due to the use of the standard page table accessors. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Cc: Andrey Ryabinin Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit c1a88e9124a499939ebd8069d5e4d3937f019157) Signed-off-by: Jeff Vander Stoep Change-Id: I0934b236e25e015d6946dcedcca14ba9512418f7 --- arch/arm64/mm/kasan_init.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cab7a5be40aa..263b59020500 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -108,15 +109,6 @@ static void __init clear_pgds(unsigned long start, set_pgd(pgd_offset_k(start), __pgd(0)); } -static void __init cpu_set_ttbr1(unsigned long ttbr1) -{ - asm( - " msr ttbr1_el1, %0\n" - " isb" - : - : "r" (ttbr1)); -} - void __init kasan_init(void) { struct memblock_region *reg; @@ -130,8 +122,8 @@ void __init kasan_init(void) * setup will be finished. */ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); - cpu_set_ttbr1(__pa(tmp_pg_dir)); - flush_tlb_all(); + dsb(ishst); + cpu_replace_ttbr1(tmp_pg_dir); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -165,8 +157,7 @@ void __init kasan_init(void) pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); memset(kasan_zero_page, 0, PAGE_SIZE); - cpu_set_ttbr1(__pa(swapper_pg_dir)); - flush_tlb_all(); + cpu_replace_ttbr1(swapper_pg_dir); /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; -- cgit v1.2.3 From 1845f62c4eb95f0dd288ac54bba784e0acb53171 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:05 +0000 Subject: UPSTREAM: arm64: mm: avoid redundant __pa(__va(x)) When we "upgrade" to a section mapping, we free any table we made redundant by giving it back to memblock. To get the PA, we acquire the physical address and convert this to a VA, then subsequently convert this back to a PA. This works currently, but will not work if the tables are not accessed via linear map VAs (e.g. is we use fixmap slots). This patch uses {pmd,pud}_page_paddr to acquire the PA. This avoids the __pa(__va()) round trip, saving some work and avoiding reliance on the linear mapping. 
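
[Editorial sketch] The change described above can be illustrated by contrasting the two ways of recovering a next-level table's physical address. The descriptor layout, address mask and linear-map offset below are illustrative stand-ins rather than the kernel's definitions; the point is only that the direct extraction used by {pmd,pud}_page_paddr never goes through a virtual address, while the __pa(__va()) round trip silently depends on the table being reachable via the linear map.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the real definitions. */
#define DESC_ADDR_MASK  0x0000fffffffff000ULL   /* bits [47:12]: table PA */
#define LINEAR_OFFSET   0xffff000000000000ULL   /* fake linear-map offset */

typedef uint64_t pmd_t;

/* Direct route: mask the attribute bits off the table descriptor. */
static uint64_t table_paddr_direct(pmd_t pmd)
{
        return pmd & DESC_ADDR_MASK;
}

/* Round trip: descriptor -> linear-map VA -> back to PA. This only works
 * while the table is actually mapped in the linear map. */
static uint64_t table_paddr_roundtrip(pmd_t pmd)
{
        uint64_t va = (pmd & DESC_ADDR_MASK) + LINEAR_OFFSET;   /* "__va()" */
        return va - LINEAR_OFFSET;                              /* "__pa()" */
}

int main(void)
{
        pmd_t pmd = 0x0000000089ab3003ULL;      /* table PA 0x89ab3000 | attrs */

        printf("direct:     %#llx\n", (unsigned long long)table_paddr_direct(pmd));
        printf("round trip: %#llx\n", (unsigned long long)table_paddr_roundtrip(pmd));
        return 0;
}
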
Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 316b39db06718d59d82736df9fc65cf05b467cc7) Signed-off-by: Jeff Vander Stoep Change-Id: I26ed77790b5b00a15ea09a5a23a176de5b73a002 --- arch/arm64/mm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c0a4160331f8..fa2471ac191a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -171,7 +171,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, if (!pmd_none(old_pmd)) { flush_tlb_all(); if (pmd_table(old_pmd)) { - phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); + phys_addr_t table = pmd_page_paddr(old_pmd); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } @@ -232,7 +232,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, if (!pud_none(old_pud)) { flush_tlb_all(); if (pud_table(old_pud)) { - phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); + phys_addr_t table = pud_page_paddr(old_pud); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } -- cgit v1.2.3 From 7739d1799b628e8a52402f25034c960ee2e27a9a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:08 +0000 Subject: UPSTREAM: arm64: mm: use fixmap when creating page tables As a preparatory step to allow us to allocate early page tables from unmapped memory using memblock_alloc, modify the __create_mapping callees to map and unmap the tables they modify using fixmap entries. All but the top-level pgd initialisation is performed via the fixmap. Subsequent patches will inject the pgd physical address, and migrate to using the FIX_PGD slot. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit f4710445458c0a1bd1c3c014ada2e7d7dc7b882f) Signed-off-by: Jeff Vander Stoep Change-Id: I71d2fb02d009915b5aab7f3467c4a3c658e898de --- arch/arm64/mm/mmu.c | 61 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 20 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index fa2471ac191a..d7fa7f52f446 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,19 +63,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static void __init *early_pgtable_alloc(void) +static phys_addr_t __init early_pgtable_alloc(void) { phys_addr_t phys; void *ptr; phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); BUG_ON(!phys); - ptr = __va(phys); + + /* + * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE + * slot will be free, so we can (ab)use the FIX_PTE slot to initialise + * any level of table. 
+ */ + ptr = pte_set_fixmap(phys); + memset(ptr, 0, PAGE_SIZE); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); - return ptr; + /* + * Implicit barriers also ensure the zeroed page is visible to the page + * table walker + */ + pte_clear_fixmap(); + + return phys; } /* @@ -99,24 +110,28 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void)) { pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - pte = pgtable_alloc(); + phys_addr_t pte_phys = pgtable_alloc(); + pte = pte_set_fixmap(pte_phys); if (pmd_sect(*pmd)) split_pmd(pmd, pte); - __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); flush_tlb_all(); + pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); - pte = pte_offset_kernel(pmd, addr); + pte = pte_set_fixmap_offset(pmd, addr); do { set_pte(pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); + + pte_clear_fixmap(); } static void split_pud(pud_t *old_pud, pmd_t *pmd) @@ -134,7 +149,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void)) { pmd_t *pmd; unsigned long next; @@ -143,7 +158,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_sect(*pud)) { - pmd = pgtable_alloc(); + phys_addr_t pmd_phys = pgtable_alloc(); + pmd = pmd_set_fixmap(pmd_phys); if (pud_sect(*pud)) { /* * need to have the 1G of mappings continue to be @@ -151,12 +167,13 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, */ split_pud(pud, pmd); } - pud_populate(mm, pud, pmd); + __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); flush_tlb_all(); + pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); - pmd = pmd_offset(pud, addr); + pmd = pmd_set_fixmap_offset(pud, addr); do { next = pmd_addr_end(addr, end); /* try section mapping first */ @@ -182,6 +199,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } phys += next - addr; } while (pmd++, addr = next, addr != end); + + pmd_clear_fixmap(); } static inline bool use_1G_block(unsigned long addr, unsigned long next, @@ -199,18 +218,18 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = pgtable_alloc(); - pgd_populate(mm, pgd, pud); + phys_addr_t pud_phys = pgtable_alloc(); + __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); } BUG_ON(pgd_bad(*pgd)); - pud = pud_offset(pgd, addr); + pud = pud_set_fixmap_offset(pgd, addr); do { next = pud_addr_end(addr, end); @@ -243,6 +262,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, } phys += next - addr; } while (pud++, addr = next, addr != end); + + pud_clear_fixmap(); } /* @@ -252,7 +273,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - void *(*pgtable_alloc)(void)) + phys_addr_t 
(*pgtable_alloc)(void)) { unsigned long addr, length, end, next; @@ -275,14 +296,14 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, } while (pgd++, addr = next, addr != end); } -static void *late_pgtable_alloc(void) +static phys_addr_t late_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); - return ptr; + return __pa(ptr); } static void __init create_mapping(phys_addr_t phys, unsigned long virt, -- cgit v1.2.3 From caf0cdb7ae7d5b5a4da584ff37228c3f26a55aa0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:09 +0000 Subject: UPSTREAM: arm64: mm: allocate pagetables anywhere Now that create_mapping uses fixmap slots to modify pte, pmd, and pud entries, we can access page tables anywhere in physical memory, regardless of the extent of the linear mapping. Given that, we no longer need to limit memblock allocations during page table creation, and can leave the limit as its default MEMBLOCK_ALLOC_ANYWHERE. We never add memory which will fall outside of the linear map range given phys_offset and MAX_MEMBLOCK_ADDR are configured appropriately, so any tables we create will fall in the linear map of the final tables. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit cdef5f6e9e0e5ee397759b664a9f875ff59ccf01) Signed-off-by: Jeff Vander Stoep Change-Id: I9b6487491f47351303a147147c3d274da20def59 --- arch/arm64/mm/mmu.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d7fa7f52f446..e41f30b35928 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -383,20 +383,6 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end) static void __init map_mem(void) { struct memblock_region *reg; - phys_addr_t limit; - - /* - * Temporarily limit the memblock range. We need to do this as - * create_mapping requires puds, pmds and ptes to be allocated from - * memory addressable from the initial direct kernel mapping. - * - * The initial direct kernel mapping, located at swapper_pg_dir, gives - * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps, - * memory starting from PHYS_OFFSET (which must be aligned to 2MB as - * per Documentation/arm64/booting.txt). - */ - limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE; - memblock_set_current_limit(limit); /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -406,29 +392,8 @@ static void __init map_mem(void) if (start >= end) break; - if (ARM64_SWAPPER_USES_SECTION_MAPS) { - /* - * For the first memory bank align the start address and - * current memblock limit to prevent create_mapping() from - * allocating pte page tables from unmapped memory. With - * the section maps, if the first block doesn't end on section - * size boundary, create_mapping() will try to allocate a pte - * page, which may be returned from an unmapped area. - * When section maps are not used, the pte page table for the - * current limit is already present in swapper_pg_dir. 
- */ - if (start < limit) - start = ALIGN(start, SECTION_SIZE); - if (end < limit) { - limit = end & SECTION_MASK; - memblock_set_current_limit(limit); - } - } __map_memblock(start, end); } - - /* Limit no longer required. */ - memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } static void __init fixup_executable(void) -- cgit v1.2.3 From 880bee2ed6ea6051d1c387a8eb2cd81140b2ec64 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:10 +0000 Subject: UPSTREAM: arm64: mm: allow passing a pgdir to alloc_init_* To allow us to initialise pgdirs which are fixmapped, allow explicitly passing a pgdir rather than an mm. A new __create_pgd_mapping function is added for this, with existing __create_mapping callers migrated to this. The mm argument was previously only used at the top level. Now that it is redundant at all levels, it is removed. To indicate its new found similarity to alloc_init_{pud,pmd,pte}, __create_mapping is renamed to init_pgd. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 11509a306bb6ea595878b2d246d2d56b1783e040) Signed-off-by: Jeff Vander Stoep Change-Id: I554f90b986a0fce6c96c0a0a9da1d6e61602d0a9 --- arch/arm64/mm/mmu.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e41f30b35928..f949fe238643 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -146,8 +146,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) } while (pmd++, i++, i < PTRS_PER_PMD); } -static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, +static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void)) { @@ -215,8 +214,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, return true; } -static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, +static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void)) { @@ -257,7 +255,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, } } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot, + alloc_init_pmd(pud, addr, next, phys, prot, pgtable_alloc); } phys += next - addr; @@ -270,8 +268,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. 
*/ -static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, - phys_addr_t phys, unsigned long virt, +static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void)) { @@ -291,7 +288,7 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -306,6 +303,14 @@ static phys_addr_t late_pgtable_alloc(void) return __pa(ptr); } +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*alloc)(void)) +{ + init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); +} + static void __init create_mapping(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { @@ -314,16 +319,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, - size, prot, early_pgtable_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + early_pgtable_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - late_pgtable_alloc); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, + late_pgtable_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -335,8 +340,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt), - phys, virt, size, prot, late_pgtable_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + late_pgtable_alloc); } #ifdef CONFIG_DEBUG_RODATA -- cgit v1.2.3 From 63048c3fad6dd5219008c7d842cf1a5e4bac5508 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 25 Jan 2016 11:45:12 +0000 Subject: BACKPORT: arm64: mm: create new fine-grained mappings at boot At boot we may change the granularity of the tables mapping the kernel (by splitting or making sections). This may happen when we create the linear mapping (in __map_memblock), or at any point we try to apply fine-grained permissions to the kernel (e.g. fixup_executable, mark_rodata_ro, fixup_init). Changing the active page tables in this manner may result in multiple entries for the same address being allocated into TLBs, risking problems such as TLB conflict aborts or issues derived from the amalgamation of TLB entries. Generally, a break-before-make (BBM) approach is necessary to avoid conflicts, but we cannot do this for the kernel tables as it risks unmapping text or data being used to do so. Instead, we can create a new set of tables from scratch in the safety of the existing mappings, and subsequently migrate over to these using the new cpu_replace_ttbr1 helper, which avoids the two sets of tables being active simultaneously. To avoid issues when we later modify permissions of the page tables (e.g. in fixup_init), we must create the page tables at a granularity such that later modification does not result in splitting of tables. This patch applies this strategy, creating a new set of fine-grained page tables from scratch, and safely migrating to them. 
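
[Editorial sketch] The requirement above to "create the page tables at a granularity such that later modification does not result in splitting" comes down to address arithmetic. The layout below is hypothetical, using 4 KiB pages and 2 MiB blocks as example sizes: when a permission boundary is page-aligned but not block-aligned, a single block entry would cover both sides of the boundary and would have to be split to change only one side, whereas page-granular entries only need an attribute update.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE       0x1000ULL       /* 4 KiB */
#define SECTION_SIZE    0x200000ULL     /* 2 MiB block */

static uint64_t round_down(uint64_t x, uint64_t align)
{
        return x & ~(align - 1);
}

int main(void)
{
        /* Hypothetical layout: .rodata begins right after .text, at an
         * address that is page-aligned but not 2 MiB aligned. */
        uint64_t text_last    = 0x40280fff;
        uint64_t rodata_first = 0x40281000;

        if (round_down(text_last, SECTION_SIZE) ==
            round_down(rodata_first, SECTION_SIZE))
                puts("2M blocks: one entry covers both -> split needed to change .rodata");

        if (round_down(text_last, PAGE_SIZE) !=
            round_down(rodata_first, PAGE_SIZE))
                puts("4K pages:  separate entries -> attribute change only, no split");

        return 0;
}
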
The existing fixmap and kasan shadow page tables are reused in the new fine-grained tables. Signed-off-by: Mark Rutland Reviewed-by: Catalin Marinas Cc: Andrey Ryabinin Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Tested-by: Jeremy Linton Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas Bug: 30369029 Patchset: rework-pagetable (cherry picked from commit 068a17a5805dfbca4bbf03e664ca6b19709cc7a8) Signed-off-by: Jeff Vander Stoep Change-Id: I7ddaa296b94106846ebf8392d91186df8673df64 --- arch/arm64/include/asm/kasan.h | 3 + arch/arm64/mm/kasan_init.c | 15 ++++ arch/arm64/mm/mmu.c | 154 ++++++++++++++++++++++++----------------- 3 files changed, 110 insertions(+), 62 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 2774fa384c47..de0d21211c34 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -7,6 +7,7 @@ #include #include +#include /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. @@ -28,10 +29,12 @@ #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) void kasan_init(void); +void kasan_copy_shadow(pgd_t *pgdir); asmlinkage void kasan_early_init(void); #else static inline void kasan_init(void) { } +static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif #endif diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 263b59020500..cc569a38bc76 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -97,6 +97,21 @@ asmlinkage void __init kasan_early_init(void) kasan_map_early_shadow(); } +/* + * Copy the current shadow region into a new pgdir. + */ +void __init kasan_copy_shadow(pgd_t *pgdir) +{ + pgd_t *pgd, *pgd_new, *pgd_end; + + pgd = pgd_offset_k(KASAN_SHADOW_START); + pgd_end = pgd_offset_k(KASAN_SHADOW_END); + pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); + do { + set_pgd(pgd_new, *pgd); + } while (pgd++, pgd_new++, pgd != pgd_end); +} + static void __init clear_pgds(unsigned long start, unsigned long end) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f949fe238643..6f5770a1b140 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -344,48 +345,42 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, late_pgtable_alloc); } -#ifdef CONFIG_DEBUG_RODATA -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { + + unsigned long kernel_start = __pa(_stext); + unsigned long kernel_end = __pa(_end); + /* - * Set up the executable regions using the existing section mappings - * for now. This will get more fine grained later once all memory - * is mapped + * The kernel itself is mapped at page granularity. Map all other + * memory, making sure we don't overwrite the existing kernel mappings. 
*/ - unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE); - - if (end < kernel_x_start) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else if (start >= kernel_x_end) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else { - if (start < kernel_x_start) - create_mapping(start, __phys_to_virt(start), - kernel_x_start - start, - PAGE_KERNEL); - create_mapping(kernel_x_start, - __phys_to_virt(kernel_x_start), - kernel_x_end - kernel_x_start, - PAGE_KERNEL_EXEC); - if (kernel_x_end < end) - create_mapping(kernel_x_end, - __phys_to_virt(kernel_x_end), - end - kernel_x_end, - PAGE_KERNEL); + + /* No overlap with the kernel. */ + if (end < kernel_start || start >= kernel_end) { + __create_pgd_mapping(pgd, start, __phys_to_virt(start), + end - start, PAGE_KERNEL, + early_pgtable_alloc); + return; } + + /* + * This block overlaps the kernel mapping. Map the portion(s) which + * don't overlap. + */ + if (start < kernel_start) + __create_pgd_mapping(pgd, start, + __phys_to_virt(start), + kernel_start - start, PAGE_KERNEL, + early_pgtable_alloc); + if (kernel_end < end) + __create_pgd_mapping(pgd, kernel_end, + __phys_to_virt(kernel_end), + end - kernel_end, PAGE_KERNEL, + early_pgtable_alloc); } -#else -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) -{ - create_mapping(start, __phys_to_virt(start), end - start, - PAGE_KERNEL_EXEC); -} -#endif -static void __init map_mem(void) +static void __init map_mem(pgd_t *pgd) { struct memblock_region *reg; @@ -397,33 +392,10 @@ static void __init map_mem(void) if (start >= end) break; - __map_memblock(start, end); + __map_memblock(pgd, start, end); } } -static void __init fixup_executable(void) -{ -#ifdef CONFIG_DEBUG_RODATA - /* now that we are actually fully mapped, make the start/end more fine grained */ - if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_start = round_down(__pa(_stext), - SWAPPER_BLOCK_SIZE); - - create_mapping(aligned_start, __phys_to_virt(aligned_start), - __pa(_stext) - aligned_start, - PAGE_KERNEL); - } - - if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_end = round_up(__pa(__init_end), - SWAPPER_BLOCK_SIZE); - create_mapping(__pa(__init_end), (unsigned long)__init_end, - aligned_end - __pa(__init_end), - PAGE_KERNEL); - } -#endif -} - #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void) { @@ -441,14 +413,72 @@ void fixup_init(void) PAGE_KERNEL); } +static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, + pgprot_t prot) +{ + phys_addr_t pa_start = __pa(va_start); + unsigned long size = va_end - va_start; + + BUG_ON(!PAGE_ALIGNED(pa_start)); + BUG_ON(!PAGE_ALIGNED(size)); + + __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc); +} + +/* + * Create fine-grained mappings for the kernel. + */ +static void __init map_kernel(pgd_t *pgd) +{ + + map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC); + map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC); + map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL); + + /* + * The fixmap falls in a separate pgd to the kernel, and doesn't live + * in the carveout for the swapper_pg_dir. We can simply re-use the + * existing dir for the fixmap. 
+ */ + set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START)); + + kasan_copy_shadow(pgd); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. */ void __init paging_init(void) { - map_mem(); - fixup_executable(); + phys_addr_t pgd_phys = early_pgtable_alloc(); + pgd_t *pgd = pgd_set_fixmap(pgd_phys); + + map_kernel(pgd); + map_mem(pgd); + + /* + * We want to reuse the original swapper_pg_dir so we don't have to + * communicate the new address to non-coherent secondaries in + * secondary_entry, and so cpu_switch_mm can generate the address with + * adrp+add rather than a load from some global variable. + * + * To do this we need to go via a temporary pgd. + */ + cpu_replace_ttbr1(__va(pgd_phys)); + memcpy(swapper_pg_dir, pgd, PAGE_SIZE); + cpu_replace_ttbr1(swapper_pg_dir); + + pgd_clear_fixmap(); + memblock_free(pgd_phys, PAGE_SIZE); + + /* + * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd + * allocated with it. + */ + memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE, + SWAPPER_DIR_SIZE - PAGE_SIZE); bootmem_init(); } -- cgit v1.2.3
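
[Editorial sketch] One detail of the diff above worth illustrating is how the new fine-grained tables reuse existing subtrees: kasan_copy_shadow() and the FIXADDR_START set_pgd() both copy a single top-level entry so that two roots point at the same next-level table. The model below is not kernel code; the table sizes, index and descriptor encoding are made up, and a table's "physical address" is modelled as its C address with a type attribute in the low bits.

#include <stdint.h>
#include <stdio.h>

#define PTRS_PER_PGD 16                 /* illustrative, not the real value */

typedef uint64_t pgd_t;

static uint64_t fixmap_table[8];        /* the shared next-level table */

static pgd_t old_root[PTRS_PER_PGD];
static pgd_t new_root[PTRS_PER_PGD];

int main(void)
{
        int fix_idx = 15;               /* pretend this slot covers FIXADDR_START */

        /* old_root already points at the fixmap table. */
        old_root[fix_idx] = (uint64_t)(uintptr_t)fixmap_table | 0x3;

        /* Reuse the existing table rather than building a fresh one. */
        new_root[fix_idx] = old_root[fix_idx];

        printf("roots share the fixmap table: %s\n",
               (old_root[fix_idx] & ~0x3ULL) == (new_root[fix_idx] & ~0x3ULL)
               ? "yes" : "no");
        return 0;
}

Because only shared subtrees are carried over this way, the new root can later replace the contents of swapper_pg_dir without disturbing the fixmap or the KASAN shadow.
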