summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorSrinivasarao P <spathi@codeaurora.org>2018-01-09 15:59:02 +0530
committerSrinivasarao P <spathi@codeaurora.org>2018-01-18 12:45:07 +0530
commitdd4f1e35fa5f5a452d34e1fcc681472f2011f93d (patch)
tree8f7c973e9920c4df577c5b7093da541ce12da7d1 /arch/x86
parent33260fbfb3632f876669fa815db1edb6d688ace3 (diff)
parent2fea0397a8e708952a22918bd89cce28b3087c11 (diff)
Merge android-4.4.106 (2fea039) into msm-4.4
* refs/heads/tmp-2fea039 Linux 4.4.106 usb: gadget: ffs: Forbid usb_ep_alloc_request from sleeping arm: KVM: Fix VTTBR_BADDR_MASK BUG_ON off-by-one Revert "x86/mm/pat: Ensure cpa->pfn only contains page frame numbers" Revert "x86/efi: Hoist page table switching code into efi_call_virt()" Revert "x86/efi: Build our own page table structures" net/packet: fix a race in packet_bind() and packet_notifier() packet: fix crash in fanout_demux_rollover() sit: update frag_off info rds: Fix NULL pointer dereference in __rds_rdma_map tipc: fix memory leak in tipc_accept_from_sock() more bio_map_user_iov() leak fixes s390: always save and restore all registers on context switch ipmi: Stop timers before cleaning up the module audit: ensure that 'audit=1' actually enables audit for PID 1 ipvlan: fix ipv6 outbound device afs: Connect up the CB.ProbeUuid IB/mlx5: Assign send CQ and recv CQ of UMR QP IB/mlx4: Increase maximal message size under UD QP xfrm: Copy policy family in clone_policy jump_label: Invoke jump_label_test() via early_initcall() atm: horizon: Fix irq release error sctp: use the right sk after waking up from wait_buf sleep sctp: do not free asoc when it is already dead in sctp_sendmsg sparc64/mm: set fields in deferred pages block: wake up all tasks blocked in get_request() sunrpc: Fix rpc_task_begin trace point NFS: Fix a typo in nfs_rename() dynamic-debug-howto: fix optional/omitted ending line number to be LARGE instead of 0 lib/genalloc.c: make the avail variable an atomic_long_t route: update fnhe_expires for redirect when the fnhe exists route: also update fnhe_genid when updating a route cache mac80211_hwsim: Fix memory leak in hwsim_new_radio_nl() kbuild: pkg: use --transform option to prefix paths in tar EDAC, i5000, i5400: Fix definition of NRECMEMB register EDAC, i5000, i5400: Fix use of MTR_DRAM_WIDTH macro powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested drm/amd/amdgpu: fix console deadlock if late init failed axonram: Fix gendisk handling netfilter: don't track fragmented packets zram: set physical queue limits to avoid array out of bounds accesses i2c: riic: fix restart condition crypto: s5p-sss - Fix completing crypto request in IRQ handler ipv6: reorder icmpv6_init() and ip6_mr_init() bnx2x: do not rollback VF MAC/VLAN filters we did not configure bnx2x: fix possible overrun of VFPF multicast addresses array bnx2x: prevent crash when accessing PTP with interface down spi_ks8995: fix "BUG: key accdaa28 not in .data!" arm64: KVM: Survive unknown traps from guests arm: KVM: Survive unknown traps from guests KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset irqchip/crossbar: Fix incorrect type of register size scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq libata: drop WARN from protocol error in ata_sff_qc_issue() kvm: nVMX: VMCLEAR should not cause the vCPU to shut down USB: gadgetfs: Fix a potential memory leak in 'dev_config()' usb: gadget: configs: plug memory leak HID: chicony: Add support for another ASUS Zen AiO keyboard gpio: altera: Use handle_level_irq when configured as a level_high ARM: OMAP2+: Release device node after it is no longer needed. ARM: OMAP2+: Fix device node reference counts module: set __jump_table alignment to 8 selftest/powerpc: Fix false failures for skipped tests x86/hpet: Prevent might sleep splat on resume ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure vti6: Don't report path MTU below IPV6_MIN_MTU. Revert "s390/kbuild: enable modversions for symbols exported from asm" Revert "spi: SPI_FSL_DSPI should depend on HAS_DMA" Revert "drm/armada: Fix compile fail" mm: drop unused pmdp_huge_get_and_clear_notify() thp: fix MADV_DONTNEED vs. numa balancing race thp: reduce indentation level in change_huge_pmd() scsi: storvsc: Workaround for virtual DVD SCSI version ARM: avoid faulting on qemu ARM: BUG if jumping to usermode address in kernel mode arm64: fpsimd: Prevent registers leaking from dead tasks KVM: VMX: remove I/O port 0x80 bypass on Intel hosts arm64: KVM: fix VTTBR_BADDR_MASK BUG_ON off-by-one media: dvb: i2c transfers over usb cannot be done from stack drm/exynos: gem: Drop NONCONTIG flag for buffers allocated without IOMMU drm: extra printk() wrapper macros kdb: Fix handling of kallsyms_symbol_next() return value s390: fix compat system call table iommu/vt-d: Fix scatterlist offset handling ALSA: usb-audio: Add check return value for usb_string() ALSA: usb-audio: Fix out-of-bound error ALSA: seq: Remove spurious WARN_ON() at timer check ALSA: pcm: prevent UAF in snd_pcm_info x86/PCI: Make broadcom_postcore_init() check acpi_disabled X.509: reject invalid BIT STRING for subjectPublicKey ASN.1: check for error from ASN1_OP_END__ACT actions ASN.1: fix out-of-bounds read when parsing indefinite length item efi: Move some sysfs files to be read-only by root scsi: libsas: align sata_device's rps_resp on a cacheline isa: Prevent NULL dereference in isa_bus driver callbacks hv: kvp: Avoid reading past allocated blocks from KVP file virtio: release virtio index when fail to device_register can: usb_8dev: cancel urb on -EPIPE and -EPROTO can: esd_usb2: cancel urb on -EPIPE and -EPROTO can: ems_usb: cancel urb on -EPIPE and -EPROTO can: kvaser_usb: cancel urb on -EPIPE and -EPROTO can: kvaser_usb: ratelimit errors if incomplete messages are received can: kvaser_usb: Fix comparison bug in kvaser_usb_read_bulk_callback() can: kvaser_usb: free buf in error paths can: ti_hecc: Fix napi poll return value for repoll BACKPORT: irq: Make the irqentry text section unconditional UPSTREAM: arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections UPSTREAM: x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text UPSTREAM: kasan: make get_wild_bug_type() static UPSTREAM: kasan: separate report parts by empty lines UPSTREAM: kasan: improve double-free report format UPSTREAM: kasan: print page description after stacks UPSTREAM: kasan: improve slab object description UPSTREAM: kasan: change report header UPSTREAM: kasan: simplify address description logic UPSTREAM: kasan: change allocation and freeing stack traces headers UPSTREAM: kasan: unify report headers UPSTREAM: kasan: introduce helper functions for determining bug type BACKPORT: kasan: report only the first error by default UPSTREAM: kasan: fix races in quarantine_remove_cache() UPSTREAM: kasan: resched in quarantine_remove_cache() BACKPORT: kasan, sched/headers: Uninline kasan_enable/disable_current() BACKPORT: kasan: drain quarantine of memcg slab objects UPSTREAM: kasan: eliminate long stalls during quarantine reduction UPSTREAM: kasan: support panic_on_warn UPSTREAM: x86/suspend: fix false positive KASAN warning on suspend/resume UPSTREAM: kasan: support use-after-scope detection UPSTREAM: kasan/tests: add tests for user memory access functions UPSTREAM: mm, kasan: add a ksize() test UPSTREAM: kasan: test fix: warn if the UAF could not be detected in kmalloc_uaf2 UPSTREAM: kasan: modify kmalloc_large_oob_right(), add kmalloc_pagealloc_oob_right() UPSTREAM: lib/stackdepot: export save/fetch stack for drivers UPSTREAM: lib/stackdepot.c: bump stackdepot capacity from 16MB to 128MB BACKPORT: kprobes: Unpoison stack in jprobe_return() for KASAN UPSTREAM: kasan: remove the unnecessary WARN_ONCE from quarantine.c UPSTREAM: kasan: avoid overflowing quarantine size on low memory systems UPSTREAM: kasan: improve double-free reports BACKPORT: mm: coalesce split strings BACKPORT: mm/kasan: get rid of ->state in struct kasan_alloc_meta UPSTREAM: mm/kasan: get rid of ->alloc_size in struct kasan_alloc_meta UPSTREAM: mm: kasan: remove unused 'reserved' field from struct kasan_alloc_meta UPSTREAM: mm/kasan, slub: don't disable interrupts when object leaves quarantine UPSTREAM: mm/kasan: don't reduce quarantine in atomic contexts UPSTREAM: mm/kasan: fix corruptions and false positive reports UPSTREAM: lib/stackdepot.c: use __GFP_NOWARN for stack allocations BACKPORT: mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB UPSTREAM: kasan/quarantine: fix bugs on qlist_move_cache() UPSTREAM: mm: mempool: kasan: don't poot mempool objects in quarantine UPSTREAM: kasan: change memory hot-add error messages to info messages BACKPORT: mm/kasan: add API to check memory regions UPSTREAM: mm/kasan: print name of mem[set,cpy,move]() caller in report UPSTREAM: mm: kasan: initial memory quarantine implementation UPSTREAM: lib/stackdepot: avoid to return 0 handle UPSTREAM: lib/stackdepot.c: allow the stack trace hash to be zero UPSTREAM: mm, kasan: fix compilation for CONFIG_SLAB BACKPORT: mm, kasan: stackdepot implementation. Enable stackdepot for SLAB BACKPORT: mm, kasan: add GFP flags to KASAN API UPSTREAM: mm, kasan: SLAB support UPSTREAM: mm/slab: align cache size first before determination of OFF_SLAB candidate UPSTREAM: mm/slab: use more appropriate condition check for debug_pagealloc UPSTREAM: mm/slab: factor out debugging initialization in cache_init_objs() UPSTREAM: mm/slab: remove object status buffer for DEBUG_SLAB_LEAK UPSTREAM: mm/slab: alternative implementation for DEBUG_SLAB_LEAK UPSTREAM: mm/slab: clean up DEBUG_PAGEALLOC processing code UPSTREAM: mm/slab: activate debug_pagealloc in SLAB when it is actually enabled sched: EAS/WALT: Don't take into account of running task's util BACKPORT: schedutil: Reset cached freq if it is not in sync with next_freq UPSTREAM: kasan: add functions to clear stack poison Conflicts: arch/arm/include/asm/kvm_arm.h arch/arm64/kernel/vmlinux.lds.S include/linux/kasan.h kernel/softirq.c lib/Kconfig lib/Kconfig.kasan lib/Makefile lib/stackdepot.c mm/kasan/kasan.c sound/usb/mixer.c Change-Id: If70ced6da5f19be3dd92d10a8d8cd4d5841e5870 Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/entry/entry_64.S6
-rw-r--r--arch/x86/include/asm/efi.h26
-rw-r--r--arch/x86/kernel/Makefile18
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S9
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c5
-rw-r--r--arch/x86/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/kvm/vmx.c31
-rw-r--r--arch/x86/mm/pageattr.c17
-rw-r--r--arch/x86/pci/broadcom_bus.c2
-rw-r--r--arch/x86/platform/efi/efi.c39
-rw-r--r--arch/x86/platform/efi/efi_32.c5
-rw-r--r--arch/x86/platform/efi/efi_64.c137
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S43
14 files changed, 157 insertions, 184 deletions
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index cc0f2f5da19b..644b8b4e89a9 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -672,9 +672,15 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
.endm
#endif
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY .popsection
+
.macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
apicinterrupt3 \num \sym \do_sym
trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
.endm
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index c9e6eab2075b..08b1f2f6ea50 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,7 +3,6 @@
#include <asm/fpu/api.h>
#include <asm/pgtable.h>
-#include <asm/tlb.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -67,17 +66,6 @@ extern u64 asmlinkage efi_call(void *fp, ...);
#define efi_call_phys(f, args...) efi_call((f), args)
-/*
- * Scratch space used for switching the pagetable in the EFI stub
- */
-struct efi_scratch {
- u64 r15;
- u64 prev_cr3;
- pgd_t *efi_pgt;
- bool use_pgd;
- u64 phys_stack;
-} __packed;
-
#define efi_call_virt(f, ...) \
({ \
efi_status_t __s; \
@@ -85,20 +73,7 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
- \
- if (efi_scratch.use_pgd) { \
- efi_scratch.prev_cr3 = read_cr3(); \
- write_cr3((unsigned long)efi_scratch.efi_pgt); \
- __flush_tlb_all(); \
- } \
- \
__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
- \
- if (efi_scratch.use_pgd) { \
- write_cr3(efi_scratch.prev_cr3); \
- __flush_tlb_all(); \
- } \
- \
__kernel_fpu_end(); \
preempt_enable(); \
__s; \
@@ -138,7 +113,6 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
-extern int __init efi_alloc_page_tables(void);
extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
extern void __init old_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ffe01d0..616ebd22ef9a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
-KASAN_SANITIZE_head$(BITS).o := n
-KASAN_SANITIZE_dumpstack.o := n
-KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_head$(BITS).o := n
+KASAN_SANITIZE_dumpstack.o := n
+KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_stacktrace.o := n
+
+OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_test_nx.o := y
+
+# If instrumentation of this dir is enabled, boot hangs during first second.
+# Probably could be more selective here, but note that files related to irqs,
+# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
+# non-deterministic coverage.
+KCOV_INSTRUMENT := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8c35df468104..48e3979d174a 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -107,6 +107,15 @@ ENTRY(do_suspend_lowlevel)
movq pt_regs_r14(%rax), %r14
movq pt_regs_r15(%rax), %r15
+#ifdef CONFIG_KASAN
+ /*
+ * The suspend path may have poisoned some areas deeper in the stack,
+ * which we now need to unpoison.
+ */
+ movq %rsp, %rdi
+ call kasan_unpoison_task_stack_below
+#endif
+
xorl %eax, %eax
addq $8, %rsp
jmp restore_processor_state
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acc9b8f19ca8..f48eb8eeefe2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -353,7 +353,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
- disable_irq(hdev->irq);
+ disable_hardirq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 99d293ea2b49..3eb804335458 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -49,7 +49,7 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
-
+#include <linux/kasan.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
@@ -1078,6 +1078,9 @@ void jprobe_return(void)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ /* Unpoison stack redzones in the frames we are going to jump over. */
+ kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
+
asm volatile (
#ifdef CONFIG_X86_64
" xchg %%rbx,%%rsp \n"
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index fe133b710bef..1d0e36f909eb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -101,6 +101,7 @@ SECTIONS
KPROBES_TEXT
ENTRY_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
/* End of text section */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 253a8c8207bb..dcbafe53e2d4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6182,12 +6182,7 @@ static __init int hardware_setup(void)
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
@@ -6929,9 +6924,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 zero = 0;
gpa_t vmptr;
- struct vmcs12 *vmcs12;
- struct page *page;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -6942,22 +6936,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- /*
- * For accurate processor emulation, VMCLEAR beyond available
- * physical memory should do nothing at all. However, it is
- * possible that a nested vmx bug, not a guest hypervisor bug,
- * resulted in this case, so let's shut down before doing any
- * more damage:
- */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- return 1;
- }
- vmcs12 = kmap(page);
- vmcs12->launch_state = 0;
- kunmap(page);
- nested_release_page(page);
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12, launch_state),
+ &zero, sizeof(zero));
nested_free_vmcs02(vmx, vmptr);
@@ -10574,8 +10555,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
static void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
+ }
free_nested(to_vmx(vcpu));
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1924bba0f3af..4540e8880cd9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -911,10 +911,15 @@ static void populate_pte(struct cpa_data *cpa,
pte = pte_offset_kernel(pmd, start);
while (num_pages-- && start < end) {
- set_pte(pte, pfn_pte(cpa->pfn, pgprot));
+
+ /* deal with the NX bit */
+ if (!(pgprot_val(pgprot) & _PAGE_NX))
+ cpa->pfn &= ~_PAGE_NX;
+
+ set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
start += PAGE_SIZE;
- cpa->pfn++;
+ cpa->pfn += PAGE_SIZE;
pte++;
}
}
@@ -970,11 +975,11 @@ static int populate_pmd(struct cpa_data *cpa,
pmd = pmd_offset(pud, start);
- set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
+ set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE |
massage_pgprot(pmd_pgprot)));
start += PMD_SIZE;
- cpa->pfn += PMD_SIZE >> PAGE_SHIFT;
+ cpa->pfn += PMD_SIZE;
cur_pages += PMD_SIZE >> PAGE_SHIFT;
}
@@ -1043,11 +1048,11 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
* Map everything starting from the Gb boundary, possibly with 1G pages
*/
while (end - start >= PUD_SIZE) {
- set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
+ set_pud(pud, __pud(cpa->pfn | _PAGE_PSE |
massage_pgprot(pud_pgprot)));
start += PUD_SIZE;
- cpa->pfn += PUD_SIZE >> PAGE_SHIFT;
+ cpa->pfn += PUD_SIZE;
cur_pages += PUD_SIZE >> PAGE_SHIFT;
pud++;
}
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
* We should get host bridge information from ACPI unless the BIOS
* doesn't support it.
*/
- if (acpi_os_get_root_pointer())
+ if (!acpi_disabled && acpi_os_get_root_pointer())
return 0;
#endif
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 3c1f3cd7b2ba..ad285404ea7f 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void)
* This function will switch the EFI runtime services to virtual mode.
* Essentially, we look through the EFI memmap and map every region that
* has the runtime attribute bit set in its memory descriptor into the
- * efi_pgd page table.
+ * ->trampoline_pgd page table using a top-down VA allocation scheme.
*
* The old method which used to update that memory descriptor with the
* virtual address obtained from ioremap() is still supported when the
@@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void)
*
* The new method does a pagetable switch in a preemption-safe manner
* so that we're in a different address space when calling a runtime
- * function. For function arguments passing we do copy the PUDs of the
- * kernel page table into efi_pgd prior to each call.
+ * function. For function arguments passing we do copy the PGDs of the
+ * kernel page table into ->trampoline_pgd prior to each call.
*
* Specially for kexec boot, efi runtime maps in previous kernel should
* be passed in via setup_data. In that case runtime ranges will be mapped
@@ -895,12 +895,6 @@ static void __init __efi_enter_virtual_mode(void)
efi.systab = NULL;
- if (efi_alloc_page_tables()) {
- pr_err("Failed to allocate EFI page tables\n");
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return;
- }
-
efi_merge_regions();
new_memmap = efi_map_regions(&count, &pg_shift);
if (!new_memmap) {
@@ -960,11 +954,28 @@ static void __init __efi_enter_virtual_mode(void)
efi_runtime_mkexec();
/*
- * We mapped the descriptor array into the EFI pagetable above
- * but we're not unmapping it here because if we're running in
- * EFI mixed mode we need all of memory to be accessible when
- * we pass parameters to the EFI runtime services in the
- * thunking code.
+ * We mapped the descriptor array into the EFI pagetable above but we're
+ * not unmapping it here. Here's why:
+ *
+ * We're copying select PGDs from the kernel page table to the EFI page
+ * table and when we do so and make changes to those PGDs like unmapping
+ * stuff from them, those changes appear in the kernel page table and we
+ * go boom.
+ *
+ * From setup_real_mode():
+ *
+ * ...
+ * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+ *
+ * In this particular case, our allocation is in PGD 0 of the EFI page
+ * table but we've copied that PGD from PGD[272] of the EFI page table:
+ *
+ * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
+ *
+ * where the direct memory mapping in kernel space is.
+ *
+ * new_memmap's VA comes from that direct mapping and thus clearing it,
+ * it would get cleared in the kernel page table too.
*
* efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
*/
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 58d669bc8250..ed5b67338294 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -38,11 +38,6 @@
* say 0 - 3G.
*/
-int __init efi_alloc_page_tables(void)
-{
- return 0;
-}
-
void efi_sync_low_kernel_mappings(void) {}
void __init efi_dump_pagetable(void) {}
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 18dfaad71c99..a0ac0f9c307f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -40,7 +40,6 @@
#include <asm/fixmap.h>
#include <asm/realmode.h>
#include <asm/time.h>
-#include <asm/pgalloc.h>
/*
* We allocate runtime services regions bottom-up, starting from -4G, i.e.
@@ -48,7 +47,16 @@
*/
static u64 efi_va = EFI_VA_START;
-struct efi_scratch efi_scratch;
+/*
+ * Scratch space used for switching the pagetable in the EFI stub
+ */
+struct efi_scratch {
+ u64 r15;
+ u64 prev_cr3;
+ pgd_t *efi_pgt;
+ bool use_pgd;
+ u64 phys_stack;
+} __packed;
static void __init early_code_mapping_set_exec(int executable)
{
@@ -75,11 +83,8 @@ pgd_t * __init efi_call_phys_prolog(void)
int pgd;
int n_pgds;
- if (!efi_enabled(EFI_OLD_MEMMAP)) {
- save_pgd = (pgd_t *)read_cr3();
- write_cr3((unsigned long)efi_scratch.efi_pgt);
- goto out;
- }
+ if (!efi_enabled(EFI_OLD_MEMMAP))
+ return NULL;
early_code_mapping_set_exec(1);
@@ -91,7 +96,6 @@ pgd_t * __init efi_call_phys_prolog(void)
vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
}
-out:
__flush_tlb_all();
return save_pgd;
@@ -105,11 +109,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
int pgd_idx;
int nr_pgds;
- if (!efi_enabled(EFI_OLD_MEMMAP)) {
- write_cr3((unsigned long)save_pgd);
- __flush_tlb_all();
+ if (!save_pgd)
return;
- }
nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
@@ -122,97 +123,27 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
early_code_mapping_set_exec(0);
}
-static pgd_t *efi_pgd;
-
-/*
- * We need our own copy of the higher levels of the page tables
- * because we want to avoid inserting EFI region mappings (EFI_VA_END
- * to EFI_VA_START) into the standard kernel page tables. Everything
- * else can be shared, see efi_sync_low_kernel_mappings().
- */
-int __init efi_alloc_page_tables(void)
-{
- pgd_t *pgd;
- pud_t *pud;
- gfp_t gfp_mask;
-
- if (efi_enabled(EFI_OLD_MEMMAP))
- return 0;
-
- gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO;
- efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
- if (!efi_pgd)
- return -ENOMEM;
-
- pgd = efi_pgd + pgd_index(EFI_VA_END);
-
- pud = pud_alloc_one(NULL, 0);
- if (!pud) {
- free_page((unsigned long)efi_pgd);
- return -ENOMEM;
- }
-
- pgd_populate(NULL, pgd, pud);
-
- return 0;
-}
-
/*
* Add low kernel mappings for passing arguments to EFI functions.
*/
void efi_sync_low_kernel_mappings(void)
{
- unsigned num_entries;
- pgd_t *pgd_k, *pgd_efi;
- pud_t *pud_k, *pud_efi;
+ unsigned num_pgds;
+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
if (efi_enabled(EFI_OLD_MEMMAP))
return;
- /*
- * We can share all PGD entries apart from the one entry that
- * covers the EFI runtime mapping space.
- *
- * Make sure the EFI runtime region mappings are guaranteed to
- * only span a single PGD entry and that the entry also maps
- * other important kernel regions.
- */
- BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
- BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
- (EFI_VA_END & PGDIR_MASK));
-
- pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
- pgd_k = pgd_offset_k(PAGE_OFFSET);
+ num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
- num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
- memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
-
- /*
- * We share all the PUD entries apart from those that map the
- * EFI regions. Copy around them.
- */
- BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
- BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
-
- pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
- pud_efi = pud_offset(pgd_efi, 0);
-
- pgd_k = pgd_offset_k(EFI_VA_END);
- pud_k = pud_offset(pgd_k, 0);
-
- num_entries = pud_index(EFI_VA_END);
- memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
-
- pud_efi = pud_offset(pgd_efi, EFI_VA_START);
- pud_k = pud_offset(pgd_k, EFI_VA_START);
-
- num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
- memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
+ memcpy(pgd + pgd_index(PAGE_OFFSET),
+ init_mm.pgd + pgd_index(PAGE_OFFSET),
+ sizeof(pgd_t) * num_pgds);
}
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
- unsigned long pfn, text;
+ unsigned long text;
struct page *page;
unsigned npages;
pgd_t *pgd;
@@ -220,8 +151,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
if (efi_enabled(EFI_OLD_MEMMAP))
return 0;
- efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
- pgd = efi_pgd;
+ efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
+ pgd = __va(efi_scratch.efi_pgt);
/*
* It can happen that the physical address of new_memmap lands in memory
@@ -229,8 +160,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
* and ident-map those pages containing the map before calling
* phys_efi_set_virtual_address_map().
*/
- pfn = pa_memmap >> PAGE_SHIFT;
- if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) {
+ if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
return 1;
}
@@ -255,9 +185,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
npages = (_end - _text) >> PAGE_SHIFT;
text = __pa(_text);
- pfn = text >> PAGE_SHIFT;
- if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) {
+ if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) {
pr_err("Failed to map kernel text 1:1\n");
return 1;
}
@@ -267,20 +196,20 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
- kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+
+ kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
}
static void __init __map_region(efi_memory_desc_t *md, u64 va)
{
- unsigned long flags = 0;
- unsigned long pfn;
- pgd_t *pgd = efi_pgd;
+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+ unsigned long pf = 0;
if (!(md->attribute & EFI_MEMORY_WB))
- flags |= _PAGE_PCD;
+ pf |= _PAGE_PCD;
- pfn = md->phys_addr >> PAGE_SHIFT;
- if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags))
+ if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
md->phys_addr, va);
}
@@ -383,7 +312,9 @@ void __init efi_runtime_mkexec(void)
void __init efi_dump_pagetable(void)
{
#ifdef CONFIG_EFI_PGT_DUMP
- ptdump_walk_pgd_level(NULL, efi_pgd);
+ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+
+ ptdump_walk_pgd_level(NULL, pgd);
#endif
}
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 32020cb8bb08..86d0f9e08dd9 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -38,6 +38,41 @@
mov %rsi, %cr0; \
mov (%rsp), %rsp
+ /* stolen from gcc */
+ .macro FLUSH_TLB_ALL
+ movq %r15, efi_scratch(%rip)
+ movq %r14, efi_scratch+8(%rip)
+ movq %cr4, %r15
+ movq %r15, %r14
+ andb $0x7f, %r14b
+ movq %r14, %cr4
+ movq %r15, %cr4
+ movq efi_scratch+8(%rip), %r14
+ movq efi_scratch(%rip), %r15
+ .endm
+
+ .macro SWITCH_PGT
+ cmpb $0, efi_scratch+24(%rip)
+ je 1f
+ movq %r15, efi_scratch(%rip) # r15
+ # save previous CR3
+ movq %cr3, %r15
+ movq %r15, efi_scratch+8(%rip) # prev_cr3
+ movq efi_scratch+16(%rip), %r15 # EFI pgt
+ movq %r15, %cr3
+ 1:
+ .endm
+
+ .macro RESTORE_PGT
+ cmpb $0, efi_scratch+24(%rip)
+ je 2f
+ movq efi_scratch+8(%rip), %r15
+ movq %r15, %cr3
+ movq efi_scratch(%rip), %r15
+ FLUSH_TLB_ALL
+ 2:
+ .endm
+
ENTRY(efi_call)
SAVE_XMM
mov (%rsp), %rax
@@ -48,8 +83,16 @@ ENTRY(efi_call)
mov %r8, %r9
mov %rcx, %r8
mov %rsi, %rcx
+ SWITCH_PGT
call *%rdi
+ RESTORE_PGT
addq $48, %rsp
RESTORE_XMM
ret
ENDPROC(efi_call)
+
+ .data
+ENTRY(efi_scratch)
+ .fill 3,8,0
+ .byte 0
+ .quad 0