diff options
author | Srinivasarao P <spathi@codeaurora.org> | 2018-01-09 15:59:02 +0530 |
---|---|---|
committer | Srinivasarao P <spathi@codeaurora.org> | 2018-01-18 12:45:07 +0530 |
commit | dd4f1e35fa5f5a452d34e1fcc681472f2011f93d (patch) | |
tree | 8f7c973e9920c4df577c5b7093da541ce12da7d1 /arch/x86 | |
parent | 33260fbfb3632f876669fa815db1edb6d688ace3 (diff) | |
parent | 2fea0397a8e708952a22918bd89cce28b3087c11 (diff) |
Merge android-4.4.106 (2fea039) into msm-4.4
* refs/heads/tmp-2fea039
Linux 4.4.106
usb: gadget: ffs: Forbid usb_ep_alloc_request from sleeping
arm: KVM: Fix VTTBR_BADDR_MASK BUG_ON off-by-one
Revert "x86/mm/pat: Ensure cpa->pfn only contains page frame numbers"
Revert "x86/efi: Hoist page table switching code into efi_call_virt()"
Revert "x86/efi: Build our own page table structures"
net/packet: fix a race in packet_bind() and packet_notifier()
packet: fix crash in fanout_demux_rollover()
sit: update frag_off info
rds: Fix NULL pointer dereference in __rds_rdma_map
tipc: fix memory leak in tipc_accept_from_sock()
more bio_map_user_iov() leak fixes
s390: always save and restore all registers on context switch
ipmi: Stop timers before cleaning up the module
audit: ensure that 'audit=1' actually enables audit for PID 1
ipvlan: fix ipv6 outbound device
afs: Connect up the CB.ProbeUuid
IB/mlx5: Assign send CQ and recv CQ of UMR QP
IB/mlx4: Increase maximal message size under UD QP
xfrm: Copy policy family in clone_policy
jump_label: Invoke jump_label_test() via early_initcall()
atm: horizon: Fix irq release error
sctp: use the right sk after waking up from wait_buf sleep
sctp: do not free asoc when it is already dead in sctp_sendmsg
sparc64/mm: set fields in deferred pages
block: wake up all tasks blocked in get_request()
sunrpc: Fix rpc_task_begin trace point
NFS: Fix a typo in nfs_rename()
dynamic-debug-howto: fix optional/omitted ending line number to be LARGE instead of 0
lib/genalloc.c: make the avail variable an atomic_long_t
route: update fnhe_expires for redirect when the fnhe exists
route: also update fnhe_genid when updating a route cache
mac80211_hwsim: Fix memory leak in hwsim_new_radio_nl()
kbuild: pkg: use --transform option to prefix paths in tar
EDAC, i5000, i5400: Fix definition of NRECMEMB register
EDAC, i5000, i5400: Fix use of MTR_DRAM_WIDTH macro
powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested
drm/amd/amdgpu: fix console deadlock if late init failed
axonram: Fix gendisk handling
netfilter: don't track fragmented packets
zram: set physical queue limits to avoid array out of bounds accesses
i2c: riic: fix restart condition
crypto: s5p-sss - Fix completing crypto request in IRQ handler
ipv6: reorder icmpv6_init() and ip6_mr_init()
bnx2x: do not rollback VF MAC/VLAN filters we did not configure
bnx2x: fix possible overrun of VFPF multicast addresses array
bnx2x: prevent crash when accessing PTP with interface down
spi_ks8995: fix "BUG: key accdaa28 not in .data!"
arm64: KVM: Survive unknown traps from guests
arm: KVM: Survive unknown traps from guests
KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset
irqchip/crossbar: Fix incorrect type of register size
scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters
workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq
libata: drop WARN from protocol error in ata_sff_qc_issue()
kvm: nVMX: VMCLEAR should not cause the vCPU to shut down
USB: gadgetfs: Fix a potential memory leak in 'dev_config()'
usb: gadget: configs: plug memory leak
HID: chicony: Add support for another ASUS Zen AiO keyboard
gpio: altera: Use handle_level_irq when configured as a level_high
ARM: OMAP2+: Release device node after it is no longer needed.
ARM: OMAP2+: Fix device node reference counts
module: set __jump_table alignment to 8
selftest/powerpc: Fix false failures for skipped tests
x86/hpet: Prevent might sleep splat on resume
ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure
vti6: Don't report path MTU below IPV6_MIN_MTU.
Revert "s390/kbuild: enable modversions for symbols exported from asm"
Revert "spi: SPI_FSL_DSPI should depend on HAS_DMA"
Revert "drm/armada: Fix compile fail"
mm: drop unused pmdp_huge_get_and_clear_notify()
thp: fix MADV_DONTNEED vs. numa balancing race
thp: reduce indentation level in change_huge_pmd()
scsi: storvsc: Workaround for virtual DVD SCSI version
ARM: avoid faulting on qemu
ARM: BUG if jumping to usermode address in kernel mode
arm64: fpsimd: Prevent registers leaking from dead tasks
KVM: VMX: remove I/O port 0x80 bypass on Intel hosts
arm64: KVM: fix VTTBR_BADDR_MASK BUG_ON off-by-one
media: dvb: i2c transfers over usb cannot be done from stack
drm/exynos: gem: Drop NONCONTIG flag for buffers allocated without IOMMU
drm: extra printk() wrapper macros
kdb: Fix handling of kallsyms_symbol_next() return value
s390: fix compat system call table
iommu/vt-d: Fix scatterlist offset handling
ALSA: usb-audio: Add check return value for usb_string()
ALSA: usb-audio: Fix out-of-bound error
ALSA: seq: Remove spurious WARN_ON() at timer check
ALSA: pcm: prevent UAF in snd_pcm_info
x86/PCI: Make broadcom_postcore_init() check acpi_disabled
X.509: reject invalid BIT STRING for subjectPublicKey
ASN.1: check for error from ASN1_OP_END__ACT actions
ASN.1: fix out-of-bounds read when parsing indefinite length item
efi: Move some sysfs files to be read-only by root
scsi: libsas: align sata_device's rps_resp on a cacheline
isa: Prevent NULL dereference in isa_bus driver callbacks
hv: kvp: Avoid reading past allocated blocks from KVP file
virtio: release virtio index when fail to device_register
can: usb_8dev: cancel urb on -EPIPE and -EPROTO
can: esd_usb2: cancel urb on -EPIPE and -EPROTO
can: ems_usb: cancel urb on -EPIPE and -EPROTO
can: kvaser_usb: cancel urb on -EPIPE and -EPROTO
can: kvaser_usb: ratelimit errors if incomplete messages are received
can: kvaser_usb: Fix comparison bug in kvaser_usb_read_bulk_callback()
can: kvaser_usb: free buf in error paths
can: ti_hecc: Fix napi poll return value for repoll
BACKPORT: irq: Make the irqentry text section unconditional
UPSTREAM: arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections
UPSTREAM: x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text
UPSTREAM: kasan: make get_wild_bug_type() static
UPSTREAM: kasan: separate report parts by empty lines
UPSTREAM: kasan: improve double-free report format
UPSTREAM: kasan: print page description after stacks
UPSTREAM: kasan: improve slab object description
UPSTREAM: kasan: change report header
UPSTREAM: kasan: simplify address description logic
UPSTREAM: kasan: change allocation and freeing stack traces headers
UPSTREAM: kasan: unify report headers
UPSTREAM: kasan: introduce helper functions for determining bug type
BACKPORT: kasan: report only the first error by default
UPSTREAM: kasan: fix races in quarantine_remove_cache()
UPSTREAM: kasan: resched in quarantine_remove_cache()
BACKPORT: kasan, sched/headers: Uninline kasan_enable/disable_current()
BACKPORT: kasan: drain quarantine of memcg slab objects
UPSTREAM: kasan: eliminate long stalls during quarantine reduction
UPSTREAM: kasan: support panic_on_warn
UPSTREAM: x86/suspend: fix false positive KASAN warning on suspend/resume
UPSTREAM: kasan: support use-after-scope detection
UPSTREAM: kasan/tests: add tests for user memory access functions
UPSTREAM: mm, kasan: add a ksize() test
UPSTREAM: kasan: test fix: warn if the UAF could not be detected in kmalloc_uaf2
UPSTREAM: kasan: modify kmalloc_large_oob_right(), add kmalloc_pagealloc_oob_right()
UPSTREAM: lib/stackdepot: export save/fetch stack for drivers
UPSTREAM: lib/stackdepot.c: bump stackdepot capacity from 16MB to 128MB
BACKPORT: kprobes: Unpoison stack in jprobe_return() for KASAN
UPSTREAM: kasan: remove the unnecessary WARN_ONCE from quarantine.c
UPSTREAM: kasan: avoid overflowing quarantine size on low memory systems
UPSTREAM: kasan: improve double-free reports
BACKPORT: mm: coalesce split strings
BACKPORT: mm/kasan: get rid of ->state in struct kasan_alloc_meta
UPSTREAM: mm/kasan: get rid of ->alloc_size in struct kasan_alloc_meta
UPSTREAM: mm: kasan: remove unused 'reserved' field from struct kasan_alloc_meta
UPSTREAM: mm/kasan, slub: don't disable interrupts when object leaves quarantine
UPSTREAM: mm/kasan: don't reduce quarantine in atomic contexts
UPSTREAM: mm/kasan: fix corruptions and false positive reports
UPSTREAM: lib/stackdepot.c: use __GFP_NOWARN for stack allocations
BACKPORT: mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB
UPSTREAM: kasan/quarantine: fix bugs on qlist_move_cache()
UPSTREAM: mm: mempool: kasan: don't poot mempool objects in quarantine
UPSTREAM: kasan: change memory hot-add error messages to info messages
BACKPORT: mm/kasan: add API to check memory regions
UPSTREAM: mm/kasan: print name of mem[set,cpy,move]() caller in report
UPSTREAM: mm: kasan: initial memory quarantine implementation
UPSTREAM: lib/stackdepot: avoid to return 0 handle
UPSTREAM: lib/stackdepot.c: allow the stack trace hash to be zero
UPSTREAM: mm, kasan: fix compilation for CONFIG_SLAB
BACKPORT: mm, kasan: stackdepot implementation. Enable stackdepot for SLAB
BACKPORT: mm, kasan: add GFP flags to KASAN API
UPSTREAM: mm, kasan: SLAB support
UPSTREAM: mm/slab: align cache size first before determination of OFF_SLAB candidate
UPSTREAM: mm/slab: use more appropriate condition check for debug_pagealloc
UPSTREAM: mm/slab: factor out debugging initialization in cache_init_objs()
UPSTREAM: mm/slab: remove object status buffer for DEBUG_SLAB_LEAK
UPSTREAM: mm/slab: alternative implementation for DEBUG_SLAB_LEAK
UPSTREAM: mm/slab: clean up DEBUG_PAGEALLOC processing code
UPSTREAM: mm/slab: activate debug_pagealloc in SLAB when it is actually enabled
sched: EAS/WALT: Don't take into account of running task's util
BACKPORT: schedutil: Reset cached freq if it is not in sync with next_freq
UPSTREAM: kasan: add functions to clear stack poison
Conflicts:
arch/arm/include/asm/kvm_arm.h
arch/arm64/kernel/vmlinux.lds.S
include/linux/kasan.h
kernel/softirq.c
lib/Kconfig
lib/Kconfig.kasan
lib/Makefile
lib/stackdepot.c
mm/kasan/kasan.c
sound/usb/mixer.c
Change-Id: If70ced6da5f19be3dd92d10a8d8cd4d5841e5870
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/entry/entry_64.S | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/efi.h | 26 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 18 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/wakeup_64.S | 9 | ||||
-rw-r--r-- | arch/x86/kernel/hpet.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 1 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 31 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 17 | ||||
-rw-r--r-- | arch/x86/pci/broadcom_bus.c | 2 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 39 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_32.c | 5 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 137 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_64.S | 43 |
14 files changed, 157 insertions, 184 deletions
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index cc0f2f5da19b..644b8b4e89a9 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -672,9 +672,15 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym) .endm #endif +/* Make sure APIC interrupt handlers end up in the irqentry section: */ +#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" +#define POP_SECTION_IRQENTRY .popsection + .macro apicinterrupt num sym do_sym +PUSH_SECTION_IRQENTRY apicinterrupt3 \num \sym \do_sym trace_apicinterrupt \num \sym +POP_SECTION_IRQENTRY .endm #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c9e6eab2075b..08b1f2f6ea50 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,7 +3,6 @@ #include <asm/fpu/api.h> #include <asm/pgtable.h> -#include <asm/tlb.h> /* * We map the EFI regions needed for runtime services non-contiguously, @@ -67,17 +66,6 @@ extern u64 asmlinkage efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) -/* - * Scratch space used for switching the pagetable in the EFI stub - */ -struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; -} __packed; - #define efi_call_virt(f, ...) \ ({ \ efi_status_t __s; \ @@ -85,20 +73,7 @@ struct efi_scratch { efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ - \ - if (efi_scratch.use_pgd) { \ - efi_scratch.prev_cr3 = read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ - } \ - \ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ - \ - if (efi_scratch.use_pgd) { \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - } \ - \ __kernel_fpu_end(); \ preempt_enable(); \ __s; \ @@ -138,7 +113,6 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); -extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1b78ffe01d0..616ebd22ef9a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif -KASAN_SANITIZE_head$(BITS).o := n -KASAN_SANITIZE_dumpstack.o := n -KASAN_SANITIZE_dumpstack_$(BITS).o := n +KASAN_SANITIZE_head$(BITS).o := n +KASAN_SANITIZE_dumpstack.o := n +KASAN_SANITIZE_dumpstack_$(BITS).o := n +KASAN_SANITIZE_stacktrace.o := n + +OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_test_nx.o := y + +# If instrumentation of this dir is enabled, boot hangs during first second. +# Probably could be more selective here, but note that files related to irqs, +# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to +# non-deterministic coverage. +KCOV_INSTRUMENT := n CFLAGS_irq.o := -I$(src)/../include/asm/trace diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8c35df468104..48e3979d174a 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -107,6 +107,15 @@ ENTRY(do_suspend_lowlevel) movq pt_regs_r14(%rax), %r14 movq pt_regs_r15(%rax), %r15 +#ifdef CONFIG_KASAN + /* + * The suspend path may have poisoned some areas deeper in the stack, + * which we now need to unpoison. + */ + movq %rsp, %rdi + call kasan_unpoison_task_stack_below +#endif + xorl %eax, %eax addq $8, %rsp jmp restore_processor_state diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index acc9b8f19ca8..f48eb8eeefe2 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -353,7 +353,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 99d293ea2b49..3eb804335458 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -49,7 +49,7 @@ #include <linux/kdebug.h> #include <linux/kallsyms.h> #include <linux/ftrace.h> - +#include <linux/kasan.h> #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> @@ -1078,6 +1078,9 @@ void jprobe_return(void) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + /* Unpoison stack redzones in the frames we are going to jump over. */ + kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp); + asm volatile ( #ifdef CONFIG_X86_64 " xchg %%rbx,%%rsp \n" diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index fe133b710bef..1d0e36f909eb 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS KPROBES_TEXT ENTRY_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) /* End of text section */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 253a8c8207bb..dcbafe53e2d4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6182,12 +6182,7 @@ static __init int hardware_setup(void) memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); @@ -6929,9 +6924,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -6942,22 +6936,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); @@ -10574,8 +10555,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1924bba0f3af..4540e8880cd9 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -911,10 +911,15 @@ static void populate_pte(struct cpa_data *cpa, pte = pte_offset_kernel(pmd, start); while (num_pages-- && start < end) { - set_pte(pte, pfn_pte(cpa->pfn, pgprot)); + + /* deal with the NX bit */ + if (!(pgprot_val(pgprot) & _PAGE_NX)) + cpa->pfn &= ~_PAGE_NX; + + set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); start += PAGE_SIZE; - cpa->pfn++; + cpa->pfn += PAGE_SIZE; pte++; } } @@ -970,11 +975,11 @@ static int populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | + set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pmd_pgprot))); start += PMD_SIZE; - cpa->pfn += PMD_SIZE >> PAGE_SHIFT; + cpa->pfn += PMD_SIZE; cur_pages += PMD_SIZE >> PAGE_SHIFT; } @@ -1043,11 +1048,11 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, * Map everything starting from the Gb boundary, possibly with 1G pages */ while (end - start >= PUD_SIZE) { - set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | + set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pud_pgprot))); start += PUD_SIZE; - cpa->pfn += PUD_SIZE >> PAGE_SHIFT; + cpa->pfn += PUD_SIZE; cur_pages += PUD_SIZE >> PAGE_SHIFT; pud++; } diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index bb461cfd01ab..526536c81ddc 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void) * We should get host bridge information from ACPI unless the BIOS * doesn't support it. */ - if (acpi_os_get_root_pointer()) + if (!acpi_disabled && acpi_os_get_root_pointer()) return 0; #endif diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3c1f3cd7b2ba..ad285404ea7f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void) * This function will switch the EFI runtime services to virtual mode. * Essentially, we look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor into the - * efi_pgd page table. + * ->trampoline_pgd page table using a top-down VA allocation scheme. * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the @@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void) * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime - * function. For function arguments passing we do copy the PUDs of the - * kernel page table into efi_pgd prior to each call. + * function. For function arguments passing we do copy the PGDs of the + * kernel page table into ->trampoline_pgd prior to each call. * * Specially for kexec boot, efi runtime maps in previous kernel should * be passed in via setup_data. In that case runtime ranges will be mapped @@ -895,12 +895,6 @@ static void __init __efi_enter_virtual_mode(void) efi.systab = NULL; - if (efi_alloc_page_tables()) { - pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } - efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { @@ -960,11 +954,28 @@ static void __init __efi_enter_virtual_mode(void) efi_runtime_mkexec(); /* - * We mapped the descriptor array into the EFI pagetable above - * but we're not unmapping it here because if we're running in - * EFI mixed mode we need all of memory to be accessible when - * we pass parameters to the EFI runtime services in the - * thunking code. + * We mapped the descriptor array into the EFI pagetable above but we're + * not unmapping it here. Here's why: + * + * We're copying select PGDs from the kernel page table to the EFI page + * table and when we do so and make changes to those PGDs like unmapping + * stuff from them, those changes appear in the kernel page table and we + * go boom. + * + * From setup_real_mode(): + * + * ... + * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + * + * In this particular case, our allocation is in PGD 0 of the EFI page + * table but we've copied that PGD from PGD[272] of the EFI page table: + * + * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 + * + * where the direct memory mapping in kernel space is. + * + * new_memmap's VA comes from that direct mapping and thus clearing it, + * it would get cleared in the kernel page table too. * * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 58d669bc8250..ed5b67338294 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -38,11 +38,6 @@ * say 0 - 3G. */ -int __init efi_alloc_page_tables(void) -{ - return 0; -} - void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) {} int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 18dfaad71c99..a0ac0f9c307f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -40,7 +40,6 @@ #include <asm/fixmap.h> #include <asm/realmode.h> #include <asm/time.h> -#include <asm/pgalloc.h> /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. @@ -48,7 +47,16 @@ */ static u64 efi_va = EFI_VA_START; -struct efi_scratch efi_scratch; +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; + u64 phys_stack; +} __packed; static void __init early_code_mapping_set_exec(int executable) { @@ -75,11 +83,8 @@ pgd_t * __init efi_call_phys_prolog(void) int pgd; int n_pgds; - if (!efi_enabled(EFI_OLD_MEMMAP)) { - save_pgd = (pgd_t *)read_cr3(); - write_cr3((unsigned long)efi_scratch.efi_pgt); - goto out; - } + if (!efi_enabled(EFI_OLD_MEMMAP)) + return NULL; early_code_mapping_set_exec(1); @@ -91,7 +96,6 @@ pgd_t * __init efi_call_phys_prolog(void) vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); } -out: __flush_tlb_all(); return save_pgd; @@ -105,11 +109,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) int pgd_idx; int nr_pgds; - if (!efi_enabled(EFI_OLD_MEMMAP)) { - write_cr3((unsigned long)save_pgd); - __flush_tlb_all(); + if (!save_pgd) return; - } nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); @@ -122,97 +123,27 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } -static pgd_t *efi_pgd; - -/* - * We need our own copy of the higher levels of the page tables - * because we want to avoid inserting EFI region mappings (EFI_VA_END - * to EFI_VA_START) into the standard kernel page tables. Everything - * else can be shared, see efi_sync_low_kernel_mappings(). - */ -int __init efi_alloc_page_tables(void) -{ - pgd_t *pgd; - pud_t *pud; - gfp_t gfp_mask; - - if (efi_enabled(EFI_OLD_MEMMAP)) - return 0; - - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); - if (!efi_pgd) - return -ENOMEM; - - pgd = efi_pgd + pgd_index(EFI_VA_END); - - pud = pud_alloc_one(NULL, 0); - if (!pud) { - free_page((unsigned long)efi_pgd); - return -ENOMEM; - } - - pgd_populate(NULL, pgd, pud); - - return 0; -} - /* * Add low kernel mappings for passing arguments to EFI functions. */ void efi_sync_low_kernel_mappings(void) { - unsigned num_entries; - pgd_t *pgd_k, *pgd_efi; - pud_t *pud_k, *pud_efi; + unsigned num_pgds; + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); if (efi_enabled(EFI_OLD_MEMMAP)) return; - /* - * We can share all PGD entries apart from the one entry that - * covers the EFI runtime mapping space. - * - * Make sure the EFI runtime region mappings are guaranteed to - * only span a single PGD entry and that the entry also maps - * other important kernel regions. - */ - BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); - BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != - (EFI_VA_END & PGDIR_MASK)); - - pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); - pgd_k = pgd_offset_k(PAGE_OFFSET); + num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); - num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); - memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); - - /* - * We share all the PUD entries apart from those that map the - * EFI regions. Copy around them. - */ - BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); - BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); - - pgd_efi = efi_pgd + pgd_index(EFI_VA_END); - pud_efi = pud_offset(pgd_efi, 0); - - pgd_k = pgd_offset_k(EFI_VA_END); - pud_k = pud_offset(pgd_k, 0); - - num_entries = pud_index(EFI_VA_END); - memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); - - pud_efi = pud_offset(pgd_efi, EFI_VA_START); - pud_k = pud_offset(pgd_k, EFI_VA_START); - - num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); - memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + memcpy(pgd + pgd_index(PAGE_OFFSET), + init_mm.pgd + pgd_index(PAGE_OFFSET), + sizeof(pgd_t) * num_pgds); } int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long pfn, text; + unsigned long text; struct page *page; unsigned npages; pgd_t *pgd; @@ -220,8 +151,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); - pgd = efi_pgd; + efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; + pgd = __va(efi_scratch.efi_pgt); /* * It can happen that the physical address of new_memmap lands in memory @@ -229,8 +160,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * and ident-map those pages containing the map before calling * phys_efi_set_virtual_address_map(). */ - pfn = pa_memmap >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) { + if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -255,9 +185,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (_end - _text) >> PAGE_SHIFT; text = __pa(_text); - pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } @@ -267,20 +196,20 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va) { - unsigned long flags = 0; - unsigned long pfn; - pgd_t *pgd = efi_pgd; + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned long pf = 0; if (!(md->attribute & EFI_MEMORY_WB)) - flags |= _PAGE_PCD; + pf |= _PAGE_PCD; - pfn = md->phys_addr >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) + if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", md->phys_addr, va); } @@ -383,7 +312,9 @@ void __init efi_runtime_mkexec(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - ptdump_walk_pgd_level(NULL, efi_pgd); + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + ptdump_walk_pgd_level(NULL, pgd); #endif } diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 32020cb8bb08..86d0f9e08dd9 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -38,6 +38,41 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp + /* stolen from gcc */ + .macro FLUSH_TLB_ALL + movq %r15, efi_scratch(%rip) + movq %r14, efi_scratch+8(%rip) + movq %cr4, %r15 + movq %r15, %r14 + andb $0x7f, %r14b + movq %r14, %cr4 + movq %r15, %cr4 + movq efi_scratch+8(%rip), %r14 + movq efi_scratch(%rip), %r15 + .endm + + .macro SWITCH_PGT + cmpb $0, efi_scratch+24(%rip) + je 1f + movq %r15, efi_scratch(%rip) # r15 + # save previous CR3 + movq %cr3, %r15 + movq %r15, efi_scratch+8(%rip) # prev_cr3 + movq efi_scratch+16(%rip), %r15 # EFI pgt + movq %r15, %cr3 + 1: + .endm + + .macro RESTORE_PGT + cmpb $0, efi_scratch+24(%rip) + je 2f + movq efi_scratch+8(%rip), %r15 + movq %r15, %cr3 + movq efi_scratch(%rip), %r15 + FLUSH_TLB_ALL + 2: + .endm + ENTRY(efi_call) SAVE_XMM mov (%rsp), %rax @@ -48,8 +83,16 @@ ENTRY(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $48, %rsp RESTORE_XMM ret ENDPROC(efi_call) + + .data +ENTRY(efi_scratch) + .fill 3,8,0 + .byte 0 + .quad 0 |